示例#1
0
def _FindPos_AllKMers(sequence,k):
    'Returns a dictionary {kmer: [appearing_positions]} containing all the kmers in sequence'
    seqs = {}
    current_seq = [sequence[i] for i in range(k)]
    seqs = {utils.seq2str(current_seq): [0]}
    for i in range(1, len(sequence)-k+1):
        current_seq = current_seq[1:] + [sequence[i+k-1]]
        if utils.seq2str(current_seq) in seqs.keys():
            seqs[utils.seq2str(current_seq)].append(i)
        else:
            seqs[utils.seq2str(current_seq)] = [i]
    return seqs
示例#2
0
    def solve_simple(self, simple_seq: Seq, state: State) -> typing.List[int]:
        """Solve one simple sequence and return the path found for it.

        The agent must currently stand on the first node of ``simple_seq``.
        A path from the first to the last node is searched, the agent is then
        moved to the last node (searching extra sub-paths when needed), and
        every node of the returned path is recorded in ``self.visited_nodes``.

        Args:
            simple_seq: Sequence to solve; only its first node, last node and
                length are used here.
            state: State object handed through to every helper call.

        Returns:
            The path returned by the first ``search_path`` call.
        """
        label = seq2str(simple_seq)
        self.log.info('solving simple sequence %s', label)
        self.push_task_stack(state, f'solve {label}')

        # search for the path
        first, last = simple_seq[0], simple_seq[-1]
        assert self.pos == first
        # presumably the two length arguments are the min/max path length
        # and the flag enables exact matching -- confirm against search_path
        size = len(simple_seq)
        path = self.search_path(first, last, size, size, True, state)

        # move to the end point; this may involve additional searching
        self.push_task_stack(state, f'move to {last}')
        pending = [[last]]  # sentinel entry: loop until we stand on its head
        while self.pos != pending[-1][0]:
            pending.append(
                self.search_path(self.pos, last, 0, 1024, False, state))
        # walk the collected sub-paths newest first, skipping the sentinel
        for leg in pending[:0:-1]:
            self.move_along_path(leg, state)
        self.pop_task_stack(state)

        self.log.info('solved simple sequence %s: %s', simple_seq, path)
        self.pop_task_stack(state)
        for node in path:
            self.visited_nodes.add(node)
        return path
示例#3
0
def _kmersWithAlterations_at_index(inp,
                                   curr_index,
                                   prev_combs,
                                   length,
                                   max_hamming,
                                   alphabet='ATCG'):
    'Returns all the kmers with alterations at curr_index. It calculates from previous data, so we do not need to start from scratch at every index position'
    new_combs = {}
    if len(prev_combs) == 0:
        # Initialize
        if max_hamming > 0:
            for alph in alphabet:
                new_combs[alph] = 1
        new_combs[inp[0]] = 0
    else:
        # First, we eliminate the first character of each possible sequence so far
        current_combs = _update_prev_list(inp, curr_index, prev_combs, length,
                                          max_hamming)
        # Then, we generate the new batch by adding one character at the end
        for now_comb in current_combs:
            now_ham = current_combs[now_comb]
            if now_ham == max_hamming:
                new_combination = [x for x in now_comb] + [
                    inp[curr_index]
                ]  # no permutate in this case
                new_distance = max_hamming
                new_combs[utils.seq2str(
                    new_combination)] = new_combs.setdefault(
                        utils.seq2str(new_combination), max_hamming)
            else:
                for alp in alphabet:
                    new_combination = [x for x in now_comb
                                       ] + [alp]  # no permutate in this case
                    if alp == inp[curr_index]:
                        new_distance = now_ham
                    else:
                        new_distance = now_ham + 1
                    new_combs[utils.seq2str(new_combination)] = min(
                        new_combs.setdefault(utils.seq2str(new_combination),
                                             max_hamming), new_distance)
    return new_combs
示例#4
0
def solve(graph: nx.DiGraph, i: int, seq: Seq, correct: SolvedSeq,
          savedir: str):
    """Run one agent on ``seq``, check its answer and save a progress report.

    Args:
        graph: Graph wrapped into the ``State`` the agent works on.
        i: Zero-based run index; names and messages use ``i + 1``.
        seq: Sequence handed to the ``Agent``.
        correct: Expected result of ``agent.solve``.
        savedir: Base directory; output goes to its ``simulations`` subfolder,
            which is created if missing.

    Raises:
        ValueError: If the agent's result differs from ``correct``.
    """
    run_no = i + 1
    out_dir = os.path.join(savedir, 'simulations')
    os.makedirs(out_dir, exist_ok=True)

    agent = Agent(seq, f'Agent-{run_no}')
    state = State(graph, f'State-{run_no}')
    result = agent.solve(state)
    if result != correct:
        raise ValueError(f'{run_no}: incorrect solve')

    report_path = os.path.join(out_dir, f'solve_{run_no}.pdf')
    state.generate_progress(report_path, result, seq2str(seq))
示例#5
0
def _update_prev_list(inp, curr_index, prev_combs, length, max_hamming):
    'Returns the list of current possible d-distance sequences by eliminating the previous element of each sequence, merging equal resulting subsequences and updating hamming distances'
    # Propagate result
    if len(list(prev_combs.keys())[0]) != length:
        current_combs = prev_combs
    # First, we clean current_combinations (we drop first character)
    else:
        current_combs = {}
        for now_comb in prev_combs:
            # New combinations drop the first character (slide window)
            new_combination = now_comb[1:]
            if now_comb[0] == inp[max(curr_index - length,
                                      0)]:  # hamming distance does not change
                new_distance = prev_combs[now_comb]
                current_combs[utils.seq2str(new_combination)] = min(
                    current_combs.setdefault(utils.seq2str(new_combination),
                                             max_hamming), new_distance)
            else:  # the new sequence has shorter hamming entropy (we drop a difference)
                current_combs[utils.seq2str(new_combination)] = min(
                    current_combs.setdefault(utils.seq2str(new_combination),
                                             max_hamming),
                    prev_combs[now_comb] - 1)
    return current_combs
示例#6
0
def draw(g: nx.DiGraph,
         sqs: typing.List[typing.Tuple[Seq, SolvedSeq]],
         title: str = ''):
    """Draw the graph next to a text listing of the sequences on figure 0.

    Args:
        g: Graph to draw; node positions are read from the ``'pos'`` node
            attribute.
        sqs: Pairs of (sequence, solved sequence); only the first item of
            each pair is printed.
        title: Optional figure title; skipped when empty.
    """
    plt.figure(0, figsize=(10, 5))
    plt.clf()
    if title:
        plt.suptitle(title)

    # Left panel: the graph itself.
    plt.subplot(1, 2, 1)
    layout = dict(g.nodes('pos'))
    nx.draw_networkx(g, pos=layout)

    # Right panel: one text line per sequence, spread evenly over [0, 1).
    plt.subplot(1, 2, 2)
    total = len(sqs)
    for row, (raw_seq, _solved) in enumerate(sqs):
        label = '{: >2}: {}'.format(
            row, seq2str(raw_seq, aligned=True, align_size=2))
        plt.text(0, row / total, label, fontfamily='monospace')
    plt.ylim((0, 1))
    plt.axis('off')
示例#7
0
    def solve(self, state: State):
        """Solve the agent's master sequence piece by piece.

        ``self.seq`` is split with ``split_seq``; each piece is solved with
        ``solve_simple`` and the partial results are combined with ``join``.

        Args:
            state: State object handed through to every helper call.

        Returns:
            The joined result of all solved pieces (an empty list when
            ``split_seq`` yields nothing).
        """
        label = seq2str(self.seq)
        self.log.info('solving master sequence %s', label)
        # NOTE(review): this task is pushed but never popped in this method
        # -- confirm whether that is intentional.
        self.push_task_stack(state, f'solve {label}')

        assert self.seq[0] == self.pos

        # always peek node type at the current position
        state.peek_node_type(self)

        # per the original author: pieces start and end with known nodes
        # and have only unknown nodes in between
        pieces = split_seq(self.seq)
        solved_seq = []
        for piece in pieces:
            solved_seq = join(solved_seq, self.solve_simple(piece, state))

        return solved_seq
示例#8
0

def _run_or_rebot_from_cli(method, cliargs, usage, **argparser_config):
    LOGGER.register_file_logger()
    ap = utils.ArgumentParser(usage, get_full_version())
    try:
        options, datasources = \
            ap.parse_args(cliargs, argfile='argumentfile', unescape='escape',
                          help='help', version='version', check_args=True,
                          **argparser_config)
    except Information, msg:
        _exit(INFO_PRINTED, utils.unic(msg))
    except DataError, err:
        _exit(DATA_ERROR, utils.unic(err))

    LOGGER.info('Data sources: %s' % utils.seq2str(datasources))
    try:
        suite = method(*datasources, **options)
    except DataError, err:
        _exit(DATA_ERROR, unicode(err))
    except (KeyboardInterrupt, SystemExit):
        _exit(STOPPED_BY_USER, 'Execution stopped by user.')
    except:
        error, details = utils.get_error_details()
        _exit(FRAMEWORK_ERROR, 'Unexpected error: %s' % error, details)
    else:
        _exit(_failed_critical_test_count(suite))


def _failed_critical_test_count(suite):
    rc = suite.critical_stats.failed
def rebot_from_cli(args, usage):
    """Log the Rebot version and run ``run_rebot`` through the CLI driver.

    Returns whatever ``_run_or_rebot_from_cli`` returns.
    """
    version_banner = get_full_version('Rebot')
    LOGGER.info(version_banner)
    rc = _run_or_rebot_from_cli(run_rebot, args, usage)
    return rc

def _run_or_rebot_from_cli(method, cliargs, usage, **argparser_config):
    LOGGER.register_file_logger()
    try:
        options, datasources = _parse_arguments(cliargs, usage,
                                                **argparser_config)
    except Information, msg:
        print utils.encode_output(unicode(msg))
        return INFO_PRINTED
    except DataError, err:
        _report_error(unicode(err), help=True)
        return DATA_ERROR
    LOGGER.info('Data sources: %s' % utils.seq2str(datasources))
    return _execute(method, datasources, options)

def _parse_arguments(cliargs, usage, **argparser_config):
    """Build a ``utils.ArgumentParser`` for ``usage`` and parse ``cliargs``.

    Extra keyword arguments are forwarded to ``parse_args`` unchanged.
    Returns the value of ``parse_args`` directly.
    """
    parser = utils.ArgumentParser(usage, get_full_version())
    parsed = parser.parse_args(cliargs, argfile='argumentfile',
                               unescape='escape', help='help',
                               version='version', check_args=True,
                               **argparser_config)
    return parsed

def _execute(method, datasources, options):
    try:
        suite = method(*datasources, **options)
    except DataError, err:
        _report_error(unicode(err), help=True)
        return DATA_ERROR
    except (KeyboardInterrupt, SystemExit):