Exemplo n.º 1
0
    def process_move_from_state(self):
        """Advance the puck one tick from the current state.

        Returns:
            str | None: 'left' or 'right' when a goal is scored against
            that side (including the idle-penalty goal), otherwise None.
        """
        new_puck_pos = utils.next_pos_from_state(self.state)

        # Idle detection is active only while in_initial_state holds a counter.
        if self.in_initial_state is not None:
            if new_puck_pos['x'] != self.state['puck_pos']['x']:
                # Puck moved along x -> no longer idle; disarm the counter.
                self.in_initial_state = None
            elif self.in_initial_state >= self.max_idle_moves:
                # Too many idle ticks: award a goal against the side whose
                # half currently contains the puck.
                goal_for = 'left' if self.state['puck_pos']['x'] > self.board.shape[1] / 2 else 'right'
                # puck_to always mirrors goal_for; the original conditional
                # ('left' if goal_for == 'left' else 'right') was redundant.
                self.process_goal_for(goal_for, puck_to=goal_for)
                return goal_for
            else:
                # Still idle but under the limit: count this tick.
                self.in_initial_state += 1

        # Commit the new position before evaluating the goal condition.
        self.state['puck_pos'] = new_puck_pos

        # Evaluate the goal check exactly once (the original called it three
        # times, which is wasteful and fragile if the helper is not pure).
        goal_side = utils.is_goal(self.state)
        if goal_side is not None:
            self.process_goal_for(goal_side)
            return goal_side

        # No goal: update speed (and direction) for the next tick.
        self.state['puck_speed'] = utils.next_speed(self.state)
        return None
Exemplo n.º 2
0
def doorkey_problem(env, info):
    """Find the optimal action sequence for a door-key gridworld.

    Intended for:
        doorkey-5x5-normal.env
        doorkey-6x6-normal.env
        doorkey-8x8-normal.env

        doorkey-6x6-direct.env
        doorkey-8x8-direct.env

        doorkey-6x6-shortcut.env
        doorkey-8x8-shortcut.env

    Feel free to modify this function.
    """
    # Solve the finite-horizon DP over the full state space.
    state_space, state_to_idx = generate_state_space(env, info)
    controls = [MF, TL, TR, PK, UD]
    V, pi = dp.DP(state_space, state_to_idx, controls, dp.get_next_state,
                  dp.step_cost, dp.terminal_cost, env)

    # Roll the policy forward from the initial state until the goal is hit,
    # recording the chosen control at each step.
    cur = utils.get_initial_state(info)
    actions = []
    step = 0
    while not utils.is_goal(cur, info["goal_pos"]):
        act = pi[step, state_to_idx[utils.hash_state(cur)]]
        actions.append(act)
        cur = dp.get_next_state(cur, act, env)
        step += 1

    return actions
def estimate_path(current_state, after_time):
    """Simulate puck motion for up to `after_time` time units.

    Returns:
        list: (puck_pos, puck_speed) tuples, one per simulated tick,
        stopping early if a goal would be scored.
    """
    # Shallow copy is enough: only top-level keys are reassigned below,
    # so the caller's dict itself is not mutated.
    state = copy.copy(current_state)
    path = []
    while after_time > 0:
        state['puck_pos'] = utils.next_pos_from_state(state)
        if utils.is_goal(state) is not None:
            break
        # Evaluate the bounce check once (the original called it twice).
        bounced_speed = utils.next_after_boundaries(state)
        if bounced_speed:
            state['puck_speed'] = bounced_speed
        path.append((state['puck_pos'], state['puck_speed']))
        after_time -= state['delta_t']
    return path
Exemplo n.º 4
0
def estimate_path(current_state, after_time):
    """Estimate the puck's next moves within an `after_time` window.

    Returns:
        list: (puck_pos, puck_speed) tuples, one per simulated tick,
        stopping early if a goal would be scored.
    """
    # Shallow copy is enough: only top-level keys are reassigned below,
    # so the caller's dict itself is not mutated.
    state = copy.copy(current_state)
    path = []
    while after_time > 0:
        state['puck_pos'] = utils.next_pos_from_state(state)
        if utils.is_goal(state) is not None:
            break
        # Evaluate the bounce check once (the original called it twice).
        bounced_speed = utils.next_after_boundaries(state)
        if bounced_speed:
            state['puck_speed'] = bounced_speed
        path.append((state['puck_pos'], state['puck_speed']))
        after_time -= state['delta_t']
    return path