def process_move_from_state(self):
    """Advance the puck by one step and report any goal that results.

    The next puck position is obtained from ``utils.next_pos_from_state``.
    While ``self.in_initial_state`` is not ``None`` it acts as an idle-move
    counter: it is cleared as soon as the puck's ``x`` coordinate changes,
    incremented while the puck stays put, and once it reaches
    ``self.max_idle_moves`` a goal is awarded as a penalty.

    Returns:
        ``'left'`` or ``'right'`` when the idle penalty is applied, the
        non-``None`` value of ``utils.is_goal`` when the new position is a
        goal, and ``None`` when the move produced no goal.
    """
    new_puck_pos = utils.next_pos_from_state(self.state)

    # Idleness is only tracked until the puck first moves along x.
    if self.in_initial_state is not None:
        if new_puck_pos['x'] != self.state['puck_pos']['x']:
            # Puck position changed: stop tracking idleness.
            self.in_initial_state = None
        elif self.in_initial_state >= self.max_idle_moves:
            # Too many idle moves: penalize with a goal. The goal goes to
            # 'left' when the puck's x is beyond half of board.shape[1]
            # (presumably the board width -- confirm), else to 'right'.
            goal_for = (
                'left'
                if self.state['puck_pos']['x'] > self.board.shape[1] / 2
                else 'right'
            )
            # goal_for is always 'left' or 'right', so it can be passed
            # straight through as puck_to.
            self.process_goal_for(goal_for, puck_to=goal_for)
            return goal_for
        else:
            # Still idle but within the allowed budget.
            self.in_initial_state += 1

    # Commit the new position before checking for a goal.
    self.state['puck_pos'] = new_puck_pos

    # Evaluate the goal check once and keep the result: process_goal_for
    # may alter self.state, so re-querying afterwards could report a
    # different answer than the goal that was actually processed.
    goal_for = utils.is_goal(self.state)
    if goal_for is not None:
        self.process_goal_for(goal_for)
        return goal_for

    # No goal: update speed (and direction) for the next step.
    self.state['puck_speed'] = utils.next_speed(self.state)
    return None
def doorkey_problem(env, info):
    '''
    Compute the optimal action sequence for a door-key environment.

    Intended for the following environments:
        doorkey-5x5-normal.env
        doorkey-6x6-normal.env
        doorkey-8x8-normal.env
        doorkey-6x6-direct.env
        doorkey-8x8-direct.env
        doorkey-6x6-shortcut.env
        doorkey-8x8-shortcut.env

    A policy is obtained from dp.DP over the generated state space and is
    then rolled out from the initial state until utils.is_goal reports that
    the goal position has been reached.

    Returns the list of controls chosen along that rollout.
    '''
    state_space, state_to_idx = generate_state_space(env, info)
    control_space = [MF, TL, TR, PK, UD]

    # Only the policy is needed here; the value table is discarded.
    _value, policy = dp.DP(
        state_space,
        state_to_idx,
        control_space,
        dp.get_next_state,
        dp.step_cost,
        dp.terminal_cost,
        env,
    )

    # Roll the policy forward, indexing it by time step and state index.
    current = utils.get_initial_state(info)
    actions = []
    step = 0
    while not utils.is_goal(current, info["goal_pos"]):
        action = policy[step, state_to_idx[utils.hash_state(current)]]
        actions.append(action)
        current = dp.get_next_state(current, action, env)
        step += 1

    return actions
def estimate_path(current_state, after_time):
    """Estimate the puck's upcoming positions and speeds.

    Works on a shallow copy of ``current_state``, so the caller's dict does
    not have its ``'puck_pos'`` / ``'puck_speed'`` entries rebound. Each
    iteration advances the puck with ``utils.next_pos_from_state``, stops
    early if ``utils.is_goal`` reports a goal (that position is not added
    to the path), and replaces the speed when
    ``utils.next_after_boundaries`` returns a truthy value.

    Args:
        current_state: state dict providing at least ``'puck_pos'``,
            ``'puck_speed'`` and ``'delta_t'``.
        after_time: size of the look-ahead window, reduced by
            ``state['delta_t']`` per step. A value <= 0 yields an empty
            path.

    Returns:
        list of ``(puck_pos, puck_speed)`` tuples, one per simulated step.

    Note:
        If ``delta_t`` is not positive the loop only terminates when a goal
        is detected.
    """
    state = copy.copy(current_state)
    path = []
    while after_time > 0:
        state['puck_pos'] = utils.next_pos_from_state(state)
        if utils.is_goal(state) is not None:
            break
        # Query the boundary handling once per step instead of twice.
        speed_after_boundaries = utils.next_after_boundaries(state)
        if speed_after_boundaries:
            state['puck_speed'] = speed_after_boundaries
        path.append((state['puck_pos'], state['puck_speed']))
        after_time -= state['delta_t']
    return path
def estimate_path(current_state, after_time):
    """Estimate the puck's next moves within an ``after_time`` window.

    The simulation runs on a shallow copy of ``current_state``. On every
    step the puck position is advanced with ``utils.next_pos_from_state``;
    the loop ends early when ``utils.is_goal`` returns a non-``None`` value
    (the goal position itself is not recorded), and the speed is replaced
    whenever ``utils.next_after_boundaries`` returns a truthy value.

    Args:
        current_state: state dict providing at least ``'puck_pos'``,
            ``'puck_speed'`` and ``'delta_t'``.
        after_time: look-ahead window; decremented by ``state['delta_t']``
            after each recorded step. Values <= 0 produce an empty list.

    Returns:
        list: ``(puck_pos, puck_speed)`` tuples for the simulated ticks.

    Note:
        With a non-positive ``delta_t`` the loop can only end on a goal.
    """
    state = copy.copy(current_state)
    path = []
    while after_time > 0:
        state['puck_pos'] = utils.next_pos_from_state(state)
        if utils.is_goal(state) is not None:
            break
        # Evaluate the boundary handling once and reuse the result.
        bounced_speed = utils.next_after_boundaries(state)
        if bounced_speed:
            state['puck_speed'] = bounced_speed
        path.append((state['puck_pos'], state['puck_speed']))
        after_time -= state['delta_t']
    return path