def dfs_path(self, mazemap):
    """Count the moves a depth-first walker makes while looking for the target.

    The walker starts on the source cell and tries the directions in the
    order given by ``utils.dirs``.  It advances into the first neighbour that
    is inside the map, is not a wall and has not been visited yet; when no
    such neighbour exists it backtracks one cell.  Advancing and backtracking
    each cost one step.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 if ``utils.findSourceAndTarget`` reports a missing coordinate.
        Otherwise the number of steps taken until the target is on top of the
        stack, or the total number of steps spent if the search is exhausted
        without reaching the target.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return -1

    # 0 marks an unvisited cell.  ``np.int`` was removed from NumPy, so the
    # builtin ``int`` (which it used to alias) is passed as dtype instead.
    visited = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
    visited[sx][sy] = 1

    stack = [[sx, sy]]
    step = 0
    while stack:
        x, y = stack[-1]
        if x == tx and y == ty:
            break
        for i in range(len(utils.dirs)):
            nx = x + utils.dirs[i][0]
            ny = y + utils.dirs[i][1]
            if (utils.inMap(nx, ny)
                    and not mazemap[nx, ny, utils.Cell.Wall]
                    and visited[nx][ny] == 0):
                visited[nx][ny] = 1
                stack.append([nx, ny])
                break
        else:
            # Dead end: no unexplored neighbour, so step back one cell.
            stack.pop()
        # Both a forward move and a backtrack count as one step.
        step += 1
    return step
def _reset(self):
    """Restore the working maze from the initial layout and return it.

    The source and target coordinates found in ``self.ini_mazemap`` are
    stored as NumPy arrays on ``self.source`` and ``self.target``.
    ``self.mazemap`` becomes a deep copy of the initial map, so later edits
    to the working map leave ``self.ini_mazemap`` untouched.
    """
    src_x, src_y, tgt_x, tgt_y = utils.findSourceAndTarget(self.ini_mazemap)
    self.source = np.array([src_x, src_y])
    self.target = np.array([tgt_x, tgt_y])

    self.mazemap = copy.deepcopy(self.ini_mazemap)
    return self.mazemap
def right_hand_path(self, mazemap):
    """Walk from source to target with a fixed turn preference and count moves.

    At every iteration the walker tries four headings, expressed as offsets
    ``1, 0, 3, 2`` relative to its current heading, and moves into the first
    one whose cell is inside the map and flagged Empty or Target.  The
    iteration counter is increased and the current position recorded even if
    no move was possible.

    The source cell is temporarily overwritten with ``utils.Cell.EmptyV``
    during the walk and set to ``utils.Cell.SourceV`` afterwards, so
    ``mazemap`` is modified in place.  The step count and the recorded path
    are printed before returning.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 if the source or target cannot be located, else the number of
        loop iterations needed to reach the target.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return -1

    # Let the walker step back onto its starting cell during the walk.
    mazemap[sx, sy] = utils.Cell.EmptyV

    headings = np.array([[1, 0], [0, -1], [-1, 0], [0, 1]])
    turn_order = [1, 0, 3, 2]  # heading offsets, tried in this order

    heading = 0
    pos_x, pos_y = sx, sy
    visited_cells = []
    count = 0
    # NOTE(review): there is no step cap here; presumably callers only pass
    # mazes in which this walk reaches the target -- confirm.
    while not (pos_x == tx and pos_y == ty):
        for turn in turn_order:
            candidate = (heading + turn) % 4
            nx, ny = [pos_x, pos_y] + headings[candidate]
            if utils.inMap(nx, ny) and (
                    mazemap[nx, ny, utils.Cell.Empty]
                    or mazemap[nx, ny, utils.Cell.Target]):
                pos_x, pos_y = nx, ny
                heading = candidate
                break
        count += 1
        visited_cells.append([pos_x, pos_y])

    mazemap[sx, sy] = utils.Cell.SourceV
    print(count, visited_cells)
    return count
def shortest_path(mazemap):
    """Return the breadth-first shortest path length from source to target.

    Only cells flagged Empty or Target are entered.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 if the source or target cannot be located, or if the target is
        never reached by the search; otherwise the number of moves on a
        shortest path.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return -1

    # path_len holds (distance from source + 1); 0 marks an unvisited cell.
    # ``np.int`` was removed from NumPy; builtin ``int`` is the same dtype.
    path_len = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
    path_len[sx][sy] = 1

    queue = deque([[sx, sy]])
    while queue:
        cx, cy = queue.popleft()
        next_len = path_len[cx][cy] + 1
        for k in range(len(utils.dirs)):
            nx = cx + utils.dirs[k][0]
            ny = cy + utils.dirs[k][1]
            if not utils.inMap(nx, ny):
                continue
            if not (mazemap[nx, ny, utils.Cell.Empty]
                    or mazemap[nx, ny, utils.Cell.Target]):
                continue
            if path_len[nx][ny] == 0 or path_len[nx][ny] > next_len:
                path_len[nx][ny] = next_len
                queue.append([nx, ny])

    # Undo the +1 offset; an unreached target (still 0) therefore yields -1.
    return path_len[tx][ty] - 1
def get_inputs_from_state_and_agent_action(state, action, latent_dim, transition_size):
    """Build the (noise, current position, potential position) input triple.

    Args:
        state: Maze state passed to ``utils.findSourceAndTarget``.
        action: Agent action forwarded to ``get_potential_pos_onehot``.
        latent_dim: Length of the noise vector.
        transition_size: Length each position vector is reshaped to.

    Returns:
        A tuple ``(noise, current_pos_onehot, potential_pos_onehot)``.  Each
        element is reshaped to a single row; ``noise`` is filled with ones.
    """
    src_x, src_y, _, _ = utils.findSourceAndTarget(state)
    # Flatten the 2-D source coordinate into a single cell index.
    flat_index = src_x * config.Map.Width + src_y

    row_shape = (1, transition_size)
    current_pos_onehot = get_pos_onehot(flat_index).reshape(row_shape)
    potential_pos_onehot = get_potential_pos_onehot(flat_index, action).reshape(row_shape)

    noise = np.ones(latent_dim).reshape(1, latent_dim)
    return noise, current_pos_onehot, potential_pos_onehot
def _reset(self):
    """Start a new episode on a freshly initialised maze and return the maze.

    Resets the step, invalid-move and conflict counters to zero, replaces
    ``self.mazemap`` with the result of ``utils.initMazeMap()`` and stores
    the source and target coordinates found in it as NumPy arrays.
    """
    self.gamestep = self.invalid_count = self.conflict_count = 0

    self.mazemap = utils.initMazeMap()
    src_x, src_y, tgt_x, tgt_y = utils.findSourceAndTarget(self.mazemap)
    self.source = np.array([src_x, src_y])
    self.target = np.array([tgt_x, tgt_y])
    return self.mazemap
def shortest_random_path(self, mazemap):
    """Simulate a noisy, mostly-optimal walk from source to target.

    A breadth-first search from the target first labels the cells flagged
    Empty or Source with their distance to the target.  A walker then starts
    on the source cell.  At each step it moves in the passable direction with
    the smallest label with probability 0.8 and shares the remaining 0.2
    evenly between the other passable directions.  A direction is passable
    when the neighbouring cell is inside the map and not a wall.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 if the source or target cannot be located.  Otherwise the number
        of steps taken, capped at 200.  A walker that has no passable
        direction at all can never move, so the cap is returned for it.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return -1

    # Distance-to-target labels.  0 marks an unvisited cell and is also the
    # target's own label.  ``np.int`` was removed from NumPy; builtin ``int``
    # is the same dtype.
    distance = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
    queue = deque([[tx, ty]])
    while queue:
        cx, cy = queue.popleft()
        next_len = distance[cx][cy] + 1
        for k in range(len(utils.dirs)):
            nx = cx + utils.dirs[k][0]
            ny = cy + utils.dirs[k][1]
            if not utils.inMap(nx, ny):
                continue
            if mazemap[nx, ny, utils.Cell.Empty] or mazemap[nx, ny, utils.Cell.Source]:
                if distance[nx][ny] == 0 or distance[nx][ny] > next_len:
                    queue.append([nx, ny])
                    distance[nx][ny] = next_len

    step = 0
    max_step = 200
    optimal_dir_prob = 0.8
    # Larger than any real label, so blocked directions never win argmin
    # while at least one passable direction exists.
    invalid_distance = config.Map.Height * config.Map.Width
    num_dirs = len(utils.dirs)

    while (sx != tx or sy != ty) and step < max_step:
        distance_dirs = []
        valid_dirs = []
        for i in range(num_dirs):
            dx = sx + utils.dirs[i][0]
            dy = sy + utils.dirs[i][1]
            if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                distance_dirs.append(distance[dx][dy])
                valid_dirs.append(i)
            else:
                distance_dirs.append(invalid_distance)

        if not valid_dirs:
            # Boxed in: previously the walker stepped into a wall or off the
            # map here.  It cannot move, so report the step cap.
            return max_step

        best_dir = int(np.argmin(distance_dirs))
        prob_dirs = [0.0] * num_dirs
        if len(valid_dirs) == 1:
            # The probabilities must sum to 1.  np.random.multinomial hands
            # any missing mass to the LAST entry, which previously let the
            # walker take a blocked direction with probability 0.2.
            prob_dirs[best_dir] = 1.0
        else:
            other_prob = (1 - optimal_dir_prob) / (len(valid_dirs) - 1)
            for i in valid_dirs:
                prob_dirs[i] = other_prob
            prob_dirs[best_dir] = optimal_dir_prob

        selected_dir = np.argmax(np.random.multinomial(1, prob_dirs))
        sx += utils.dirs[selected_dir][0]
        sy += utils.dirs[selected_dir][1]
        step += 1
    return step
def get_agent_action(self, state):
    """Choose the agent's action for ``state``.

    With policy type ``'OPT'`` the action is looked up in
    ``self.agent_opt_policy`` at the source coordinates found in ``state``.
    Otherwise a random action index below ``self.agent_action_size`` is drawn
    with probability 0.1; in the remaining cases ``self.agent.forward`` is
    called on a deep copy of ``state``, so the caller's object is not handed
    to the agent.
    """
    if self.agent_policy_type == 'OPT':
        src_x, src_y, _, _ = utils.findSourceAndTarget(state)
        return self.agent_opt_policy[src_x][src_y]

    explore = np.random.random() < 0.1
    if explore:
        return np.random.choice(self.agent_action_size, 1)[0]

    return self.agent.forward(copy.deepcopy(state))
def rightdownupleft_path(self, mazemap):
    """Count the steps of a walker that always takes the first open direction.

    On every step the directions in ``utils.dirs`` are tried in order
    (right, down, up, left) and the walker moves into the first neighbour
    that is inside the map and not a wall.  A step is counted even when no
    move was possible.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 if the source or target cannot be located, else the number of
        steps taken, capped at 200.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return -1

    max_step = 200
    cur_x, cur_y = sx, sy
    step = 0
    while step < max_step and not (cur_x == tx and cur_y == ty):
        for offset in utils.dirs:
            nx = cur_x + offset[0]
            ny = cur_y + offset[1]
            if utils.inMap(nx, ny) and not mazemap[nx, ny, utils.Cell.Wall]:
                cur_x, cur_y = nx, ny
                break
        step += 1
    return step
def rightdown_random_path(self, mazemap):
    """Count the steps of a random walker biased towards right and down.

    Each step draws a direction with probabilities right 0.4, down 0.4,
    up 0.1, left 0.1 and redraws until the chosen neighbour is inside the
    map and not a wall.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 if the source or target cannot be located.  Otherwise the number
        of steps taken, capped at 200.  A walker with no passable neighbour
        can never move, so the cap is returned for it.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return -1

    def passable(x, y):
        # A cell can be entered when it is inside the map and not a wall.
        return utils.inMap(x, y) and not mazemap[x, y, utils.Cell.Wall]

    # right 0.4, down 0.4, up 0.1, left 0.1
    dir_probs = [0.4, 0.4, 0.1, 0.1]
    step = 0
    max_step = 200
    while (sx != tx or sy != ty) and step < max_step:
        # Without this guard the redraw loop below would spin forever when
        # every direction is blocked.
        if not any(passable(sx + utils.dirs[i][0], sy + utils.dirs[i][1])
                   for i in range(len(dir_probs))):
            return max_step
        while True:
            selected_dir = np.argmax(np.random.multinomial(1, dir_probs))
            dx = sx + utils.dirs[selected_dir][0]
            dy = sy + utils.dirs[selected_dir][1]
            if passable(dx, dy):
                sx, sy = dx, dy
                break
        step += 1
    return step
def random_path(self, mazemap):
    """Count the steps of a uniformly random walker from source to target.

    Each step picks uniformly among the directions in ``utils.dirs`` whose
    neighbouring cell is inside the map and not a wall.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 if the source or target cannot be located.  Otherwise the number
        of steps taken, capped at 200.  A walker with no passable neighbour
        can never move, so the cap is returned for it.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return -1

    step = 0
    max_step = 200
    while (sx != tx or sy != ty) and step < max_step:
        valid_dirs = []
        for i in range(len(utils.dirs)):
            dx = sx + utils.dirs[i][0]
            dy = sy + utils.dirs[i][1]
            if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                valid_dirs.append(i)

        if not valid_dirs:
            # np.random.randint(0) raises ValueError; a boxed-in walker is
            # reported as having used up its step budget instead.
            return max_step

        selected_dir = valid_dirs[np.random.randint(len(valid_dirs))]
        sx += utils.dirs[selected_dir][0]
        sy += utils.dirs[selected_dir][1]
        step += 1
    return step
def rightdown_path(self, mazemap):
    """Count the steps of a walker that only ever moves right or down.

    On every step the walker tries ``utils.dirs[0]`` (right) and, if that
    neighbour is outside the map or a wall, ``utils.dirs[1]`` (down).  A
    step is counted even when neither move was possible.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 if the source or target cannot be located, else the number of
        steps taken, capped at 200.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return -1

    max_step = 200
    cur_x, cur_y = sx, sy
    step = 0
    while step < max_step and not (cur_x == tx and cur_y == ty):
        for dir_idx in (0, 1):  # right first, then down
            nx = cur_x + utils.dirs[dir_idx][0]
            ny = cur_y + utils.dirs[dir_idx][1]
            if utils.inMap(nx, ny) and not mazemap[nx, ny, utils.Cell.Wall]:
                cur_x, cur_y = nx, ny
                break
        step += 1
    return step
def isvalid_mazemap(self, mazemap):
    """Report whether the target can be reached from the source.

    A breadth-first search starts at the source and spreads through cells
    flagged Empty.  The maze is valid as soon as the target is found next to
    an explored cell.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        False if the source or target cannot be located or the target is
        never reached; True otherwise.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if -1 in (sx, sy, tx, ty):
        return False

    # ``np.int`` was removed from NumPy; a boolean mask is all that is needed.
    visited = np.zeros([config.Map.Height, config.Map.Width], dtype=bool)
    visited[sx][sy] = True

    queue = deque([[sx, sy]])
    while queue:
        cx, cy = queue.popleft()
        for k in range(len(utils.dirs)):
            nx = cx + utils.dirs[k][0]
            ny = cy + utils.dirs[k][1]
            if not utils.inMap(nx, ny) or visited[nx][ny]:
                continue
            if mazemap[nx, ny, utils.Cell.Empty]:
                # Mark on enqueue so a cell is queued at most once.  Marking
                # on dequeue queued it once per shortest path reaching it.
                visited[nx][ny] = True
                queue.append([nx, ny])
            if nx == tx and ny == ty:
                return True
    return False