def codalab_run(run_id):
    # a = ppp((5, 5), (((1, 1), (3, 3)),), (0, 0))
    a = None
    if run_id == 0:
        a = ppp((10, 4), (((0, 3), (0, 6)), ((3, 3), (3, 6))))
    if run_id == 1:
        a = ppp((10, 10), (
            ((1, 1), (1, 2)),
            ((1, 2), (3, 2)),
            ((6, 1), (8, 1)),
            ((8, 1), (8, 3)),
            ((6, 3), (8, 3)),
            ((3, 4), (4, 4)),
            ((4, 4), (4, 5)),
            ((6, 6), (6, 7)),
            ((1, 7), (2, 8)),
        ), (0, 0))
    obstacles = list(a.env.block_area)
    occupancy = astar.DetOccupancyGrid2D(a.env.map.height, a.env.map.width, obstacles)
    aa = []
    while not a.end():
        action = exptimax(a, 9)
        X, Y = a.env.next_location(action)
        m = a.env.entire_map()
        if m[X][Y] == a.env.map.VISITED:
            # The expectimax move would revisit a cell: reroute with A* to the
            # next remaining unvisited node and replay that path as steps.
            x_init = a.env.agent_location()
            x_goal = a.env.remaining_nodes()[0]
            Astar = astar.AStar((0, 0), (a.env.map.height, a.env.map.width),
                                x_init, x_goal, occupancy)
            if not Astar.solve():
                print("Not Solve")
            else:
                for j in range(len(Astar.path) - 1):
                    a1, b1 = Astar.path[j]
                    a2, b2 = Astar.path[j + 1]
                    if a2 == a1 - 1 and b1 == b2:
                        a.env.step(a.env.UP)
                    elif a2 == a1 + 1 and b1 == b2:
                        a.env.step(a.env.DOWN)
                    elif a2 == a1 and b2 == b1 - 1:
                        a.env.step(a.env.LEFT)
                    elif a2 == a1 and b2 == b1 + 1:
                        a.env.step(a.env.RIGHT)
                # aa.append(a.env.agent_location())
        else:
            aa.append(a.env.agent_location())
            a.env.step(action)
            print(action)
    aa.append(a.env.agent_location())
    print(aa, a.env.agent_distance, a.env.agent_turns)
    stats = {
        "turn": a.env.agent_turns,
        "dist": a.env.agent_distance,
        "notes": ""
    }
    return stats
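# Hedged refactoring sketch (not part of the original code and not called by
# it): the "replay an A* path as environment steps" block above is repeated
# in several functions below. A helper like this could factor it out. It
# assumes the env interface already used above: step(), UP/DOWN/LEFT/RIGHT
# constants, and (row, col) waypoints where a smaller row index means UP.
def follow_astar_path(env, path):
    # Walk consecutive waypoint pairs and issue the matching action.
    for (r1, c1), (r2, c2) in zip(path, path[1:]):
        if r2 == r1 - 1 and c1 == c2:
            env.step(env.UP)
        elif r2 == r1 + 1 and c1 == c2:
            env.step(env.DOWN)
        elif r2 == r1 and c2 == c1 - 1:
            env.step(env.LEFT)
        elif r2 == r1 and c2 == c1 + 1:
            env.step(env.RIGHT)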
def main():
    # a = ppp((5, 5), (((1, 1), (3, 3)),), (0, 0))
    a = ppp((10, 10), (
        ((1, 1), (1, 2)),
        ((1, 2), (3, 2)),
        ((6, 1), (8, 1)),
        ((8, 1), (8, 3)),
        ((6, 3), (8, 3)),
        ((3, 4), (4, 4)),
        ((4, 4), (4, 5)),
        ((6, 6), (6, 7)),
        ((1, 7), (2, 8)),
    ), (0, 0))
    obstacles = list(a.env.block_area)
    occupancy = astar.DetOccupancyGrid2D(a.env.map.width, a.env.map.height, obstacles)
    aa = []
    while not a.end():
        action = exptimax(a, 9)
        X, Y = a.env.next_location(action)
        m = a.env.entire_map()
        if m[X][Y] == a.env.map.VISITED:
            # The expectimax move would revisit a cell: move the agent
            # directly to the next unvisited node and account for the A*
            # detour in the bookkeeping (distance, turns, path) instead of
            # replaying each step.
            newx, newy = a.env.remaining_nodes()[0]
            x_init = a.env.agent_location()
            a.env.agentX = newx
            a.env.agentY = newy
            a.env.map.visit(newx, newy)
            x_goal = a.env.agent_location()
            Astar = astar.AStar((0, 0), (a.env.map.width, a.env.map.height),
                                x_init, x_goal, occupancy)
            if not Astar.solve():
                print("Not Solve")
            else:
                a.env.agent_distance += len(Astar.path)
                for j in range(len(Astar.path) - 1):
                    a1, b1 = Astar.path[j]
                    a2, b2 = Astar.path[j + 1]
                    # Count a turn whenever both coordinates change between
                    # consecutive waypoints.
                    if a1 != a2 and b1 != b2:
                        a.env.agent_turns += 1
                a.env.path.extend(Astar.path)
                # aa.append(a.env.agent_location())
        else:
            aa.append(a.env.agent_location())
            a.env.step(action)
            print(action)
    aa.append(a.env.agent_location())
    print(aa, a.env.agent_distance, a.env.agent_turns)
    a.env.plot_path()
# Note: this second definition of main() shadows the one above when the
# module is loaded.
def main():
    # a = ppp((5, 5), (((1, 1), (3, 3)),), (0, 0))
    a = ppp(5)
    obstacles = list(a.env.block_area)
    occupancy = astar.DetOccupancyGrid2D(a.env.map.height, a.env.map.width, obstacles)
    aa = []
    while not a.end():
        action = exptimax(a, 9)
        X, Y = a.env.next_location(action)
        m = a.env.entire_map()
        if m[X][Y] == a.env.map.VISITED:
            x_init = a.env.agent_location()
            x_goal = a.env.remaining_nodes()[0]
            Astar = astar.AStar((0, 0), (a.env.map.height, a.env.map.width),
                                x_init, x_goal, occupancy)
            if not Astar.solve():
                print("Not Solve")
            else:
                for j in range(len(Astar.path) - 1):
                    a1, b1 = Astar.path[j]
                    a2, b2 = Astar.path[j + 1]
                    if a2 == a1 - 1 and b1 == b2:
                        a.env.step(a.env.UP)
                    elif a2 == a1 + 1 and b1 == b2:
                        a.env.step(a.env.DOWN)
                    elif a2 == a1 and b2 == b1 - 1:
                        a.env.step(a.env.LEFT)
                    elif a2 == a1 and b2 == b1 + 1:
                        a.env.step(a.env.RIGHT)
                # aa.append(a.env.agent_location())
        else:
            aa.append(a.env.agent_location())
            a.env.step(action)
            print(action)
    aa.append(a.env.agent_location())
    print(aa, a.env.agent_distance, a.env.agent_turns)
def local_map_approx_search(aaa):
    aaaa = []

    def getAction(pp, dd):
        # Depth-limited search over the local map: recurse returns the best
        # reward reachable within d moves; getAction returns the first move
        # of the best sequence.
        def recurse(s, d):
            if s.end():
                return s.reward()
            elif d == 0:
                return s.reward()
            else:
                f = -float('inf')
                for a in s.getLegalActions():
                    tempt = recurse(s.generateSuccessor(a), d - 1)
                    if tempt > f:
                        f = tempt
                return f

        f = -float('inf')
        astore = None
        for a in pp.getLegalActions():
            tempt = recurse(pp.generateSuccessor(a), dd - 1)
            if tempt > f:
                f = tempt
                astore = a
        return astore

    obstacles = list(aaa.env.block_area)
    occupancy = astar.DetOccupancyGrid2D(aaa.env.map.height, aaa.env.map.width, obstacles)
    while aaa.end() != 1:
        # Plan on a local window of the map rather than the full grid.
        pp = ppp()
        pp.env.map.data = aaa.env.local_map(aaa.lmapsize, aaa.lmapsize)
        a = getAction(pp, aaa.lmapsize * aaa.lmapsize - 1)
        X, Y = aaa.env.next_location(a)
        m = aaa.env.entire_map()
        if m[X][Y] == aaa.env.map.VISITED:
            x_init = aaa.env.agent_location()
            x_goal = aaa.env.remaining_nodes()[0]
            Astar = astar.AStar(
                (0, 0), (aaa.env.map.height, aaa.env.map.width),
                x_init, x_goal, occupancy)
            if not Astar.solve():
                print("Not Solve")
            else:
                for j in range(len(Astar.path) - 1):
                    a1, b1 = Astar.path[j]
                    a2, b2 = Astar.path[j + 1]
                    if a2 == a1 - 1 and b1 == b2:
                        aaa.env.step(aaa.env.UP)
                    elif a2 == a1 + 1 and b1 == b2:
                        aaa.env.step(aaa.env.DOWN)
                    elif a2 == a1 and b2 == b1 - 1:
                        aaa.env.step(aaa.env.LEFT)
                    elif a2 == a1 and b2 == b1 + 1:
                        aaa.env.step(aaa.env.RIGHT)
                # aa.append(a.env.agent_location())
        else:
            aaaa.append(aaa.env.agent_location())
            aaa.env.step(a)
            # print(aaaa)
    # aaaa.append(aaa.env.agent_location())
    return aaaa
def TDlearning(ppp, eps=0.3, iteration=200, max=10000):
    # Value network: 100 flattened visit-counter values plus agent x, y,
    # turns, and distance (input_dim=104) -> scalar value estimate.
    model = Sequential()
    # model.add(LocallyConnected2D(5, (3, 3),
    #                              input_shape=(1, 5, 5), padding='valid',))
    # model.add(Flatten(input_shape=(1, 5, 5)))
    model.add(Dense(50, activation='relu', input_dim=104))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mse', optimizer='adam', metrics=['mae'])
    # model.compile(loss='mse', optimizer=keras.optimizers.SGD(lr=0.0001, momentum=0.9, nesterov=True))

    # Each iteration rolls out one training episode on a deep copy of the problem.
    for i in range(iteration):
        ttttt = list(ppp.env.counter.get_data(100).flatten())
        ttttt.append(ppp.env.agentX)
        ttttt.append(ppp.env.agentY)
        ttttt.append(ppp.env.agent_turns)
        ttttt.append(ppp.env.agent_distance)
        ttttt = np.expand_dims(ttttt, axis=0)
        val = model.predict(ttttt)
        print("i=", i, val)
        temptppp = copy.deepcopy(ppp)
        obstacles = list(temptppp.env.block_area)
        occupancy = astar.DetOccupancyGrid2D(temptppp.env.map.height,
                                             temptppp.env.map.width, obstacles)
        j = 0
        while temptppp.end() != 1 and j < max:
            j += 1
            # print(temptppp.env.counter.data)
            turns = temptppp.env.agent_turns
            distance = temptppp.env.agent_distance
            unvisited = temptppp.env.num_unvisited_nodes()
            if random.random() < eps:
                # Epsilon-greedy exploration: take a random legal action.
                a = random.choice(temptppp.getLegalActions())
                newppp = temptppp.generateSuccessor(a)
                ttttt = list(newppp.env.counter.get_data(100).flatten())
                ttttt.append(newppp.env.agentX)
                ttttt.append(newppp.env.agentY)
                ttttt.append(newppp.env.agent_turns)
                ttttt.append(newppp.env.agent_distance)
                ttttt = np.expand_dims(ttttt, axis=0)
                val = model.predict(ttttt)
                differturns = newppp.env.agent_turns - turns
                differdistance = 1
                differunvisited = newppp.env.num_unvisited_nodes() - unvisited
                # Shaped target: successor value minus 2 per extra turn, minus
                # 2 per step, plus 10 per newly visited node.
                fff = reward = val - differturns * 2 - 2 - differunvisited * 10
            else:
                # Otherwise pick the action with the best shaped estimate.
                fff = [[-float('Inf')]]
                for ttt in temptppp.getLegalActions():
                    newppp = temptppp.generateSuccessor(ttt)
                    ttttt = list(newppp.env.counter.get_data(100).flatten())
                    ttttt.append(newppp.env.agentX)
                    ttttt.append(newppp.env.agentY)
                    ttttt.append(newppp.env.agent_turns)
                    ttttt.append(newppp.env.agent_distance)
                    ttttt = np.expand_dims(ttttt, axis=0)
                    val = model.predict(ttttt)
                    differturns = newppp.env.agent_turns - turns
                    differdistance = 1
                    differunvisited = newppp.env.num_unvisited_nodes() - unvisited
                    reward = val - differturns * 2 - 2 - differunvisited * 10
                    if reward[0][0] > fff[0][0]:
                        a = ttt
                        fff = reward
            X, Y = temptppp.env.next_location(a)
            m = temptppp.env.entire_map()
            if m[X][Y] == temptppp.env.map.VISITED:
                # Chosen move would revisit a cell: replace it with the first
                # step of an A* route to the next unvisited node.
                x_init = temptppp.env.agent_location()
                x_goal = temptppp.env.remaining_nodes()[0]
                Astar = astar.AStar(
                    (0, 0), (temptppp.env.map.height, temptppp.env.map.width),
                    x_init, x_goal, occupancy)
                Astar.solve()
                a1, b1 = Astar.path[0]
                a2, b2 = Astar.path[1]
                if a2 == a1 - 1 and b1 == b2:
                    a = temptppp.env.UP
                elif a2 == a1 + 1 and b1 == b2:
                    a = temptppp.env.DOWN
                elif a2 == a1 and b2 == b1 - 1:
                    a = temptppp.env.LEFT
                elif a2 == a1 and b2 == b1 + 1:
                    a = temptppp.env.RIGHT
                newppp = temptppp.generateSuccessor(a)
                differturns = newppp.env.agent_turns - turns
                differdistance = 1
                differunvisited = newppp.env.num_unvisited_nodes() - unvisited
                fff = reward = val - differturns * 2 - 2 - differunvisited * 10
            # TD update: fit the current state's features toward the shaped target.
            target = fff
            ttttt = list(temptppp.env.counter.get_data(100).flatten())
            ttttt.append(temptppp.env.agentX)
            ttttt.append(temptppp.env.agentY)
            ttttt.append(temptppp.env.agent_turns)
            ttttt.append(temptppp.env.agent_distance)
            ttttt = np.expand_dims(ttttt, axis=0)
            model.fit(ttttt, target, epochs=1, verbose=0)
            temptppp.env.step(a)

        # Anchor the terminal (or truncated) state's value at 0.
        ttttt = list(temptppp.env.counter.get_data(100).flatten())
        ttttt.append(temptppp.env.agentX)
        ttttt.append(temptppp.env.agentY)
        ttttt.append(temptppp.env.agent_turns)
        ttttt.append(temptppp.env.agent_distance)
        ttttt = np.expand_dims(ttttt, axis=0)
        # if temptppp.end() == 1:
        # while model.predict(ttttt) > 1:
        model.fit(ttttt, [[0.0]], epochs=5, verbose=0)
        ttttt = list(temptppp.env.counter.get_data(100).flatten())
        ttttt.append(temptppp.env.agentX)
        ttttt.append(temptppp.env.agentY)
        ttttt.append(temptppp.env.agent_turns)
        ttttt.append(temptppp.env.agent_distance)
        ttttt = np.expand_dims(ttttt, axis=0)
        print('end', model.predict(ttttt))

    # Greedy rollout with the learned value function on the original problem.
    j = 0
    while ppp.end() != 1 and j < 500:
        j += 1
        fff = -float('Inf')
        turns = ppp.env.agent_turns
        distance = ppp.env.agent_distance
        unvisited = ppp.env.num_unvisited_nodes()
        for ttt in ppp.getLegalActions():
            newppp = ppp.generateSuccessor(ttt)
            ttttt = list(newppp.env.counter.get_data(100).flatten())
            ttttt.append(newppp.env.agentX)
            ttttt.append(newppp.env.agentY)
            ttttt.append(newppp.env.agent_turns)
            ttttt.append(newppp.env.agent_distance)
            ttttt = np.expand_dims(ttttt, axis=0)
            val = model.predict(ttttt)
            differturns = newppp.env.agent_turns - turns
            differdistance = 1
            differunvisited = newppp.env.num_unvisited_nodes() - unvisited
            reward = val - differturns * 2 - 2 - differunvisited * 10
            if reward > fff:
                a = ttt
                fff = reward
        X, Y = ppp.env.next_location(a)
        m = ppp.env.entire_map()
        if m[X][Y] == ppp.env.map.VISITED:
            x_init = ppp.env.agent_location()
            x_goal = ppp.env.remaining_nodes()[0]
            Astar = astar.AStar(
                (0, 0), (ppp.env.map.height, ppp.env.map.width),
                x_init, x_goal, occupancy)
            Astar.solve()
            a1, b1 = Astar.path[0]
            a2, b2 = Astar.path[1]
            if a2 == a1 - 1 and b1 == b2:
                a = ppp.env.UP
            elif a2 == a1 + 1 and b1 == b2:
                a = ppp.env.DOWN
            elif a2 == a1 and b2 == b1 - 1:
                a = ppp.env.LEFT
            elif a2 == a1 and b2 == b1 + 1:
                a = ppp.env.RIGHT
        ppp.env.step(a)
        print(a)
        print(a, turns, distance, unvisited)
    print(ppp.env.counter.data)
    print(ppp.env.agent_turns, ppp.env.agent_distance)
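# Hedged usage sketch (an assumption, not present in the original file):
# TDlearning takes a problem instance like the ones built in main() above and
# trains and evaluates it in place. The layout below mirrors the 10x10 map
# used in main(); the function name run_tdlearning_demo is hypothetical.
def run_tdlearning_demo():
    a = ppp((10, 10), (
        ((1, 1), (1, 2)), ((1, 2), (3, 2)), ((6, 1), (8, 1)),
        ((8, 1), (8, 3)), ((6, 3), (8, 3)), ((3, 4), (4, 4)),
        ((4, 4), (4, 5)), ((6, 6), (6, 7)), ((1, 7), (2, 8)),
    ), (0, 0))
    TDlearning(a, eps=0.3, iteration=200)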