import unittest


class TestCar(unittest.TestCase):
    def setUp(self) -> None:
        self.c1 = Car('blue', 20)
        self.c2 = Car('green', 30)

    def test_faster_than(self):
        self.assertTrue(self.c2.is_faster_than(self.c1))
        self.assertFalse(self.c1.is_faster_than(self.c2))

    def test_colors(self):
        self.assertEqual(self.c1.color, 'blue')

    def test_speed(self):
        self.assertEqual(self.c1.speed, 20)
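# A minimal sketch of the Car class these tests exercise, reconstructed from the
# assertions above. The constructor arguments (color, speed) and the method name
# is_faster_than come from the tests; everything else is an assumption.
class Car:
    def __init__(self, color, speed):
        self.color = color
        self.speed = speed

    def is_faster_than(self, other):
        # A car is faster when its speed value is strictly greater.
        return self.speed > other.speed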
def simulateGens(self):
    for _ in range(0, self.numberOfGeneration - 1):
        for _ in range(0, self.sizeOfOneGeneration):
            # spawn a car with a freshly randomized agent
            self.cars.append(
                Car(self.track,
                    ParameterEvolutionBrain(ParameterEvolutionAgent.randomAgent()),
                    self.level,
                    color="red"))
    self.update()
    tkinter.mainloop()
def populate():
    if population:
        for c in population:
            canvas.delete(c.canvas_shape_id)
        del population[:]
    for i in range(20):
        population.append(Car(track, brain_level1.tinyBrainTime(), color="blue"))
def breed_population(parents, n):
    population = []
    # Keep an unmodified clone of every parent in the next generation.
    for p in parents:
        brain = p.brain.clone()
        population.append(Car(track, brain, color="blue"))
    # Fill the rest of the generation with mutated offspring of randomly chosen parents.
    while len(population) < n:
        parent = random.choice(parents)
        child_brain = GeneticBrain()
        child_brain.script = parent.brain.script
        child_brain._parent_script_i = max(0, int(parent.brain._script_i - 5))
        child_brain.mutate()
        population.append(Car(track, child_brain, color="blue"))
    return population
def updateCars(self):
    self.cars.sort()
    bestCars = []
    print('Generation ' + str(self.genCounter) + ' had the best reward ' + str(self.cars[0].totalReward))
    print('\n')
    # Keep the five best cars of this generation as parents and mark them green.
    for x in range(0, 5):
        if x + 1 >= len(self.cars):
            break
        self.cars[x].color = 'green'
        bestCars.append(self.cars[x])
    self.cars = []
    # Breed the next generation by crossing two random parents and mutating the result.
    for x in range(0, self.sizeOfOneGeneration - 6):
        newAgent = bestCars[randint(0, len(bestCars) - 1)].brain.agent.uniform(
            bestCars[randint(0, len(bestCars) - 1)].brain.agent)
        newAgent.mutate()
        self.cars.append(Car(self.track, ParameterEvolutionBrain(newAgent), self.level))
    self.isFree = True
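# updateCars relies on ParameterEvolutionAgent.uniform() and mutate(), which are not shown
# in this collection. Below is a minimal sketch of such an agent, assuming its genome is a
# flat list of floats; the attribute name `params`, the mutation rate and the mutation
# scale are assumptions, not the project's actual values.
import random

class ParameterEvolutionAgentSketch:
    def __init__(self, params):
        self.params = params

    def uniform(self, other):
        # Uniform crossover: each parameter is taken from either parent with equal probability.
        child_params = [a if random.random() < 0.5 else b
                        for a, b in zip(self.params, other.params)]
        return ParameterEvolutionAgentSketch(child_params)

    def mutate(self, rate=0.1, scale=0.5):
        # Add Gaussian noise to a random subset of parameters.
        self.params = [p + random.gauss(0, scale) if random.random() < rate else p
                       for p in self.params]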
def read_map(path):
    polylines = []
    with open(path) as json_file:
        json_data = json.load(json_file)
        for line in json_data['polylines'].split('\n'):
            if line == '':
                break
            pl = Polyline([])
            coords_str = line.split(sep=':')
            coords_n = []
            for c in coords_str:
                xy = c.split(sep=',')
                coords_n.append([int(xy[0]), int(xy[1])])
            for i in range(len(coords_n) - 1):
                pl.add_line(
                    Line(Vector(coords_n[i][0], coords_n[i][1]),
                         Vector(coords_n[i + 1][0], coords_n[i + 1][1])))
            polylines.append(pl)
    return Map(Car(Vector(json_data['start'][0], json_data['start'][1])), polylines)
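# read_map expects a JSON file with a 'start' coordinate pair and a 'polylines' string in
# which each newline-separated entry is a ':'-separated list of 'x,y' points. The snippet
# below writes such a file and loads it; the file name and the coordinates are made up
# purely for illustration.
import json

example = {
    "start": [100, 200],
    "polylines": "0,0:300,0:300,150\n0,50:250,50:250,150\n",
}

with open("example_map.json", "w") as f:
    json.dump(example, f)

track_map = read_map("example_map.json")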
# imports
import tkinter

from lib.tracks import Track
from lib.car import Car
import brains

# create canvas for drawing
canvas = tkinter.Canvas(width=800, height=600, background="yellow green")
canvas.pack()

# load track
track = Track.level(canvas, draw_midline=True, level_number=1)
track.draw()

# create car
car = Car(track, brains.InteractiveBrain(), color="blue")


def update():
    '''Update the car and redraw it.'''
    car.update()
    car.draw()
    # increase value to slow down total speed of simulation
    canvas.after(60, update)


# start update & mainloop of window
update()
tkinter.mainloop()
class DeepQLearningCarController:
    model = Sequential()

    def __init__(self):
        self.canvas = tkinter.Canvas(width=800, height=600, background="yellow green")
        self.canvas.pack()
        self.level = 6

        # load track
        self.track = Track.level(self.canvas, draw_midline=True, level_number=self.level)
        self.track.draw()

        self.tickSpeed = 1
        self.isFree = True
        self.state = np.array([3.0, 4.0, 5.0])
        self.car = Car(self.track, DeepQLearningBrain(), self.level)
        self.epochs = 1000
        self.gamma = 0.9  # it may take several moves to reach the goal, so gamma is set high
        self.epsilon = 1
        self.currentEpisode = 1

    def initNeuralNetwork(self):
        DeepQLearningCarController.model.add(Dense(164, init='lecun_uniform', input_shape=(3,)))
        DeepQLearningCarController.model.add(Activation('relu'))
        # DeepQLearningCarController.model.add(Dropout(0.2))  # dropout is not used here, but you may want to try it

        DeepQLearningCarController.model.add(Dense(150, init='lecun_uniform'))
        DeepQLearningCarController.model.add(Activation('relu'))
        # DeepQLearningCarController.model.add(Dropout(0.2))

        DeepQLearningCarController.model.add(Dense(3, init='lecun_uniform'))
        DeepQLearningCarController.model.add(Activation('linear'))  # linear output so we can have a range of real-valued outputs

        rms = RMSprop()
        DeepQLearningCarController.model.compile(loss='mse', optimizer=rms)

    def update(self):
        # We are in state S.
        # Run the Q function on S to get Q values for all possible actions.
        qval = DeepQLearningCarController.model.predict(self.state.reshape(1, 3), batch_size=1)
        if random.random() < self.epsilon:
            # choose a random action (exploration)
            action = np.random.randint(0, 3)
        else:
            # choose the best action from the Q(s, a) values (exploitation)
            action = np.argmax(qval)

        # Take the action and observe the new state S'.
        new_state = self.makeMove(self.state, action)
        # Observe the reward.
        reward = self.getReward(new_state)
        # Get max_a Q(S', a).
        newQ = DeepQLearningCarController.model.predict(new_state.reshape(1, 3), batch_size=1)
        maxQ = np.max(newQ)

        y = np.zeros((1, 3))
        y[:] = qval[:]
        if self.car.isalive:
            # non-terminal state
            update = reward + (self.gamma * maxQ)
        else:
            # terminal state
            update = reward
        y[0][action] = update  # target output

        print("Game #: %s" % (self.currentEpisode,))
        DeepQLearningCarController.model.fit(self.state.reshape(1, 3), y, batch_size=1, nb_epoch=1, verbose=1)
        self.state = new_state

        if not self.car.isalive:
            # the car crashed: start a new episode with a fresh car
            self.currentEpisode += 1
            self.car.removeFromCanvas()
            self.car = Car(self.track, DeepQLearningBrain(), self.level)

        if self.epsilon > 0.1:
            self.epsilon -= (1 / self.epochs)
        self.canvas.after(1, self.update)

    def getReward(self, state):
        return self.car.reward

    def makeMove(self, state, action):
        self.car.brain.action = action
        self.car.update()
        self.car.draw()

        # Normalize the sensor readings; a zero or missing reading maps to 1.
        try:
            frontCenterNormalized = 1.0 / self.car.brain.front_center_sensor.distance
        except Exception:
            frontCenterNormalized = 1
        try:
            frontLeftNormalized = 1.0 / self.car.brain.front_left_sensor.distance
        except Exception:
            frontLeftNormalized = 1
        try:
            frontRightNormalized = 1.0 / self.car.brain.front_right_sensor.distance
        except Exception:
            frontRightNormalized = 1
        return np.array([frontCenterNormalized, frontLeftNormalized, frontRightNormalized])

    def run(self):
        self.initNeuralNetwork()
        self.update()
        tkinter.mainloop()
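# A minimal entry point for the controller above, assuming the module also provides Track,
# Car and DeepQLearningBrain as used inside the class. The controller expects the brain to
# expose an `action` attribute and three front sensors with a `distance` value, and the car
# to expose `reward`, `isalive` and `removeFromCanvas()`; run() builds the network, schedules
# update() through the Tk event loop and then blocks in mainloop().
if __name__ == "__main__":
    controller = DeepQLearningCarController()
    controller.run()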
mapHelperVar = MapHelper(dataCross, dataRoad)
# mapHelperVar.plotMap(showRoadId=True)
trafficMap = Map(configPath)
roadInstances = generateRoadInstances(configPath)
mapHelperVar.initialDirGraph(trafficMap.crossRelation, roadInstances)
print(mapHelperVar.findShortestPathByNetworkx('2', '31'))
print(mapHelperVar.findShortestPathByMyDijkstra('2', '31', trafficMap.crossRelation, roadInstances))

carDict = {}
carVar = Cars(dataCar)
file = open(configPath + '/answer.txt', 'w')
path = {}
for carId in carVar.getCarIdList():
    carDict[carId] = Car(carId, carVar)
    fromCrossId = str(carDict[carId].getCarFrom())
    toCrossId = str(carDict[carId].getCarTo())
    # Cache shortest paths so each (from, to) pair is only computed once.
    if not (fromCrossId in path and toCrossId in path[fromCrossId]):
        pathTemp = mapHelperVar.findShortestPathByMyDijkstra(
            fromCrossId, toCrossId, trafficMap.crossRelation, roadInstances)
        path[fromCrossId] = {toCrossId: pathTemp}
    print(carId)
    carDict[carId].addDrivePath(path[fromCrossId][toCrossId])
    string = str((carId, carDict[carId].getCarPlanTime(), carDict[carId].getDrivePath()))
    string = string.replace('[', '')
    string = string.replace(']', '')
    file.write(string + '\n')
file.close()
# roadsVar = Roads(dataRoad)
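# findShortestPathByMyDijkstra is not shown in this collection. Below is a generic Dijkstra
# sketch over an adjacency mapping {node: {neighbour: weight}}; it only illustrates the idea
# and is not the project's MapHelper implementation, and the graph shape is assumed.
import heapq

def dijkstra_shortest_path(graph, source, target):
    # graph: {node: {neighbour: edge_weight}}
    dist = {source: 0}
    prev = {}
    heap = [(0, source)]
    visited = set()
    while heap:
        d, node = heapq.heappop(heap)
        if node in visited:
            continue
        visited.add(node)
        if node == target:
            break
        for neighbour, weight in graph.get(node, {}).items():
            nd = d + weight
            if nd < dist.get(neighbour, float('inf')):
                dist[neighbour] = nd
                prev[neighbour] = node
                heapq.heappush(heap, (nd, neighbour))
    if target not in dist:
        return None
    # Walk the predecessor chain back from the target to recover the path.
    node, route = target, [target]
    while node != source:
        node = prev[node]
        route.append(node)
    return list(reversed(route))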
def create_initial_population(n):
    population = []
    for i in range(0, n):
        population.append(Car(track, GeneticBrain.random(), color="blue"))
    return population
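# A sketch of how create_initial_population and breed_population could be combined into a
# generation loop. The evaluation helper run_generation and the fitness attribute
# car.distance_driven are assumptions for illustration; the real project drives the cars
# through the tkinter update loop instead of a blocking call.
def evolve(generations=50, population_size=20, parents_per_generation=5):
    population = create_initial_population(population_size)
    for _ in range(generations):
        run_generation(population)  # hypothetical: simulate every car until it crashes
        # Select the best cars as parents and breed the next generation from them.
        parents = sorted(population,
                         key=lambda car: car.distance_driven,
                         reverse=True)[:parents_per_generation]
        population = breed_population(parents, population_size)
    return population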