def update_table(state, action, next_state, car):
    """Apply one Q-learning update to the entry for ``(state, action)``.

    The reward is -9999 when ``Tile.collides`` reports a collision at the
    car's current pose and 1 otherwise.  The entry is then moved towards
    ``reward + discount_rate * best_next_value`` by a step of
    ``learning_rate``, where ``best_next_value`` is element 1 of
    ``get_best_action(next_state)``.

    Args:
        state: State the action was taken from; used as part of the Q key.
        action: Action that was taken; used as part of the Q key.
        next_state: State observed after the action.
        car: Object whose ``pose[0]`` / ``pose[1]`` are passed to
            ``Tile.collides`` to decide the reward.

    Side effects:
        Mutates the module-level ``Q`` table and prints the value before and
        after the update.
    """
    key = (state, action)
    reward = -9999 if Tile.collides(car.pose[0], car.pose[1]) else 1

    # Look both values up once so the logged estimate is exactly the one
    # that feeds the update below.
    old_value = Q[key]
    best_next = get_best_action(next_state)
    print("Update Q Table: ", key, old_value, best_next)

    Q[key] = old_value + learning_rate * (
        reward + discount_rate * best_next[1] - old_value)
    print("New Q Value:", key, Q[key])
def run_trial(cars, models, allsprites, scores, display=True):
    """Run one trial in which each car is steered by its paired model.

    Opens the pygame window, draws the map image, loads the obstacle tiles
    and then loops at no more than 60 iterations per second.  On every
    iteration each car that is not colliding with a tile and whose score has
    not exceeded 1500 asks ``models[i]`` for an action, performs it and has
    its score incremented.

    Args:
        cars: Sequence of car objects (need ``pose``, ``get_relative_state``,
            ``move`` and ``handle_key``).
        models: Indexable collection; ``models[i].get_action(state)`` drives
            ``cars[i]``.
        allsprites: Object providing ``update()`` and ``draw(screen)``
            (presumably a pygame sprite group -- verify against caller).
        scores: Mutable sequence of per-car counters, updated in place.
        display: When true, the map and sprites are redrawn every iteration.

    Returns:
        ``scores`` once every car is either colliding or past the score cap;
        ``None`` if the window is closed or Escape is pressed.
    """
    # Called without a seed, so NumPy reseeds its global generator from
    # fresh entropy for every trial.
    np.random.seed()

    screen = pygame.display.set_mode((width, height))
    pygame.display.set_caption('ArduBot: Development Stage')
    pygame.mouse.set_visible(0)

    bg = pygame.image.load("../../images/map1.png")
    screen.blit(bg, (0, 0))
    pygame.display.flip()

    Tile.pre_init(screen)
    Tile.load(obstacles())

    clock = pygame.time.Clock()
    while True:
        clock.tick(60)

        # Stays True only if no car was able to move this iteration.
        blocked = True
        for i, car in enumerate(cars):
            if Tile.collides(car.pose[0], car.pose[1]) or scores[i] > 1500:
                continue
            blocked = False
            car.move(models[i].get_action(car.get_relative_state()))
            scores[i] += 1
        if blocked:
            return scores

        for event in pygame.event.get():
            if event.type == QUIT:
                return None
            if event.type == KEYDOWN:
                if event.key == K_ESCAPE:
                    return None
                # ``car`` is whatever the loop above left bound, i.e. the
                # last car in ``cars``; other key presses go to that car.
                car.handle_key(event.key)

        allsprites.update()

        # Draw everything
        if display:
            screen.blit(bg, (0, 0))
            allsprites.draw(screen)
            pygame.display.flip()
def update(self):
    """Advance the sprite one step along its current heading.

    The displacement is derived from ``forward_velocity`` and
    ``orientation`` (used as the argument to ``math.sin`` / ``math.cos``),
    rounded to whole units and applied to ``self.rect``.

    Returns:
        ``None`` when the move is applied, and also when it is rejected
        because the moved rect would touch or cross the edges of
        ``self.area`` (with a 50-unit margin on the right and bottom).
        ``-1`` when ``Tile.collides`` reports a collision at the *current*
        pose; the sprite is left where it is in that case.
    """
    speed = self.forward_velocity
    heading = self.orientation
    dx = -1 * speed * math.sin(heading)
    dy = speed * math.cos(heading)

    # The vertical component is negated before it is applied to the rect.
    candidate = self.rect.move(round(dx), round(-1 * dy))

    # Elements 0 and 1 of the moved rect are compared against the
    # horizontal and vertical limits of the area respectively.
    bounds = self.area
    inside_x = bounds.left < candidate[0] < bounds.right - 50
    inside_y = bounds.top < candidate[1] < bounds.bottom - 50
    if not (inside_x and inside_y):
        return

    # Checked against the pose held before this step, not the candidate.
    here_x, here_y = self.pose[0], self.pose[1]
    if Tile.collides(here_x, here_y):
        return -1

    self.pose = candidate.center
    self.rect = candidate
def run_trial(cars, models, allsprites, scores, display=True):
    """Run one epsilon-greedy Q-learning trial for all cars.

    Opens the pygame window, draws the map image, loads the obstacle tiles
    and then loops at no more than 60 iterations per second.  On every
    iteration each car picks an action -- a uniformly random entry of
    ``possible_actions`` when ``np.random.rand() < epsilon()``, otherwise
    element 0 of ``get_best_action(state)`` -- and, unless it is colliding
    with a tile or its score has exceeded 1500, performs it.  After the
    sprites have been updated, ``update_table`` is called once per car with
    the state/action recorded this iteration and the car's new state.

    Args:
        cars: Sequence of car objects (need ``pose``, ``get_list``, ``move``
            and ``handle_key``).
        models: Not used by this function; kept so the signature stays
            compatible with existing callers.
        allsprites: Object providing ``update()`` and ``draw(screen)``
            (presumably a pygame sprite group -- verify against caller).
        scores: Mutable sequence of per-car counters, updated in place.
        display: When true, the map and sprites are redrawn every iteration.

    Returns:
        ``(scores, len(Q), random_moves)`` once every car is either
        colliding or past the score cap, where ``random_moves`` counts the
        randomly chosen actions in this trial.  ``None`` if the window is
        closed or Escape is pressed (Escape also prints the whole Q table).

    Side effects:
        Increments the module-level ``count`` and updates ``Q`` through
        ``update_table``.
    """
    global count

    # Called without a seed, so NumPy reseeds its global generator from
    # fresh entropy for every trial.
    np.random.seed()

    screen = pygame.display.set_mode((width, height))
    pygame.display.set_caption('ArduBot: Development Stage')
    pygame.mouse.set_visible(0)

    bg = pygame.image.load("../../images/map1.png")
    screen.blit(bg, (0, 0))
    pygame.display.flip()

    Tile.pre_init(screen)
    Tile.load(obstacles())

    clock = pygame.time.Clock()
    random_moves = 0
    while True:
        clock.tick(60)

        # Stays True only if no car was able to move this iteration.
        blocked = True
        states = []
        actions = []
        for i, car in enumerate(cars):
            state = car.get_list()
            best = get_best_action(state)
            if np.random.rand() < epsilon():
                random_moves += 1
                action = possible_actions[np.random.randint(
                    0, high=len(possible_actions))]
            else:
                action = best[0]
                print("Choosing: ", car.pose, best,
                      (-1.5, Q[(state, -1.5)]),
                      (0, Q[(state, 0)]),
                      (1.5, Q[(state, 1.5)]))

            # NOTE(review): advanced once per car per iteration no matter
            # how the action was chosen; presumably feeds epsilon() --
            # confirm against its definition.
            count += 0.0001 / max_players

            # Recorded for every car, including ones that will not move, so
            # the Q-update loop below can index by car position.
            states.append(state)
            actions.append(action)

            if Tile.collides(car.pose[0], car.pose[1]) or scores[i] > 1500:
                continue
            blocked = False
            car.move(action)
            scores[i] += 1
        if blocked:
            return scores, len(Q), random_moves

        for event in pygame.event.get():
            if event.type == QUIT:
                return None
            if event.type == KEYDOWN:
                if event.key == K_ESCAPE:
                    for k in Q:
                        print(k, Q[k])
                    return None
                # ``car`` is whatever the loop above left bound, i.e. the
                # last car in ``cars``; other key presses go to that car.
                car.handle_key(event.key)

        allsprites.update()

        # Update Q values using each car's state after the sprite update.
        for prev_state, taken, learner in zip(states, actions, cars):
            update_table(prev_state, taken, learner.get_list(), learner)

        # Draw everything
        if display:
            screen.blit(bg, (0, 0))
            allsprites.draw(screen)
            pygame.display.flip()