sound_manager.play_background_music(LAVA_MUSIC_PATH) if (sound_manager.playing_background_music and sound_manager.music_playing == LAVA_MUSIC_PATH and lava.state == ENDED): configuration.save_score(player.score) sound_manager.stop_background_music() if (player_score != player.score): sound_manager.play_effect(MONEY_SOUND_PATH) player_score = player.score #Drawing surface.fill((0, 0, 255)) level.draw(surface, camera) player.draw(surface, camera) lava.draw(surface, camera) label = font.render("Score: %d R" % (player.score), 1, (255, 255, 255)) surface.blit(label, (0, 0)) if (lava.state == EMERGING): label = font.render("LAVA EMERGING: RUN TO THE SURFACE!", 1, (255, 0, 0)) surface.blit(label, (0, 30)) elif conf.state == MENU_STATE: if (sound_manager.playing_background_music and sound_manager.music_playing != MENU_MUSIC_PATH): sound_manager.stop_background_music() elif (not sound_manager.playing_background_music): sound_manager.play_background_music(MENU_MUSIC_PATH) menu.draw_menu(surface, font, conf)
class Environment: def __init__(self, wrap=False, grid_size=10, local_size=9, rate=100, max_time=math.inf, action_space=5, food_count=1, stick_count=1, obstacle_count=0, lava_count=0, zombie_count=0, history=30, map_path=None): """ Initialise the Game Environment with default values """ #self.FPS = 120 # NOT USED YET self.UPDATE_RATE = rate self.SCALE = 20 # Scale of each block 20x20 pixels self.GRID_SIZE = grid_size self.LOCAL_GRID_SIZE = local_size # Has to be an odd number self.ENABLE_WRAP = wrap if not self.ENABLE_WRAP: self.GRID_SIZE += 2 self.ACTION_SPACE = action_space self.MAP_PATH = map_path self.DISPLAY_WIDTH = self.GRID_SIZE * self.SCALE self.DISPLAY_HEIGHT = self.GRID_SIZE * self.SCALE # Maximum timesteps before an episode is stopped self.MAX_TIME_PER_EPISODE = max_time # Create and Initialise steve self.steve = Steve(history) # Create Food self.NUM_OF_FOOD = food_count self.food = Food(self.NUM_OF_FOOD) self.NUM_OF_STICKS = stick_count self.stick = Stick(self.NUM_OF_STICKS) # Create Obstacles self.NUM_OF_OBSTACLES = obstacle_count self.obstacle = Obstacle(self.NUM_OF_OBSTACLES) # Create Lava self.NUM_OF_LAVA = lava_count self.lava = Lava(self.NUM_OF_LAVA) # Create Zombies self.NUM_OF_ZOMBIES = zombie_count self.zombie = Zombie(self.NUM_OF_ZOMBIES) self.score = 0 self.time = 0 self.state = None self.display = None self.bg = None self.clock = None self.font = None self.steps = 0 self.spawn_new_food = False self.last_eaten_food = -1 # Used putting food and obstacles on the grid self.grid = [] for j in range(self.GRID_SIZE): for i in range(self.GRID_SIZE): self.grid.append((i * self.SCALE, j * self.SCALE)) self.maze = [] # created in reset() def prerender(self): """ If you want to render the game to the screen, you will have to prerender Load textures / images """ pygame.init() pygame.key.set_repeat(1, 1) self.display = pygame.display.set_mode( (self.DISPLAY_WIDTH, self.DISPLAY_HEIGHT)) pygame.display.set_caption('Malmo Simulation') self.clock = pygame.time.Clock() pygame.font.init() self.font = pygame.font.SysFont('Default', 32, bold=False) # Creates a visual steve self.steve.create(pygame, self.SCALE) # Creates visual Food self.food.create(pygame, self.SCALE) # Creates visual Food self.stick.create(pygame, self.SCALE) # Creates visual Obstacles self.obstacle.create(pygame, self.SCALE) # Creates visual Lava self.lava.create(pygame, self.SCALE) # Creates visual Multipliers self.zombie.create(pygame, self.SCALE) # Creates the grid background self.bg = pygame.image.load("./Images/Grid100.png").convert() self.bg = pygame.transform.scale(self.bg, (self.SCALE * 100, self.SCALE * 100)) def reset(self): """Reset the environment""" self.steps = 0 # Reset the score to 0 self.score = 0 self.steve.health = 3 # Positions on the grid that are not allowed to spawn things disallowed = [] # Create obstacles and lava in the environment self.obstacle.array.clear() self.obstacle.array_length = 0 self.lava.array.clear() self.lava.array_length = 0 if self.MAP_PATH != None: self.obstacle.reset_map(self.GRID_SIZE, self.MAP_PATH, self.ENABLE_WRAP) self.lava.reset_map(self.GRID_SIZE, self.MAP_PATH, self.ENABLE_WRAP) if not self.ENABLE_WRAP: self.obstacle.create_border(self.GRID_SIZE, self.SCALE) # Add all obstacle positions to the disallowed list [disallowed.append(grid_pos) for grid_pos in self.obstacle.array] [disallowed.append(grid_pos) for grid_pos in self.lava.array] # Add random obstacles not already on the map self.obstacle.reset(self.grid, disallowed) # Add all obstacle positions to the disallowed list disallowed = [] [disallowed.append(grid_pos) for grid_pos in self.obstacle.array] [disallowed.append(grid_pos) for grid_pos in self.lava.array] # Create lava in the environment # self.lava.array.clear() # self.lava.array_length = 0 # if self.MAP_PATH != None: # self.lava.reset_map(self.GRID_SIZE, self.MAP_PATH, self.ENABLE_WRAP) # [disallowed.append(grid_pos) for grid_pos in self.lava.array] # Add random lava not already on the map self.lava.reset(self.grid, disallowed) disallowed = [] [disallowed.append(grid_pos) for grid_pos in self.obstacle.array] [disallowed.append(grid_pos) for grid_pos in self.lava.array] self.maze = createGrid(self.GRID_SIZE, disallowed, self.SCALE) # print(self.obstacle.array) # print("\n") # print(self.maze) # Reseting Steve at a random (or specific) position self.steve.reset(self.grid, disallowed) # Initialise the movement to not moving if self.ACTION_SPACE == 5: self.steve.dx = 0 self.steve.dy = 0 disallowed.append(self.steve.pos) # Create a piece of food self.food.reset(self.NUM_OF_FOOD, self.grid, disallowed) # self.food.make_within_range(self.GRID_SIZE, self.SCALE, self.steve) self.spawn_new_food = False [disallowed.append(grid_pos) for grid_pos in self.food.array] # Spawn in a zombie self.zombie.reset(self.grid, disallowed) # Fill the state array with the appropriate state representation # self.state = self.state_array() # self.state = self.state_vector_3D() self.state = self.local_state_vector_3D() # Reset the time self.time = 0 # A dictionary of information that can be useful info = {"time": self.time, "score": self.score} return self.state, info def quick_reset(self): """Reset the environment and places all entities according to the map layout""" self.steps = 0 # Reset the score to 0 self.score = 0 self.steve.health = 5 # Positions on the grid that are not allowed to spawn things disallowed = [] # Create obstacles and lava in the environment self.obstacle.array.clear() self.obstacle.array_length = 0 self.lava.array.clear() self.lava.array_length = 0 self.zombie.array.clear() self.zombie.amount = 0 self.steve.pos, self.food.array, self.stick.array, self.zombie.array, barrier = reset_impossible_map( self.GRID_SIZE, self.MAP_PATH) self.zombie.prev_array = self.zombie.array # self.steve.pos = (0,0) self.steve.x = self.steve.pos[0][0] self.steve.y = self.steve.pos[0][1] # quick fix for something silly self.steve.pos[0] = self.steve.x self.steve.pos.append(self.steve.y) self.steve.history.clear() self.steve.history.append((self.steve.x, self.steve.y)) self.steve.hasSword = False self.obstacle.reset_map(self.GRID_SIZE, self.MAP_PATH, self.ENABLE_WRAP) self.lava.reset_map(self.GRID_SIZE, self.MAP_PATH, self.ENABLE_WRAP) self.obstacle.create_border(self.GRID_SIZE, self.SCALE) [disallowed.append(grid_pos) for grid_pos in self.obstacle.array] [disallowed.append(grid_pos) for grid_pos in self.lava.array] [disallowed.append(grid_pos) for grid_pos in barrier] self.maze = createGrid(self.GRID_SIZE, disallowed, self.SCALE) # Initialise the movement to not moving if self.ACTION_SPACE == 5: self.steve.dx = 0 self.steve.dy = 0 self.spawn_new_food = False self.zombie.amount = len(self.zombie.array) self.food.amount = len(self.food.array) self.stick.amount = len(self.stick.array) # Fill the state array with the appropriate state representation # self.state = self.state_array() # self.state = self.state_vector_3D() self.state = self.local_state_vector_3D() # Reset the time self.time = 0 # A dictionary of information that can be useful info = {"time": self.time, "score": self.score} return self.state, info def render(self): """Renders ONLY the CURRENT state""" # Mainly to close the window and stop program when it's running action = self.controls() # Set the window caption to the score # pygame.display.set_caption("Score: " + str(self.score)) # Draw the background, steve and the food self.display.blit(self.bg, (0, 0)) self.obstacle.draw(self.display) self.lava.draw(self.display) self.food.draw(self.display) self.stick.draw(self.display) self.zombie.draw(self.display) self.steve.draw(self.display) self.steve.draw_health(self.display) # Text on screen text = self.font.render("Score: " + str(int(self.score)), True, (240, 240, 240, 0)) self.display.blit(text, (10, 0)) # text = self.font.render("Multiplier: "+str(self.steve.score_multiplier)+"x", True, (240, 240, 240, 0)) # self.display.blit(text,(150,0)) # Update the pygame display pygame.display.update() # print(pygame.display.get_surface().get_size()) # pygame.display.get_surface().lock() # p = pygame.PixelArray(pygame.display.get_surface()) # p = pygame.surfarray.array3d(pygame.display.get_surface()) # print("PIXELS:", p.shape) # pygame.display.get_surface().unlock() # print(clock.get_rawtime()) # clock.tick(self.FPS) # FPS setting # Adds a delay to the the game when rendering so that humans can watch pygame.time.delay(self.UPDATE_RATE) return action def end(self): """ Ending the Game - This has to be at the end of the code Clean way to end the pygame env with a few backups... """ pygame.quit() quit() sys.exit(0) # safe backup def wrap(self): """ If the steve goes out the screen bounds, wrap it around""" if self.steve.x > self.DISPLAY_WIDTH - self.SCALE: self.steve.x = 0 if self.steve.x < 0: self.steve.x = self.DISPLAY_WIDTH - self.SCALE if self.steve.y > self.DISPLAY_HEIGHT - self.SCALE: self.steve.y = 0 if self.steve.y < 0: self.steve.y = self.DISPLAY_HEIGHT - self.SCALE self.steve.pos = (self.steve.x, self.steve.y) def step(self, action): """ Step through the game, one state at a time. Return the reward, the new_state, whether its reached_diamond or not, and the time """ self.steps += 1 # Rewards: # reward_each_time_step = 1.0 reward_each_time_step = -0.1 reward_collecting_diamond = 10.0 reward_out_of_bounds = -1.0 # not used reward_zombie_hit = -10.0 reward_in_lava = -10.0 reward_barrier = -2.0 # Increment time step self.time += 1 # If the steve has reached the food reached_diamond = False hit_obstacle = False in_lava = False # If the episode is finished - after a certain amount of timesteps or it crashed done = False # Initialze to -1 for every time step - to find the fastest route (can be a more negative reward) reward = reward_each_time_step # Negetive exponential distance rewards # if len(self.zombie.array) > 0: # distance_list = [] # for i, z in enumerate(self.zombie.array): # distance_list.append((math.sqrt((self.steve.x - z[0])**2 + (self.steve.y - z[1])**2)/self.GRID_SIZE)/2) # # print(distance_list) # if min(distance_list) < 1.6: # reward = reward_barrier # normal_distance = ((distance/self.GRID_SIZE)*(pi/2))-pi/4 # normal_distance = ((distance/self.GRID_SIZE)*(1.0))-0.4 # reward = np.tan(normal_distance) # reward = -np.exp(-distance)*10 # Linear distance reward # if len(self.zombie.array) > 0: # reward = (math.sqrt((self.steve.x - self.zombie.array[0][0])**2 + (self.steve.y - self.zombie.array[0][1])**2)/20)/self.GRID_SIZE # Exponential distance reward # if len(self.zombie.array) > 0: # reward = (((self.steve.x - self.zombie.array[0][0])**2 + (self.steve.y - self.zombie.array[0][1])**2)/20**2)/self.GRID_SIZE # Test: if moving, give a reward # if self.steve.dx != 0 or self.steve.dy != 0: # reward = -0.01 # Test for having a barrier around the zombie # if reward < 0.143: # reward = -5 # Update the position of steve self.steve.update(self.SCALE, action, self.ACTION_SPACE) if self.ENABLE_WRAP: self.wrap() else: if self.steve.x > self.DISPLAY_WIDTH - self.SCALE: reward = reward_out_of_bounds done = True if self.steve.x < 0: reward = reward_out_of_bounds done = True if self.steve.y > self.DISPLAY_HEIGHT - self.SCALE: reward = reward_out_of_bounds done = True if self.steve.y < 0: reward = reward_out_of_bounds done = True # Check for obstacle collision for i in range(self.obstacle.array_length): hit_obstacle = (self.steve.pos == self.obstacle.array[i]) if hit_obstacle: self.steve.x = self.steve.prev_pos[0] self.steve.y = self.steve.prev_pos[1] self.steve.pos = self.steve.prev_pos self.steve.history.pop(len(self.steve.history) - 1) self.steve.history.append(self.steve.pos) # done = True # reward = -0.1 # Check for lava collision for i in range(self.lava.array_length): in_lava = (self.steve.pos == self.lava.array[i]) if in_lava: done = True reward = reward_in_lava # Update the position of the zombie # self.zombie.move(self.maze, self.steve, self.steps, self.SCALE) self.zombie.range_move(self.maze, self.steve, self.steps, self.SCALE) # Check if zombie gets steve for i in range(self.zombie.amount): # print(self.steve.pos, self.zombie.array[i]) if self.steve.pos == self.zombie.array[i]: zombie_hit = True else: zombie_hit = False self.steve.isHit = False if zombie_hit: self.zombie.move_back(i) self.steve.isHit = True self.steve.health = self.steve.health - 1 # done = True reward = reward_zombie_hit break # Make the most recent history have the most negative rewards if self.steve.history_size != 0: decay = (reward_each_time_step) / (self.steve.history_size) for i in range(len(self.steve.history) - 1): # print(i,-1*(1-decay*i)) if ((self.steve.pos) == self.steve.history[-i - 2]): # reward = -1*(1-decay*i) break # Checking if Steve has reached the diamond reached_diamond, diamond_index = self.food.eat(self.steve) # ADDED FOR ALLOWING THE MODEL TO HAVE STEVE OVER THE FOOD IN THE STATE # if self.spawn_new_food: # disallowed = [] # [disallowed.append(grid_pos) for grid_pos in self.obstacle.array] # # [disallowed.append(grid_pos) for grid_pos in self.zombie.array] # # if self.steve.pos not in disallowed: # # disallowed.append(self.steve.pos) # self.food.make(self.grid, disallowed, index = self.last_eaten_food) # self.spawn_new_food = False # reached_diamond = False # If Steve reaches the food, increment score if reached_diamond: self.score += 1 self.last_eaten_food = diamond_index self.spawn_new_food = False # Create a piece of food that is not within Steve disallowed = [] [disallowed.append(grid_pos) for grid_pos in self.obstacle.array] # [disallowed.append(grid_pos) for grid_pos in self.zombie.array] self.food.make(self.grid, disallowed, index=diamond_index) # Only collect 1 food item at a time # done = True # Reward functions reward = reward_collecting_diamond else: self.spawn_new_food = False # If Steve collects a stick, increment score collected_stick, stick_index = self.stick.collect(self.steve) if collected_stick: self.score += 1 # Create a piece of food that is not within Steve disallowed = [] [disallowed.append(grid_pos) for grid_pos in self.obstacle.array] self.stick.make(self.grid, disallowed, index=stick_index) # Only collect 1 food item at a time # done = True # Reward functions reward = reward_collecting_diamond # For fun if self.score == 3: self.steve.hasSword = True done = True # To make it compatible with malmo if self.score == self.NUM_OF_FOOD: # reward = 1 pass if self.NUM_OF_FOOD != 0: done = True pass if self.steve.health <= 0: done = True # If the episode takes longer than the max time, it ends if self.time == self.MAX_TIME_PER_EPISODE: # print("Steve survived :)") # reward = -1.0 done = True # Get the new_state # new_state = self.state_array() # new_state = self.state_vector_3D() new_state = self.local_state_vector_3D() # print(reward) # print(self.steve.history) # A dictionary of information that may be useful info = {"time": self.time, "score": self.score} return new_state, reward, done, info def state_index(self, state_array): """ Given the state array, return the index of that state as an integer Used for the Qlearning lookup table """ return int((self.GRID_SIZE**3) * state_array[0] + (self.GRID_SIZE**2) * state_array[1] + (self.GRID_SIZE**1) * state_array[2] + (self.GRID_SIZE**0) * state_array[3]) def sample_action(self): """ Return a random action Can't use action space 4 with a tail, else it will have a double chance of doing nothing or crash into itself """ return np.random.randint(0, self.ACTION_SPACE) def set_state(self, state): """Set the state of the game environment""" self.state = state def set_map(self, map_path): self.MAP_PATH = map_path def number_of_states(self): """ Return the number of states with just the steve head and 1 food Used for Q Learning look up table """ return (self.GRID_SIZE**2) * ((self.GRID_SIZE**2)) def number_of_actions(self): """ Return the number of possible actions Action space: > nothing, up, down, left, right """ return self.ACTION_SPACE def state_array(self): """ The state represented as an array or steve positions and food positions Used for Q learning """ new_state = np.zeros(4) new_state[0] = int(self.steve.x / self.SCALE) new_state[1] = int(self.steve.y / self.SCALE) new_state[2] = int(self.food.x / self.SCALE) new_state[3] = int(self.food.y / self.SCALE) return new_state def state_vector(self): """ The state represented as a onehot 1D vector Used for the feed forward NN """ # (rows, columns) state = np.zeros((self.GRID_SIZE**2, 3)) # This is for STEVE, the FOOD and EMPTY for i in range(self.GRID_SIZE): # rows for j in range(self.GRID_SIZE): # columns if ((self.steve.x / self.SCALE) == j and (self.steve.y / self.SCALE) == i): state[i * self.GRID_SIZE + j] = [1, 0, 0] # print("steve:", i*self.GRID_SIZE+j) elif ((self.food.x / self.SCALE) == j and (self.food.y / self.SCALE) == i): state[i * self.GRID_SIZE + j] = [0, 1, 0] # print("Food:", i*self.GRID_SIZE+j) else: state[i * self.GRID_SIZE + j] = [0, 0, 1] # Flatten the vector to a 1 dimensional vector for the input layer to the NN state = state.flatten() state = state.reshape(1, (self.GRID_SIZE**2) * 3) # state = np.transpose(state) return state def state_vector_3D(self): """ State as a 3D vector of the whole map for the CNN Shape = (Layers, GRID_SIZE, GRID_SIZE) """ state = np.zeros((3, self.GRID_SIZE, self.GRID_SIZE)) state[0, int(self.steve.y / self.SCALE), int(self.steve.x / self.SCALE)] = 1 state[1, int(self.food.y / self.SCALE), int(self.food.x / self.SCALE)] = 1 # Obstacles for i in range(self.obstacle.array_length): state[2, int(self.obstacle.array[i][1] / self.SCALE), int(self.obstacle.array[i][0] / self.SCALE)] = 1 # Zombies # for i in range(len(self.zombie.array)): # state[1, int(self.zombie.array[i][1]/self.SCALE), int(self.zombie.array[i][0]/self.SCALE)] = 1 return state def local_state_vector_3D(self): """ State as a 3D vector of the local area around the steve Shape = (Layers, LOCAL_GRID_SIZE, LOCAL_GRID_SIZE) """ s_pos = 0 d_pos = 1 z_pos = 2 h_pos = 3 l_pos = 4 o_ops = 5 #s = steve sx = int(self.steve.x / self.SCALE) sy = int(self.steve.y / self.SCALE) state = np.zeros((6, self.LOCAL_GRID_SIZE, self.LOCAL_GRID_SIZE)) # Agent local_pos = int((self.LOCAL_GRID_SIZE - 1) / 2) state[s_pos, local_pos, local_pos] = 1 # Diamonds for i in range(self.food.amount): x_prime_food = local_pos + int( self.food.array[i][0] / self.SCALE) - sx y_prime_food = local_pos + int( self.food.array[i][1] / self.SCALE) - sy if x_prime_food < self.LOCAL_GRID_SIZE and x_prime_food >= 0 and y_prime_food < self.LOCAL_GRID_SIZE and y_prime_food >= 0: state[d_pos, y_prime_food, x_prime_food] = 1 pass # Zombies for i in range(len(self.zombie.array)): x_prime_zom = local_pos + int( self.zombie.array[i][0] / self.SCALE) - sx y_prime_zom = local_pos + int( self.zombie.array[i][1] / self.SCALE) - sy if x_prime_zom < self.LOCAL_GRID_SIZE and x_prime_zom >= 0 and y_prime_zom < self.LOCAL_GRID_SIZE and y_prime_zom >= 0: state[z_pos, y_prime_zom, x_prime_zom] = 1 pass # Obstacles for i in range(self.obstacle.array_length): x_prime_obs = local_pos + int( self.obstacle.array[i][0] / self.SCALE) - sx y_prime_obs = local_pos + int( self.obstacle.array[i][1] / self.SCALE) - sy if x_prime_obs < self.LOCAL_GRID_SIZE and x_prime_obs >= 0 and y_prime_obs < self.LOCAL_GRID_SIZE and y_prime_obs >= 0: state[o_ops, y_prime_obs, x_prime_obs] = 1 pass # Out of bounds for j in range(0, self.LOCAL_GRID_SIZE): for i in range(0, self.LOCAL_GRID_SIZE): x_prime_wall = local_pos - sx y_prime_wall = local_pos - sy if i < x_prime_wall or j < y_prime_wall: state[o_ops, j, i] = 1 pass x_prime_wall = local_pos + (self.GRID_SIZE - sx) - 1 y_prime_wall = local_pos + (self.GRID_SIZE - sy) - 1 if i > x_prime_wall or j > y_prime_wall: state[o_ops, j, i] = 1 pass # Lava for i in range(self.lava.array_length): x_prime_lava = local_pos + int( self.lava.array[i][0] / self.SCALE) - sx y_prime_lava = local_pos + int( self.lava.array[i][1] / self.SCALE) - sy if x_prime_lava < self.LOCAL_GRID_SIZE and x_prime_lava >= 0 and y_prime_lava < self.LOCAL_GRID_SIZE and y_prime_lava >= 0: state[l_pos, y_prime_lava, x_prime_lava] = 1 pass # History if self.steve.history_size != 0: decay = 1 / self.steve.history_size for i in range(len(self.steve.history) - 1): x_prime = local_pos + int( self.steve.history[-i - 2][0] / self.SCALE) - sx y_prime = local_pos + int( self.steve.history[-i - 2][1] / self.SCALE) - sy if x_prime < self.LOCAL_GRID_SIZE and x_prime >= 0 and y_prime < self.LOCAL_GRID_SIZE and y_prime >= 0: if 1 - decay * i >= 0 and state[h_pos, y_prime, x_prime] == 0: state[h_pos, y_prime, x_prime] = 1 - decay * i pass # Delete these layers: # state = np.delete(state, s_pos, 0) # state = np.delete(state, d_pos, 0) # state = np.delete(state, z_pos, 0) # state = np.delete(state, h_pos, 0) # state = np.delete(state, l_pos, 0) # state = np.delete(state, o_pos, 0) # DIAMOND DOJO # state = np.delete(state, 2, 0) # state = np.delete(state, 2, 0) # ZOMBIE DOJO # state = np.delete(state, 1, 0) # state = np.delete(state, 2, 0) # EXPLORE DOJO # state = np.delete(state, 1, 0) # state = np.delete(state, 1, 0) # GRID 9 SETUP # state = np.delete(state, 1, 0) # state = np.delete(state, 2, 0) # state = np.delete(state, 2, 0) # state = np.delete(state, 3, 0) # state = np.delete(state, 4, 0) return state def get_pixels(self): """ Returns the pixels in a (GRID*20, GRID*20, 3) size array/ Unfortunatly it has to render in order to gather the pixels """ return pygame.surfarray.array3d(pygame.display.get_surface()) def controls(self): """ Defines all the players controls during the game """ action = 0 # Do nothing as default for event in pygame.event.get(): # print(event) # DEBUGGING if event.type == pygame.QUIT: self.end() if event.type == pygame.KEYDOWN: # print(event.key) #DEBUGGING # In order to quit easily if (event.key == pygame.K_q): self.end() # Moving up if (event.key == pygame.K_UP or event.key == pygame.K_w): if self.ACTION_SPACE == 5: action = 1 # up # Moving down elif (event.key == pygame.K_DOWN or event.key == pygame.K_s): if self.ACTION_SPACE == 5: action = 2 # down # Moving left elif (event.key == pygame.K_LEFT or event.key == pygame.K_a): if self.ACTION_SPACE == 5: action = 3 # left # Moving right elif (event.key == pygame.K_RIGHT or event.key == pygame.K_d): if self.ACTION_SPACE == 5: action = 4 # right return action def play(self): """ Lets you simply play the game Useful for debugging and testing out the environment """ GAME_OVER = False self.prerender() # self.reset() for i in range(10): # MAP_NUMBER = np.random.randint(5) # MAP_NUMBER = 1 # MAP_PATH = "./Maps/Grid{}/map{}.txt".format(self.GRID_SIZE-2, MAP_NUMBER) # MAP_PATH = "./Maps/Grid{}/impossible_map_empty{}.txt".format(self.GRID_SIZE-2, MAP_NUMBER) # self.set_map(MAP_PATH) # self.reset() self.quick_reset() GAME_OVER = False while not GAME_OVER: # print(self.steve.history) action = self.controls() # action = self.render() # print(self.get_pixels().shape) s, r, GAME_OVER, i = self.step(action) # print("\n\n") # DEBUGGING # print(self.state_vector_3D()) # DEBUGGING # print(self.local_state_vector_3D()) # DEBUGGING # print(r) self.render() if GAME_OVER: print("Game Over: time:", i["time"]) self.end()