def run(self):
    """Run the two-paddle game loop, then show the game-over screen.

    While ``self._is_active`` is true, each frame:

    1. redraws the screen,
    2. applies pending input events (W / S set the velocity of
       ``_paddle1``, UP / DOWN set the velocity of ``_paddle2``, and
       releasing one of those keys sets that paddle's velocity to 0),
    3. moves the paddles and the ball,
    4. resolves wall and paddle/ball collisions,
    5. calls ``clock.tick(FRAMES_PER_SECOND)`` to pace the loop.

    Once the loop ends the game-over screen is drawn and the method keeps
    polling events until the window is closed.

    A ``QUIT`` event at any point calls ``quit_pygame()`` and returns.
    """
    clock = Clock()

    while self._is_active:
        self._redraw_screen()

        for event in get_event():
            if event.type == QUIT:
                quit_pygame()
                # Stop immediately: carrying on would redraw and poll events
                # after shutdown (assumes quit_pygame de-initialises pygame
                # -- confirm against its definition).
                return
            elif event.type == KEYDOWN:
                # A single event carries exactly one key, so one chain is
                # enough for both paddles.
                if event.key == K_w:
                    self._paddle1.velocity = -MAX_VELOCITY
                elif event.key == K_s:
                    self._paddle1.velocity = MAX_VELOCITY
                elif event.key == K_UP:
                    self._paddle2.velocity = -MAX_VELOCITY
                elif event.key == K_DOWN:
                    self._paddle2.velocity = MAX_VELOCITY
            elif event.type == KEYUP:
                # Releasing a movement key stops the corresponding paddle.
                if event.key in (K_w, K_s):
                    self._paddle1.velocity = 0
                elif event.key in (K_UP, K_DOWN):
                    self._paddle2.velocity = 0

        # Advance the simulation by one frame.
        self._move_paddles_and_ball()

        # Collisions: paddles with walls, ball with walls and paddles.
        self._handle_wall_collision()
        self._handle_paddles_ball_collision()

        clock.tick(FRAMES_PER_SECOND)

    self._draw_game_over_screen()

    # Keep the game-over screen up until the user closes the window.
    while pygame_is_active():
        for event in get_event():
            if event.type == QUIT:
                quit_pygame()
                return
def run(self):
    """Run the single-paddle game loop, then show the game-over screen.

    While ``self._n_lives`` is greater than 0, each frame:

    1. redraws the screen,
    2. applies pending input events (LEFT / RIGHT set the paddle velocity
       to ``-MAX_VELOCITY`` / ``MAX_VELOCITY``; key releases are not
       handled in this loop, so the velocity stays as last set),
    3. moves the paddle and the ball,
    4. resolves wall and paddle/ball collisions,
    5. calls ``clock.tick(FRAMES_PER_SECOND)`` to pace the loop.

    Once the loop ends the game-over screen is drawn and the method keeps
    polling events until the window is closed.

    A ``QUIT`` event at any point calls ``quit_pygame()`` and returns.
    """
    clock = Clock()

    while self._n_lives > 0:
        self._redraw_screen()

        for event in get_event():
            if event.type == QUIT:
                quit_pygame()
                # Stop immediately: carrying on would redraw and poll events
                # after shutdown (assumes quit_pygame de-initialises pygame
                # -- confirm against its definition).
                return
            elif event.type == KEYDOWN:
                if event.key == K_LEFT:
                    self._paddle.velocity = -MAX_VELOCITY
                elif event.key == K_RIGHT:
                    self._paddle.velocity = MAX_VELOCITY

        # Advance the simulation by one frame.
        self._move_paddle_and_ball()

        # Collisions: paddle with walls, ball with walls and paddle.
        self._handle_wall_collision()
        self._handle_paddle_ball_collision()

        clock.tick(FRAMES_PER_SECOND)

    self._draw_game_over_screen()

    # Keep the game-over screen up until the user closes the window.
    while pygame_is_active():
        for event in get_event():
            if event.type == QUIT:
                quit_pygame()
                return
def close_game(self):
    """Close the game by delegating to ``quit_pygame``."""
    quit_pygame()
def simulate(maze: gym.Env, n_episodes, winning_streak=100,
             learning_rate=0.01, epsilon=0.3, decay=1.0, policy="EG",
             starting_value=0.0, discount_factor=0.99, display=False):
    """Train a tabular Q-learning agent on ``maze`` and report the losses.

    Episodes are run until more than ``winning_streak`` consecutive
    successes are recorded, more than ``losing_streak`` consecutive
    episodes time out, or ``n_episodes`` episodes have been played.
    ``quit_pygame()`` is always called before the function exits, whether
    it returns normally or an exception escapes the training loop.

    :param maze: gym environment; ``reset()`` must return the initial
        state and ``step(action)`` a 4-tuple
        ``(state, reward, solved, info)``
    :param n_episodes: Total # of episodes to attempt before giving up
        (must be considerably larger than winning_streak)
    :param winning_streak: <int> # of successes in a row, after which the
        optimal policy is assumed to have been learnt
    :param learning_rate: <float> step size of the Q-value update
    :param epsilon: Parameter for exploration (also the initial value of
        every upper confidence bound when policy is "UCB")
    :param decay: Decay rate applied after every step: to ``epsilon`` and
        ``learning_rate`` for "ED" (both floored at 0.001), and to the
        confidence bound of the chosen state/action for "UCB"
    :param policy: "EG", "ED" or "UCB" for epsilon-greedy, decaying
        epsilon-greedy and UCB policies
    :param starting_value: initialization value for Q(s,a)
    :param discount_factor: Discount for rewards
    :param display: Whether to render the environment after every step
    :return: dict with keys
        ``"winning_episode"`` (episode index at which the winning streak
        was reached, 0 if it never was),
        ``"losses"`` (TD error of every step),
        ``"avg_losses"`` (mean of each complete group of 10 losses) and
        ``"returns"`` (total reward of every solved episode)
    :raises ValueError: if ``policy`` is not "EG", "ED" or "UCB"
    """
    # Fail fast: an unknown policy would otherwise leave `action` unbound
    # and surface as an UnboundLocalError inside the training loop.
    if policy not in ("EG", "ED", "UCB"):
        raise ValueError(
            f"Unknown policy {policy!r}; expected 'EG', 'ED' or 'UCB'")

    obs_space = maze.observation_space
    maze_size = tuple((obs_space.high + np.ones(obs_space.shape)).astype(int))
    maze_boundary = list(zip(obs_space.low, obs_space.high))

    n_cells = np.prod(maze_size, dtype=int)
    # An episode that is not solved within this many steps is a failure
    max_steps = n_cells * 100
    # If maze is solved with more steps than this, the success streak resets
    max_steps_for_success = n_cells

    table_shape = maze_size + (maze.action_space.n, )
    q_table = np.full(table_shape, starting_value, dtype=float)
    # For UCB method, we need to store upper confidence bounds
    if policy == "UCB":
        ucb_table = np.full(table_shape, epsilon, dtype=float)

    success_streak = 0  # Number of times we solved the maze (in a row)
    fail_streak = 0  # Number of times an episode timed out (in a row)

    # Simulation results
    losses = []
    returns = []
    winning_episode = 0

    try:
        if display:
            maze.render()

        for episode in range(n_episodes):
            # Reset the environment and clip the initial state
            state = bound_state(maze.reset(), maze_boundary)
            total_reward = 0

            for t in range(max_steps):
                # Select an action
                if policy == "UCB":
                    action = select_action_ucb(q_values=q_table[state],
                                               ucbs=ucb_table[state])
                    ucb_table[state][action] *= decay
                else:
                    # "EG" and "ED" share the epsilon-greedy selection
                    action = select_action_eps_greedy(
                        action_space=maze.action_space,
                        q_values=q_table[state],
                        epsilon=epsilon)
                    if policy == "ED":
                        epsilon = max(epsilon * decay, 0.001)
                        learning_rate = max(learning_rate * decay, 0.001)

                # Execute the action and observe the reward
                new_state, reward, solved, _ = maze.step(action)
                new_state = bound_state(new_state, maze_boundary)
                total_reward += reward

                # Update Q(s,a) with the temporal-difference error
                q_max = np.amax(q_table[new_state])
                loss = (reward + discount_factor * q_max
                        - q_table[state + (action, )])
                q_table[state + (action, )] += learning_rate * loss
                losses.append(loss)

                # For next iteration
                state = new_state

                # Render PyGame frame
                if display:
                    maze.render()

                if solved:
                    fail_streak = 0
                    returns.append(total_reward)
                    # Only a sufficiently short solution extends the streak
                    if t <= max_steps_for_success:
                        success_streak += 1
                    else:
                        success_streak = 0
                    break
            else:
                # The step budget ran out without solving the maze: count
                # the failure and break any success streak in progress.
                fail_streak += 1
                success_streak = 0

            # Conditions for Win / Loss
            # NOTE(review): `losing_streak` is neither a parameter nor a
            # local of this function; it is presumably defined at module
            # level -- confirm.
            if fail_streak > losing_streak:
                break
            if success_streak > winning_streak:
                winning_episode = episode
                break

        return {
            "winning_episode": winning_episode,
            "losses": losses,
            "avg_losses": [
                np.average(losses[i * 10:(i + 1) * 10])
                for i in range(len(losses) // 10)
            ],
            "returns": returns,
        }
    finally:
        # Always shut pygame down, even if training raised.
        quit_pygame()
def _handle(self, evt, *args, **kwargs):
    """Forward ``evt`` to the receipt callback, if any, then quit pygame.

    :param evt: the event being handled; passed through to the callback
    :param args: extra positional arguments passed through to the callback
    :param kwargs: extra keyword arguments passed through to the callback
    """
    # Identity test: `!= None` would go through the callback object's own
    # comparison methods instead of simply checking for "no callback set".
    if self._on_receipt is not None:
        self._on_receipt(evt, *args, **kwargs)
    # NOTE(review): quit is unconditional here; confirm it was not meant to
    # run only when a callback is registered.
    quit_pygame()