def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=True): self.env = env self.size = size if size is not None else ((self.env.grid_size[0] + 1) * self.env.block_size, (self.env.grid_size[1] + 1) * self.env.block_size) self.width, self.height = self.size self.bg_color = self.colors['white'] self.road_width = 5 self.road_color = self.colors['black'] self.quit = False self.start_time = None self.current_time = 0.0 self.last_updated = 0.0 self.update_delay = update_delay # duration between each step (in secs) self.display = display if self.display: try: self.pygame = importlib.import_module('pygame') self.pygame.init() self.screen = self.pygame.display.set_mode(self.size) self.frame_delay = max(1, int(self.update_delay * 1000)) # delay between GUI frames in ms (min: 1) self.agent_sprite_size = (32, 32) self.agent_circle_radius = 10 # radius of circle, when using simple representation for agent in self.env.agent_states: agent._sprite = self.pygame.transform.smoothscale(self.pygame.image.load(os.path.join("images", "car-{}.png".format(agent.color))), self.agent_sprite_size) agent._sprite_size = (agent._sprite.get_width(), agent._sprite.get_height()) self.font = self.pygame.font.Font(None, 28) self.paused = False except ImportError as e: self.display = False print "Simulator.__init__(): Unable to import pygame; display disabled.\n{}: {}".format(e.__class__.__name__, e) except Exception as e: self.display = False print "Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(e.__class__.__name__, e) # Setup metrics to report self.live_plot = live_plot self.rep = Reporter(metrics=['net_reward', 'avg_net_reward', 'final_deadline', 'success'], live_plot=self.live_plot) self.avg_net_reward_window = 30
def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=False):
    """Attach the simulator to `env`; optionally create a PyGame window.

    GUI setup failures are swallowed silently and the simulator falls back
    to headless mode (the diagnostic prints are kept disabled below).
    """
    self.env = env
    if size is None:
        # Default window: the grid plus one block of margin on each axis.
        block = self.env.block_size
        size = ((self.env.grid_size[0] + 1) * block,
                (self.env.grid_size[1] + 1) * block)
    self.size = size
    self.width, self.height = self.size

    # Static drawing parameters.
    self.bg_color = self.colors['white']
    self.road_width = 5
    self.road_color = self.colors['black']

    # Simulation clock / main-loop state.
    self.quit = False
    self.start_time = None
    self.current_time = 0.0
    self.last_updated = 0.0
    self.update_delay = update_delay  # duration between each step (in secs)

    self.display = display
    if self.display:
        try:
            self.pygame = importlib.import_module('pygame')
            self.pygame.init()
            self.screen = self.pygame.display.set_mode(self.size)
            self.frame_delay = max(1, int(self.update_delay * 1000))  # ms between GUI frames (min: 1)
            self.agent_sprite_size = (32, 32)
            self.agent_circle_radius = 10  # fallback circle radius when an agent has no sprite
            for agent in self.env.agent_states:
                image_file = os.path.join("images", "car-{}.png".format(agent.color))
                loaded = self.pygame.image.load(image_file)
                agent._sprite = self.pygame.transform.smoothscale(loaded, self.agent_sprite_size)
                agent._sprite_size = (agent._sprite.get_width(), agent._sprite.get_height())
            self.font = self.pygame.font.Font(None, 28)
            self.paused = False
        except ImportError as e:
            self.display = False
            #print "Simulator.__init__(): Unable to import pygame; display disabled.\n{}: {}".format(e.__class__.__name__, e)
        except Exception as e:
            self.display = False
            # print "Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(e.__class__.__name__, e)

    # Per-trial metric reporting.
    self.live_plot = live_plot
    self.rep = Reporter(metrics=['net_reward', 'avg_net_reward', 'final_deadline', 'success'],
                        live_plot=self.live_plot)
    self.avg_net_reward_window = 10  # rolling-mean window, in trials
def __init__(self, env):
    """Create the learning agent and initialize its Q-learning bookkeeping."""
    # Base class sets self.env = env, state = None, next_waypoint = None,
    # and a default color.
    super(LearningAgent, self).__init__(env)
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # supplies next_waypoint each step

    # The four moves the smartcab can make at an intersection.
    self.actions = [None, 'forward', 'left', 'right']

    # Q-learning hyper-parameters.
    self.alpha = 0.5  # learning rate (alpha), 0 < alpha < 1
    self.gamma = 0.8  # discount factor (gamma), 0 < gamma < 1
    self.eps = 1      # exploration rate (epsilon)

    # Previous step's transition, filled in as the agent drives.
    self.action_0 = None  # action taken at t-1
    self.reward_0 = None  # reward received at t-1
    self.state_0 = None   # state observed at t-1

    self.Q = {}          # Q table, populated lazily per visited state
    self.experience = 1  # smartcab "experience"; grows each step

    # Infraction metrics reported per step.
    self.live_plot = False
    self.infractions_rep = Reporter(metrics=['invalid_moves'], live_plot=self.live_plot)
class Simulator(object):
    """Simulates agents in a dynamic smartcab environment.

    Uses PyGame to display GUI, if available.
    """

    # Named RGB colors shared by all drawing code.
    colors = {
        'black'   : (  0,   0,   0),
        'white'   : (255, 255, 255),
        'red'     : (255,   0,   0),
        'green'   : (  0, 255,   0),
        'blue'    : (  0,   0, 255),
        'cyan'    : (  0, 200, 200),
        'magenta' : (200,   0, 200),
        'yellow'  : (255, 255,   0),
        'orange'  : (255, 128,   0)
    }

    def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=False):
        """Attach to `env`; GUI setup is optional and degrades to headless mode."""
        self.env = env
        # Default window size: grid plus one block of margin on each axis.
        self.size = size if size is not None else ((self.env.grid_size[0] + 1) * self.env.block_size, (self.env.grid_size[1] + 1) * self.env.block_size)
        self.width, self.height = self.size
        self.bg_color = self.colors['white']
        self.road_width = 5
        self.road_color = self.colors['black']

        # Main-loop / clock state.
        self.quit = False
        self.start_time = None
        self.current_time = 0.0
        self.last_updated = 0.0
        self.update_delay = update_delay  # duration between each step (in secs)

        self.display = display
        if self.display:
            try:
                # Lazy import so the simulator still runs without pygame installed.
                self.pygame = importlib.import_module('pygame')
                self.pygame.init()
                self.screen = self.pygame.display.set_mode(self.size)
                self.frame_delay = max(1, int(self.update_delay * 1000))  # delay between GUI frames in ms (min: 1)
                self.agent_sprite_size = (32, 32)
                self.agent_circle_radius = 10  # radius of circle, when using simple representation
                for agent in self.env.agent_states:
                    agent._sprite = self.pygame.transform.smoothscale(self.pygame.image.load(os.path.join("images", "car-{}.png".format(agent.color))), self.agent_sprite_size)
                    agent._sprite_size = (agent._sprite.get_width(), agent._sprite.get_height())
                self.font = self.pygame.font.Font(None, 28)
                self.paused = False
            except ImportError as e:
                # pygame unavailable: fall back to headless (diagnostic left disabled).
                self.display = False
                #print "Simulator.__init__(): Unable to import pygame; display disabled.\n{}: {}".format(e.__class__.__name__, e)
            except Exception as e:
                # Any other GUI init failure: fall back to headless (diagnostic left disabled).
                self.display = False
                # print "Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(e.__class__.__name__, e)

        # Setup metrics to report
        self.live_plot = live_plot
        self.rep = Reporter(metrics=['net_reward', 'avg_net_reward', 'final_deadline', 'success'], live_plot=self.live_plot)
        self.avg_net_reward_window = 30  # rolling-mean window, in trials

    def run(self, n_trials=1):
        """Run `n_trials` trips, collect per-trial metrics, and return the
        primary agent's summary stats as a 6-tuple:
        (n_dest_reached, last_dest_fail, sum_time_left, n_penalties,
         last_penalty, len(qvals)).
        """
        self.quit = False
        self.rep.reset()
        for trial in xrange(n_trials):
            print "Simulator.run(): Trial {}".format(trial)  # [debug]
            self.env.reset()
            self.current_time = 0.0
            self.last_updated = 0.0
            self.start_time = time.time()
            # Penalty count before this trial; compared after it to detect new penalties.
            prev_penalty = self.env.primary_agent.n_penalties
            while True:
                try:
                    # Update current time
                    self.current_time = time.time() - self.start_time
                    #print "Simulator.run(): current_time = {:.3f}".format(self.current_time)

                    # Handle GUI events
                    if self.display:
                        for event in self.pygame.event.get():
                            if event.type == self.pygame.QUIT:
                                self.quit = True
                            elif event.type == self.pygame.KEYDOWN:
                                if event.key == 27:  # Esc
                                    self.quit = True
                                elif event.unicode == u' ':
                                    self.paused = True
                        if self.paused:
                            self.pause()

                    # Update environment
                    if self.current_time - self.last_updated >= self.update_delay:
                        self.env.step()
                        # TODO: Log step data
                        self.last_updated = self.current_time

                    # Render GUI and sleep
                    if self.display:
                        self.render()
                        self.pygame.time.wait(self.frame_delay)
                except KeyboardInterrupt:
                    self.quit = True
                finally:
                    if self.quit or self.env.done:
                        # Trial over: record which trial last failed to reach the
                        # destination and which trial last incurred a penalty.
                        state = self.env.agent_states[self.env.primary_agent]
                        if state['location'] != state['destination']:
                            self.env.primary_agent.last_dest_fail = trial + 1
                        if self.env.primary_agent.n_penalties > prev_penalty:
                            self.env.primary_agent.last_penalty = trial + 1
                        break

            # NOTE(review): p_agent is only bound inside the loop — the return
            # below would raise NameError if n_trials == 0; confirm callers
            # always pass n_trials >= 1.
            p_agent = self.env.primary_agent
            if self.quit:
                break

            # Collect/update metrics
            self.rep.collect('net_reward', trial, self.env.trial_data['net_reward'])  # total reward obtained in this trial
            self.rep.collect('avg_net_reward', trial, np.mean(self.rep.metrics['net_reward'].ydata[-self.avg_net_reward_window:]))  # rolling mean of reward
            self.rep.collect('final_deadline', trial, self.env.trial_data['final_deadline'])  # final deadline value (time remaining)
            self.rep.collect('success', trial, self.env.trial_data['success'])
            if self.live_plot:
                self.rep.refresh_plot()  # autoscales axes, draws stuff and flushes events

        # Report final metrics
        if self.display:
            self.pygame.display.quit()  # need to shutdown pygame before showing metrics plot
            # TODO: Figure out why having both game and plot displays makes things crash!
        if self.live_plot:
            self.rep.show_plot()  # holds till user closes plot window

        return (p_agent.n_dest_reached, p_agent.last_dest_fail, p_agent.sum_time_left, p_agent.n_penalties, p_agent.last_penalty, len(p_agent.qvals))

    def render(self):
        """Draw one frame: roads, traffic lights, agents, destinations, overlay text."""
        # Clear screen
        self.screen.fill(self.bg_color)

        # Draw elements
        # * Static elements
        for road in self.env.roads:
            self.pygame.draw.line(self.screen, self.road_color, (road[0][0] * self.env.block_size, road[0][1] * self.env.block_size), (road[1][0] * self.env.block_size, road[1][1] * self.env.block_size), self.road_width)

        for intersection, traffic_light in self.env.intersections.iteritems():
            self.pygame.draw.circle(self.screen, self.road_color, (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size), 10)
            if traffic_light.state:  # North-South is open
                self.pygame.draw.line(self.screen, self.colors['green'],
                                      (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size - 15),
                                      (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size + 15), self.road_width)
            else:  # East-West is open
                self.pygame.draw.line(self.screen, self.colors['green'],
                                      (intersection[0] * self.env.block_size - 15, intersection[1] * self.env.block_size),
                                      (intersection[0] * self.env.block_size + 15, intersection[1] * self.env.block_size), self.road_width)

        # * Dynamic elements
        for agent, state in self.env.agent_states.iteritems():
            # Compute precise agent location here (back from the intersection some)
            agent_offset = (2 * state['heading'][0] * self.agent_circle_radius, 2 * state['heading'][1] * self.agent_circle_radius)
            agent_pos = (state['location'][0] * self.env.block_size - agent_offset[0],
                         state['location'][1] * self.env.block_size - agent_offset[1])
            agent_color = self.colors[agent.color]
            if hasattr(agent, '_sprite') and agent._sprite is not None:
                # Draw agent sprite (image), properly rotated
                rotated_sprite = agent._sprite if state['heading'] == (1, 0) else self.pygame.transform.rotate(agent._sprite, 180 if state['heading'][0] == -1 else state['heading'][1] * -90)
                self.screen.blit(rotated_sprite,
                                 self.pygame.rect.Rect(agent_pos[0] - agent._sprite_size[0] / 2, agent_pos[1] - agent._sprite_size[1] / 2, agent._sprite_size[0], agent._sprite_size[1]))
            else:
                # Draw simple agent (circle with a short line segment poking out to indicate heading)
                self.pygame.draw.circle(self.screen, agent_color, agent_pos, self.agent_circle_radius)
                self.pygame.draw.line(self.screen, agent_color, agent_pos, state['location'], self.road_width)
            if agent.get_next_waypoint() is not None:
                self.screen.blit(self.font.render(agent.get_next_waypoint(), True, agent_color, self.bg_color), (agent_pos[0] + 10, agent_pos[1] + 10))
            if state['destination'] is not None:
                # Destination marker: filled dot inside a ring.
                self.pygame.draw.circle(self.screen, agent_color, (state['destination'][0] * self.env.block_size, state['destination'][1] * self.env.block_size), 6)
                self.pygame.draw.circle(self.screen, agent_color, (state['destination'][0] * self.env.block_size, state['destination'][1] * self.env.block_size), 15, 2)

        # * Overlays
        text_y = 10
        for text in self.env.status_text.split('\n'):
            self.screen.blit(self.font.render(text, True, self.colors['red'], self.bg_color), (100, text_y))
            text_y += 20

        # Flip buffers
        self.pygame.display.flip()

    def pause(self):
        """Block until any key is pressed; paused time is excluded from the trial clock."""
        abs_pause_time = time.time()
        pause_text = "[PAUSED] Press any key to continue..."
        self.screen.blit(self.font.render(pause_text, True, self.colors['cyan'], self.bg_color), (100, self.height - 40))
        self.pygame.display.flip()
        #print pause_text # [debug]
        while self.paused:
            for event in self.pygame.event.get():
                if event.type == self.pygame.KEYDOWN:
                    self.paused = False
            self.pygame.time.wait(self.frame_delay)
        # Erase the pause banner by re-rendering it in the background color.
        self.screen.blit(self.font.render(pause_text, True, self.bg_color, self.bg_color), (100, self.height - 40))
        # Shift the trial start time forward so the pause doesn't count against it.
        self.start_time += (time.time() - abs_pause_time)
class Simulator(object):
    """Simulates agents in a dynamic smartcab environment.

    Uses PyGame to display GUI, if available.
    """

    # Named RGB colors shared by all drawing code.
    colors = {
        'black': (0, 0, 0),
        'white': (255, 255, 255),
        'red': (255, 0, 0),
        'green': (0, 255, 0),
        'blue': (0, 0, 255),
        'cyan': (0, 200, 200),
        'magenta': (200, 0, 200),
        'yellow': (255, 255, 0),
        'orange': (255, 128, 0)
    }

    def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=False):
        """Attach to `env`; GUI setup is optional and degrades to headless mode."""
        self.env = env
        # Default window size: grid plus one block of margin on each axis.
        self.size = size if size is not None else (
            (self.env.grid_size[0] + 1) * self.env.block_size,
            (self.env.grid_size[1] + 1) * self.env.block_size)
        self.width, self.height = self.size
        self.bg_color = self.colors['white']
        self.road_width = 5
        self.road_color = self.colors['black']

        # Main-loop / clock state.
        self.quit = False
        self.start_time = None
        self.current_time = 0.0
        self.last_updated = 0.0
        self.update_delay = update_delay  # duration between each step (in secs)

        self.display = display
        if self.display:
            try:
                # Lazy import so the simulator still runs without pygame installed.
                self.pygame = importlib.import_module('pygame')
                self.pygame.init()
                self.screen = self.pygame.display.set_mode(self.size)
                self.frame_delay = max(
                    1, int(self.update_delay * 1000))  # delay between GUI frames in ms (min: 1)
                self.agent_sprite_size = (32, 32)
                self.agent_circle_radius = 10  # radius of circle, when using simple representation
                for agent in self.env.agent_states:
                    # NOTE(review): this variant loads sprites from "../images"
                    # (relative to the working directory) — confirm against the
                    # project layout; the sibling Simulator uses "images".
                    agent._sprite = self.pygame.transform.smoothscale(
                        self.pygame.image.load(
                            os.path.join("../images", "car-{}.png".format(agent.color))),
                        self.agent_sprite_size)
                    agent._sprite_size = (agent._sprite.get_width(), agent._sprite.get_height())
                self.font = self.pygame.font.Font(None, 28)
                self.paused = False
            except ImportError as e:
                # pygame unavailable: report and fall back to headless.
                self.display = False
                print "Simulator.__init__(): Unable to import pygame; display disabled.\n{}: {}".format(
                    e.__class__.__name__, e)
            except Exception as e:
                # Any other GUI init failure: report and fall back to headless.
                self.display = False
                print "Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(
                    e.__class__.__name__, e)

        # Setup metrics to report
        self.live_plot = live_plot
        self.rep = Reporter(metrics=[
            'net_reward', 'avg_net_reward', 'final_deadline', 'success'
        ], live_plot=self.live_plot)
        self.avg_net_reward_window = 10  # rolling-mean window, in trials

    def run(self, n_trials=1):
        """Run `n_trials` trips, collecting per-trial metrics. Returns nothing."""
        self.quit = False
        self.rep.reset()
        for trial in xrange(n_trials):
            print "Simulator.run(): Trial {}".format(trial)  # [debug]
            self.env.reset()
            self.current_time = 0.0
            self.last_updated = 0.0
            self.start_time = time.time()
            while True:
                try:
                    # Update current time
                    self.current_time = time.time() - self.start_time
                    #print "Simulator.run(): current_time = {:.3f}".format(self.current_time)

                    # Handle GUI events
                    if self.display:
                        for event in self.pygame.event.get():
                            if event.type == self.pygame.QUIT:
                                self.quit = True
                            elif event.type == self.pygame.KEYDOWN:
                                if event.key == 27:  # Esc
                                    self.quit = True
                                elif event.unicode == u' ':
                                    self.paused = True
                        if self.paused:
                            self.pause()

                    # Update environment
                    if self.current_time - self.last_updated >= self.update_delay:
                        self.env.step()
                        # TODO: Log step data
                        self.last_updated = self.current_time

                    # Render GUI and sleep
                    if self.display:
                        self.render()
                        self.pygame.time.wait(self.frame_delay)
                except KeyboardInterrupt:
                    self.quit = True
                finally:
                    if self.quit or self.env.done:
                        break

            if self.quit:
                break

            # Collect/update metrics
            self.rep.collect('net_reward', trial,
                             self.env.trial_data['net_reward'])  # total reward obtained in this trial
            self.rep.collect(
                'avg_net_reward', trial,
                np.mean(self.rep.metrics['net_reward'].ydata[
                    -self.avg_net_reward_window:]))  # rolling mean of reward
            self.rep.collect('final_deadline', trial,
                             self.env.trial_data['final_deadline'])  # final deadline value (time remaining)
            self.rep.collect('success', trial, self.env.trial_data['success'])
            if self.live_plot:
                self.rep.refresh_plot()  # autoscales axes, draws stuff and flushes events

        # Report final metrics
        if self.display:
            self.pygame.display.quit()  # need to shutdown pygame before showing metrics plot
            # TODO: Figure out why having both game and plot displays makes things crash!
        if self.live_plot:
            self.rep.show_plot()  # holds till user closes plot window

    def render(self):
        """Draw one frame: roads, traffic lights, agents, destinations, overlay text."""
        # Clear screen
        self.screen.fill(self.bg_color)

        # Draw elements
        # * Static elements
        for road in self.env.roads:
            self.pygame.draw.line(self.screen, self.road_color,
                                  (road[0][0] * self.env.block_size, road[0][1] * self.env.block_size),
                                  (road[1][0] * self.env.block_size, road[1][1] * self.env.block_size),
                                  self.road_width)

        for intersection, traffic_light in self.env.intersections.iteritems():
            self.pygame.draw.circle(self.screen, self.road_color,
                                    (intersection[0] * self.env.block_size,
                                     intersection[1] * self.env.block_size), 10)
            if traffic_light.state:  # North-South is open
                self.pygame.draw.line(
                    self.screen, self.colors['green'],
                    (intersection[0] * self.env.block_size,
                     intersection[1] * self.env.block_size - 15),
                    (intersection[0] * self.env.block_size,
                     intersection[1] * self.env.block_size + 15), self.road_width)
            else:  # East-West is open
                self.pygame.draw.line(
                    self.screen, self.colors['green'],
                    (intersection[0] * self.env.block_size - 15,
                     intersection[1] * self.env.block_size),
                    (intersection[0] * self.env.block_size + 15,
                     intersection[1] * self.env.block_size), self.road_width)

        # * Dynamic elements
        for agent, state in self.env.agent_states.iteritems():
            # Compute precise agent location here (back from the intersection some)
            agent_offset = (2 * state['heading'][0] * self.agent_circle_radius,
                            2 * state['heading'][1] * self.agent_circle_radius)
            agent_pos = (state['location'][0] * self.env.block_size - agent_offset[0],
                         state['location'][1] * self.env.block_size - agent_offset[1])
            agent_color = self.colors[agent.color]
            if hasattr(agent, '_sprite') and agent._sprite is not None:
                # Draw agent sprite (image), properly rotated
                rotated_sprite = agent._sprite if state['heading'] == (
                    1, 0) else self.pygame.transform.rotate(
                        agent._sprite,
                        180 if state['heading'][0] == -1 else state['heading'][1] * -90)
                self.screen.blit(
                    rotated_sprite,
                    self.pygame.rect.Rect(
                        agent_pos[0] - agent._sprite_size[0] / 2,
                        agent_pos[1] - agent._sprite_size[1] / 2,
                        agent._sprite_size[0], agent._sprite_size[1]))
            else:
                # Draw simple agent (circle with a short line segment poking out to indicate heading)
                self.pygame.draw.circle(self.screen, agent_color, agent_pos,
                                        self.agent_circle_radius)
                self.pygame.draw.line(self.screen, agent_color, agent_pos,
                                      state['location'], self.road_width)
            if agent.get_next_waypoint() is not None:
                self.screen.blit(
                    self.font.render(agent.get_next_waypoint(), True, agent_color,
                                     self.bg_color),
                    (agent_pos[0] + 10, agent_pos[1] + 10))
            if state['destination'] is not None:
                # Destination marker: filled dot inside a ring.
                self.pygame.draw.circle(
                    self.screen, agent_color,
                    (state['destination'][0] * self.env.block_size,
                     state['destination'][1] * self.env.block_size), 6)
                self.pygame.draw.circle(
                    self.screen, agent_color,
                    (state['destination'][0] * self.env.block_size,
                     state['destination'][1] * self.env.block_size), 15, 2)

        # * Overlays
        text_y = 10
        for text in self.env.status_text.split('\n'):
            self.screen.blit(
                self.font.render(text, True, self.colors['red'], self.bg_color),
                (100, text_y))
            text_y += 20

        # Flip buffers
        self.pygame.display.flip()

    def pause(self):
        """Block until any key is pressed; paused time is excluded from the trial clock."""
        abs_pause_time = time.time()
        pause_text = "[PAUSED] Press any key to continue..."
        self.screen.blit(
            self.font.render(pause_text, True, self.colors['cyan'], self.bg_color),
            (100, self.height - 40))
        self.pygame.display.flip()
        print pause_text  # [debug]
        while self.paused:
            for event in self.pygame.event.get():
                if event.type == self.pygame.KEYDOWN:
                    self.paused = False
            self.pygame.time.wait(self.frame_delay)
        # Erase the pause banner by re-rendering it in the background color.
        self.screen.blit(
            self.font.render(pause_text, True, self.bg_color, self.bg_color),
            (100, self.height - 40))
        # Shift the trial start time forward so the pause doesn't count against it.
        self.start_time += (time.time() - abs_pause_time)
def run(): """Run the agent for a finite number of trials.""" n_trials = 100 op_alpha = 0.0 # optimal learning rate op_gamma = 0.0 # optimal discount factor op_eps = 0.0 # optimal exploration rate # Setup metrics to report per simulation param_metrics = Reporter(metrics=['alpha', 'gamma', 'epsilon', 'trips_perf', 'infractions_perf'], live_plot=True) # Set up environment and agent e = Environment(num_dummies=3) # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.001, display=False, live_plot=True) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False idx = 0 eps_range = [0] #np.arange(0,0.2,0.05) gamma_range = [0.1] #np.arange(0,0.6,0.1) alpha_range = [0.4] #np.arange(0.2,0.6,0.05) for epsilon in eps_range: for gamma in gamma_range: for alpha in alpha_range: print "Running simulation for: ", epsilon, gamma, alpha # Run n_sims simulations for given parameters and store results in lists trips_perf and infractions_perf n_sims = 1 trips_perf = [] infractions_perf = [] for i in range(n_sims): # Set agent parameters and reset experience a.alpha = alpha a.gama = gamma a.eps = epsilon a.experience = 1 a.infractions_rep.reset() sim.run(n_trials=n_trials) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line # print sim.rep.summary() for metric in sim.rep.summary(): if metric.name == 'success': t_p = metric.sum() * 100 / n_trials print "Name: {}, samples: {}, Performance: {}%".format(metric.name, len(metric), t_p) trips_perf.append(t_p) for metric in a.infractions_rep.summary(): if metric.name == 'invalid_moves': i_p = abs(metric[metric == -1.0].sum()) * 100 / 
metric.size print "Name: {}, samples: {}, Performance: {}%".format(metric.name, len(metric), i_p) infractions_perf.append(i_p) # Collect metrics param_metrics.collect('alpha', idx, alpha) param_metrics.collect('gamma', idx, gamma) param_metrics.collect('epsilon', idx, epsilon) param_metrics.collect('trips_perf', idx, pd.Series(trips_perf).mean()) param_metrics.collect('infractions_perf', idx, pd.Series(infractions_perf).mean()) idx += 1 # Show results results = pd.DataFrame(param_metrics.summary()).transpose() print results print 'Best configuration for trips performance:' print results.loc[results['trips_perf'].idxmax()] print 'Best configuration for traffic rules performance:' print results.loc[results['infractions_perf'].idxmin()] param_metrics.refresh_plot() param_metrics.show_plot()
class LearningAgent(Agent):
    """An agent that learns to drive in the smartcab world via tabular Q-learning."""

    def __init__(self, env):
        """Create the agent, its route planner, and its Q-learning bookkeeping."""
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint

        self.actions = [None, 'forward', 'left', 'right']

        # Parameters in the Q-Learning algorithm
        self.alpha = 0.5  # learning rate (alpha), 0 < alpha < 1
        self.gamma = 0.8  # discount factor (gamma), 0 < gamma < 1
        self.eps = 1      # exploration rate (epsilon)

        # Previous step's transition, needed for the one-step-delayed Q update.
        self.action_0 = None  # Action in t-1
        self.reward_0 = None  # Reward in t-1
        self.state_0 = None   # State in t-1

        self.Q = {}          # Q table, populated lazily per visited state
        self.experience = 1  # smartcab "experience", increases with time

        # Setup infraction metrics to report
        self.live_plot = False
        self.infractions_rep = Reporter(metrics=['invalid_moves'], live_plot=self.live_plot)

    def reset(self, destination=None):
        """Prepare for a new trip: new route, and clear the delayed-update state."""
        self.planner.route_to(destination)
        self.action_0 = None
        self.reward_0 = None
        self.state_0 = None

    def update(self, t):
        """One driving step: observe, pick an action (epsilon-greedy), act, learn."""
        # Decay rates grow with experience, so alpha and epsilon shrink over time.
        alpha_dr = self.experience
        eps_dr = math.log(self.experience + 2)  # "+2" avoids div. by zero
        self.experience += 1

        # Gather inputs
        self.next_waypoint = self.planner.next_waypoint()  # from route planner, also displayed by simulator
        inputs = self.env.sense(self)
        deadline = self.env.get_deadline(self)  # NOTE(review): unused below; kept for parity with the env API

        # Current state: sensed intersection inputs plus the planner's waypoint.
        self.state = (tuple(inputs.values()), self.next_waypoint)

        # Create zero-initialized state entry if current state doesn't exist in Q table
        if self.state not in self.Q:
            self.Q[self.state] = {action: 0.0 for action in self.actions}

        # Epsilon-greedy action selection. Compute the best Q-value once
        # (previously recomputed inside the comprehension for every action).
        best_q = max(self.Q[self.state].values())
        best_actions = [act for act, q_value in self.Q[self.state].iteritems() if q_value == best_q]
        if self.eps / eps_dr < random.random():
            action = best_actions[random.randint(0, len(best_actions) - 1)]  # exploit: best known action
        else:
            action = self.actions[random.randint(0, len(self.actions) - 1)]  # explore: random action

        # Execute action and get reward
        reward = self.env.act(self, action)
        self.infractions_rep.collect('invalid_moves', t, reward)
        if self.live_plot:
            self.infractions_rep.refresh_plot()  # autoscales axes, draws stuff and flushes events

        # Learn: one-step-delayed Q update. Skipped on the first step of a
        # trip, when no previous (state, action, reward) exists yet.
        if not (None in [self.state_0, self.action_0, self.reward_0]):
            lr = self.alpha / alpha_dr  # decayed learning rate
            self.Q[self.state_0][self.action_0] = (
                (1 - lr) * self.Q[self.state_0][self.action_0]
                + lr * (self.reward_0 + self.gamma * best_q))

        # Save current state, action and reward for next step
        self.state_0 = self.state
        self.action_0 = action
        self.reward_0 = reward