示例#1
0
 def init_mdp_old(self):
     """Build the list-based MDP table with random initial values.

     One entry is created per junction id reported by
     ``traci.junction.getIDList()``. Entry ``k`` (0-based) describes the
     network node that is looked up under the id ``str(k + 1)``.

     Returns:
         A list of lists. Each inner list holds one
         ``[edge_id, q_value, reward]`` triple per outgoing edge of the
         node, where the edge is the action, and ``q_value`` and ``reward``
         (travel time on the link) are each ``-50 * random.random()``.
     """
     total_nodes = len(traci.junction.getIDList())
     table = []
     # Nodes are fetched from the network by the string ids "1" .. "N".
     for number in range(1, total_nodes + 1):
         junction = self.network.getNode(str(number))
         # Every outgoing edge of the node is one available action.
         actions = [
             [str(Edge.getID(edge)), -50 * random.random(), -50 * random.random()]
             for edge in Node.getOutgoing(junction)
         ]
         table.append(actions)
     return table
示例#2
0
 def init_mdp(self):
     """Build the dict-based MDP table with random initial values.

     Junction ids come from ``traci.junction.getIDList()``. Junctions and
     outgoing edges whose id contains ``':'`` are left out of the table
     (presumably these are SUMO-internal elements -- confirm against the
     network in use).

     Returns:
         A dict mapping junction id -> dict mapping outgoing edge id ->
         ``[q_value, reward]``. The edge is the action; ``q_value`` and
         ``reward`` (travel time on the link) are each
         ``10 * random.random()``.
     """
     mdp = {}
     for node_id in traci.junction.getIDList():
         if ':' in node_id:
             continue
         node = self.network.getNode(node_id)
         states = {}
         for edge in Node.getOutgoing(node):
             edge_id = edge.getID()
             # Apply the ':' filter where the entry is created, so that a
             # filtered edge can never show up as a selectable action.
             if ':' in edge_id:
                 continue
             states[edge_id] = [10 * random.random(), 10 * random.random()]
         mdp[node_id] = states
     return mdp
示例#3
0
    def process_vehicle(self, driver):
        """Update the routing and learning state of one driver.

        The edge the vehicle is currently on is read from TraCI. If
        ``self.isEdge`` rejects that edge id, nothing happens. Otherwise one
        of three cases applies:

        * The vehicle has moved to a different edge than the one stored in
          ``current_link``: the travel time measured on the previous link is
          stored as the reward of the matching action, and the driver is
          flagged (``isUpdate``) as needing a new decision.
        * The vehicle is still on ``current_link`` and that link is its
          destination: the q-value of the last action is set to the negated
          total travel time, and the driver is marked as arrived and removed
          from ``self.running_drivers``.
        * The vehicle is still on ``current_link``, is not at its
          destination, and ``isUpdate`` is set: an action (next edge) is
          chosen epsilon-greedily at the node the current edge leads to, its
          q-value is updated, and the vehicle receives a new route.

        Args:
            driver: Identifier of the driver; it is converted with ``int()``
                to index ``self.drivers`` and is passed unchanged to
                ``self.return_best_action``.

        Returns:
            None.
        """
        agent = self.drivers[int(driver)]
        id_current_edge = traci.lane.getEdgeID(traci.vehicle.getLaneID(agent.id))

        # Every case below requires isEdge() to accept the current edge id.
        if not self.isEdge(id_current_edge):
            return

        if agent.current_link != id_current_edge:
            # The vehicle left its previous link: store the travel time
            # measured on that link as the reward of the matching action.
            origin_node = self.get_origin_node(agent.current_link).getID()
            finished_link = str(agent.current_link)
            for action in agent.mdp[origin_node]:
                if str(action) == finished_link:
                    link_tt = agent.get_travel_time_on_link(self.get_time())
                    if link_tt < 0:
                        # 'errado' is Portuguese for 'wrong': a negative
                        # travel time is only reported, not corrected.
                        print('errado')
                    agent.mdp[origin_node][action][1] = link_tt
                    break
            # Update the driver's properties for the link just entered.
            agent.current_link = id_current_edge
            agent.link_tt = self.get_time()
            agent.isUpdate = True

        elif agent.current_link == agent.destination:
            # The vehicle is on its destination link.
            last_node_id = self.get_origin_node(agent.current_link).getID()
            arrival_action = str(agent.current_link)
            last_actions = agent.mdp[last_node_id]
            if arrival_action in last_actions:
                # The original note says this should eventually use the total
                # travel time * -1, which is what is stored here already.
                last_actions[arrival_action][0] = -agent.get_total_travel_time(self.get_time())
            # The driver reached their goal.
            agent.isArrived = True
            self.running_drivers.remove(agent.id)

        elif agent.isUpdate:
            # The driver needs a new route: decide at the node this edge
            # leads to.
            next_node = self.get_destination_node(id_current_edge)
            id_next_node = Node.getID(next_node)

            if random.random() < self.qlearning.epislon:
                # Exploration: pick one action at random. The keys are
                # materialised as a list so that indexing also works where
                # dict.keys() returns a view.
                candidates = list(agent.mdp[id_next_node].keys())
                action_key = candidates[random.randint(0, len(candidates) - 1)]
            else:
                # Exploitation: ask for the best action at that node.
                action_key = self.return_best_action(driver, id_next_node)

            # Current q-value and (negated) stored reward of the chosen action.
            chosen_entry = agent.mdp[id_next_node][action_key]
            current_q_value = chosen_entry[0]
            reward = -1 * chosen_entry[1]

            # Q-value of the best action at the node the chosen edge leads to.
            future_node = self.get_destination_node(action_key)
            id_future_node = Node.getID(future_node)
            best_future_action = self.return_best_action(driver, id_future_node)
            best_action = agent.mdp[id_future_node][best_future_action][0]

            # New q-value: blend of the old value and the learning target.
            q_value = ((1 - self.qlearning.alpha) * current_q_value
                       + self.qlearning.alpha * (abs(reward) + self.qlearning.gamma * best_action))
            agent.mdp[id_next_node][action_key][0] = q_value

            # Compute the route from the current link to the chosen edge and
            # hand it to the vehicle.
            new_route = self.return_route(agent.current_link, action_key)
            traci.vehicle.setRoute(agent.id, new_route)

            # One more decision taken; wait for the next link change.
            agent.steps += 1
            agent.isUpdate = False