def demo(self):
    """ Function to run a single demo episode with the trained agents (exploration disabled) """
    self.env = None
    self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                           self.Model_dictionnary, self.actions_set,
                           Random_Seed=self.Random_Seed,
                           timesteps_per_second=self.timesteps_per_second,
                           mode='demo', delete_results=True, verbose=True)

    for idx, agent in self.Agents.items():
        agent.reset()
        agent.epsilon = 0  # Set the exploration rate to 0

    # Initialisation: choose a first action for every agent from the start state
    start_state = self.env.get_state()
    actions = {}
    for idx, s in start_state.items():
        actions[idx] = self.Agents[idx].choose_action(s)

    while not self.env.done:
        SARSDs = self.env.step(actions)
        if self.env.action_required:
            actions = dict()
            for idx, sarsd in SARSDs.items():
                s, a, r, ns, d = sarsd
                # The next action must be chosen from "next_state", because that
                # is the state the simulator is currently in.
                actions[idx] = int(self.Agents[idx].choose_action(ns))

    self.env.Stop_Simulation(delete_results=True)
    self.env = None
def demo(self):
    """ Function to run a single demo episode, stepping from action to action """
    self.env = None
    self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                           self.Model_dictionnary, self.actions_set,
                           Random_Seed=self.Random_Seed,
                           timesteps_per_second=self.timesteps_per_second,
                           mode='demo', delete_results=True, verbose=True)

    start_state = self.env.get_state()
    actions = {}
    for idx, s in start_state.items():
        actions[idx] = self.Agents[idx].choose_action(s)

    # Simulation loop: run until the end of the simulation
    t = 0
    while (self.sim_length - 3) > self.env.global_counter:
        # self.Agents[idx].delay.append(self.env.SCUs[0].calculate_delay())
        SARSDs = self.env.step_to_next_action(actions)

        actions = dict()
        for idx, sarsd in SARSDs.items():
            s, a, r, ns, d = sarsd
            # The next action must be chosen from "next_state", because that
            # is the state the simulator is currently in.
            actions[idx] = int(self.Agents[idx].choose_action(ns))
        t += 1

    self.env = None
def get_data(self):
    """ Function to run a single training-mode episode and gather data with the current agents """
    self.env = None
    self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                           self.Model_dictionnary, self.actions_set,
                           self.Random_Seed,
                           timesteps_per_second=self.timesteps_per_second,
                           mode='training', delete_results=True, verbose=True)

    # Get initial state
    start_state = self.env.get_state()
    print("start")

    # Create dictionary for chosen actions for each agent and fill it
    actions = {}
    for idx, s in start_state.items():
        actions[idx] = self.Agents[idx].choose_action(s)

    # Simulation loop: run until the end of the simulation
    t = 0
    while (self.sim_length - 3) > self.env.global_counter:
        # self.Agents[idx].delay.append(self.env.SCUs[0].calculate_delay())
        SARSDs = self.env.step_to_next_action(actions)

        actions = dict()
        for idx, sarsd in SARSDs.items():
            s, a, r, ns, d = sarsd
            # The next action must be chosen from "next_state", because that
            # is the state the simulator is currently in.
            actions[idx] = int(self.Agents[idx].choose_action(ns))
        t += 1

    self.env = None
def train(self, number_of_episode, vissim=False):
    """ Function to train the agents, given the number of episodes of training """
    if vissim is not False:
        vissim.Simulation.Stop()
    self.env = None
    self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                           self.Model_dictionnary, actions_set='default_actions',
                           Random_Seed=self.Random_Seed,
                           timesteps_per_second=self.timesteps_per_second,
                           mode='training', delete_results=True, verbose=True, vissim=vissim)

    for idx, agent in self.Agents.items():
        agent.reset()

    start_state = self.env.get_state()

    # Episodic training loop
    while self.number_of_episode < number_of_episode:
        actions = {}
        for idx, s in start_state.items():
            actions[idx] = self.Agents[idx].choose_action(s)

        # Simulation loop: run until the end of the episode
        while True:
            SARSDs = self.env.step_to_next_action(actions)

            actions = dict()
            for idx, sarsd in SARSDs.items():
                s, a, r, ns, d = sarsd
                #print(sarsd)
                self.Agents[idx].remember(s, a, r, ns, d)
                if len(self.Agents[idx].memory) >= self.Agents[idx].n_step_size:
                    self.Agents[idx].learn()
                # The next action must be chosen from "next_state", because that
                # is the state the simulator is currently in.
                actions[idx] = int(self.Agents[idx].choose_action(ns))

            # Saving and monitoring of the agents at the end of each episode
            if self.env.done:
                self.env.reset()
                self.Random_Seed += 1
                self.number_of_episode += 1
                print('Episode {} is finished'.format(self.number_of_episode))

                # if (i+1) % reduce_entropy_every == 0:
                #     if Agents[idx].params['entropy'] >= entropy_threshold:
                #         Agents[idx].reduce_entropy()
                #         print("Agent {} : Entropy reduced to {}".format(idx, Agents[idx].params['entropy']))

                # Only for AC
                for idx, agent in self.Agents.items():
                    predicted_values, true_values, proba0, probas = agent.value_check(self.horizon, self.n_sample)
                    print("Agent {} : Predicted Values and True Return : \n {} \n {}".format(idx, predicted_values, true_values))
                    print("Agent {} : Proba distribution on those states : \n {}".format(idx, probas))
                    print("Agent {} : Proba distribution on the 0 state : \n {}".format(idx, proba0))

                    agent.average_reward = np.mean(agent.episode_reward)
                    agent.reward_storage.append(agent.average_reward)
                    print("Average Reward for Agent {} this episode : {}".format(idx, round(agent.average_reward, 2)))
                    agent.loss.append(agent.losses[2])
                    agent.best_agent(self.vissim_working_directory, self.model_name,
                                     self.Session_ID, self.Session_ID)
                    agent.reset()

                if self.number_of_episode % self.save_every == 0:
                    self.save(self.number_of_episode, self.save_location)
                break
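# Hedged sketch, not part of the original file: the train() loop above calls
# agent.learn() once the replay memory holds n_step_size transitions. The
# helper below illustrates the discounted n-step return such a learn() step
# typically bootstraps from; the function name and the default gamma are
# assumptions for illustration, not the repo's API.
def _n_step_return_sketch(rewards, gamma=0.99):
    """Discounted return over an n-step window of rewards (illustrative only)."""
    g = 0.0
    for r in reversed(rewards):
        g = r + gamma * g
    return g

# Example: _n_step_return_sketch([1.0, 0.0, 2.0]) == 1.0 + 0.99**2 * 2.0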
def test(self, vissim=False):
    """
    Function to test the agents on one episode with all the metrics:
    queues over time, delays, and the average reward of the agents.
    """
    if vissim is not False:
        vissim.Simulation.Stop()
    self.env = None
    self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                           self.Model_dictionnary, Random_Seed=self.Random_Seed,
                           timesteps_per_second=self.timesteps_per_second, mode='test',
                           actions_set='default_actions', delete_results=True,
                           verbose=True, vissim=vissim)

    # Counter to change the demand during the test
    demand_counter = 0
    self.env.change_demand(self.env.vehicle_demand[demand_counter])

    # Initialisation of the metrics
    Episode_Queues = {}                   # Queues at each junction
    Cumulative_Episode_Delays = {}        # Delay at each junction
    Cumulative_Episode_stop_Delays = {}   # Stop delay at each junction
    Cumulative_Totale_network_delay = [0]
    Cumulative_Totale_network_stop_delay = [0]

    queues = self.env.get_queues()
    for idx, junction_queues in queues.items():
        Episode_Queues[idx] = [junction_queues]
    delays = self.env.get_delays()
    for idx, junction_delay in delays.items():
        Cumulative_Episode_Delays[idx] = [junction_delay]
    stop_delays = self.env.get_stop_delays()
    for idx, junction_stop_delay in stop_delays.items():
        Cumulative_Episode_stop_Delays[idx] = [junction_stop_delay]

    for idx, agent in self.Agents.items():
        agent.reset()

    # Initialisation: choose a first action for every agent from the start state
    start_state = self.env.get_state()
    actions = {}
    for idx, s in start_state.items():
        actions[idx] = self.Agents[idx].choose_action(s)

    while not self.env.done:
        SARSDs = self.env.step(actions)

        # Read the queues and the delays, and accumulate them
        queues = self.env.get_queues()
        for idx, junction_queues in queues.items():
            Episode_Queues[idx].append(junction_queues)
        delays = self.env.get_delays()
        for idx, junction_delay in delays.items():
            Cumulative_Episode_Delays[idx].append(Cumulative_Episode_Delays[idx][-1] + junction_delay)
        stop_delays = self.env.get_stop_delays()
        for idx, junction_stop_delay in stop_delays.items():
            Cumulative_Episode_stop_Delays[idx].append(Cumulative_Episode_stop_Delays[idx][-1] + junction_stop_delay)
        Cumulative_Totale_network_delay.append(Cumulative_Totale_network_delay[-1] + self.env.get_delay_timestep())
        Cumulative_Totale_network_stop_delay.append(Cumulative_Totale_network_stop_delay[-1] + self.env.get_stop_delay_timestep())

        if self.env.action_required:
            actions = dict()
            for idx, sarsd in SARSDs.items():
                s, a, r, ns, d = sarsd
                self.Agents[idx].remember(s, a, r, ns, d)
                # The next action must be chosen from "next_state", because that
                # is the state the simulator is currently in.
                actions[idx] = int(self.Agents[idx].choose_action(ns))

        # Advance the demand profile every 360 timesteps
        if self.env.global_counter % 360 == 0:
            demand_counter += 1
            self.env.change_demand(self.env.vehicle_demand[demand_counter])

    # Stop the simulation without erasing the database
    self.env.Stop_Simulation(delete_results=False)
    self.env = None
    return (Episode_Queues, Cumulative_Episode_Delays, Cumulative_Episode_stop_Delays,
            Cumulative_Totale_network_delay, Cumulative_Totale_network_stop_delay)
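# Hedged usage sketch, not part of the original file: consuming the metrics
# tuple returned by test() above. "runner" stands for an instance of the
# surrounding class and matplotlib is assumed to be installed; both are
# assumptions for illustration.
def _plot_test_metrics_sketch(network_delay, network_stop_delay):
    """Plot the two cumulative network-wide delay series returned by test()."""
    import matplotlib.pyplot as plt  # local import to keep the sketch self-contained
    plt.plot(network_delay, label='cumulative network delay')
    plt.plot(network_stop_delay, label='cumulative network stop delay')
    plt.xlabel('timestep')
    plt.ylabel('delay')
    plt.legend()
    plt.show()

# Example:
#     metrics = runner.test()
#     _plot_test_metrics_sketch(metrics[3], metrics[4])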
def train(self, number_of_episode, vissim=False):
    """ Function to train the agents, given the number of episodes of training """
    if vissim is not False:
        vissim.Simulation.Stop()
    self.env = None
    self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                           self.Model_dictionnary, actions_set='default_actions',
                           Random_Seed=self.Random_Seed,
                           timesteps_per_second=self.timesteps_per_second, mode='train',
                           delete_results=True, verbose=True, vissim=vissim)

    # Reset lists for episode reward and episode memory
    for idx, agent in self.Agents.items():
        agent.reset()

    # Get initial state
    start_state = self.env.get_state()
    print("start")

    demand_counter = 0
    no_of_demands = len(self.env.vehicle_demand) - 1

    # Episodic training loop
    while self.number_of_episode < number_of_episode:
        # Pick the demand profile for this episode.
        # TODO(Neil): replace the hard-coded range with no_of_demands.
        demand_counter = np.random.randint(9)
        self.env.change_demand(self.env.vehicle_demand[demand_counter])

        # Create dictionary for chosen actions for each agent and fill it
        actions = {}
        for idx, s in start_state.items():
            actions[idx] = self.Agents[idx].choose_action(s)

        # Simulation loop: run until the end of the episode
        while True:
            SARSDs = self.env.step_to_next_action(actions)

            actions = dict()
            for idx, sarsd in SARSDs.items():
                s, a, r, ns, d = sarsd
                #print(sarsd)
                self.Agents[idx].remember(s, a, r, ns, d)
                # The next action must be chosen from "next_state", because that
                # is the state the simulator is currently in.
                actions[idx] = int(self.Agents[idx].choose_action(ns))

            # Saving and monitoring of the agents at the end of each episode
            if self.env.done:
                self.env.reset()
                self.Random_Seed += 1
                self.number_of_episode += 1
                print('Episode {}: Finished running.'.format(self.number_of_episode))

                for idx, agent in self.Agents.items():
                    agent.average_reward = np.mean(agent.episode_reward)
                    agent.reward_storage.append(agent.average_reward)
                    print("Agent {}, Average Reward: {}".format(idx, round(agent.average_reward, 2)))
                    agent.best_agent(self.vissim_working_directory, self.model_name,
                                     self.agent_type, self.Session_ID)
                    for i in range(self.learning_iterations):
                        agent.learn_batch(self.batch_size, 1)
                    if self.number_of_episode % self.copy_weights_frequency == 0:
                        agent.copy_weights()
                    agent.reset()

                if self.number_of_episode % self.save_every == 0:
                    self.save(self.number_of_episode)

                # Decrease the exploration rate
                self.advance_schedule()

                if self.number_of_episode != number_of_episode:
                    print('Episode {}: Starting computation.'.format(self.number_of_episode + 1))
                break

    self.env = None
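# Hedged sketch, not part of the original file: advance_schedule() above is
# defined elsewhere in the repo. A common implementation is multiplicative
# epsilon decay with a floor, as sketched below; the function name and the
# attribute defaults (epsilon_min, epsilon_decay) are assumptions for
# illustration.
def _epsilon_decay_sketch(epsilon, epsilon_min=0.05, epsilon_decay=0.99):
    """Return the exploration rate after one episode of decay (illustrative only)."""
    return max(epsilon_min, epsilon * epsilon_decay)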
def prepopulate_memory(self, vissim=False):
    """ Function to fill each agent's replay memory with random-policy transitions, or load a saved prepopulation file """
    # Check if a suitable folder exists
    prepopulation_directory = os.path.join(self.vissim_working_directory, self.model_name,
                                           "Agents_Results", self.agent_type, self.Session_ID)
    if not os.path.exists(prepopulation_directory):
        os.makedirs(prepopulation_directory)

    # Check if a suitable file exists
    if self.PER_activated:
        PER_prepopulation_filename = os.path.join(
            prepopulation_directory,
            'Agent' + str(0) + '_PERPre_' + str(self.memory_size) + '.p')
    else:
        PER_prepopulation_filename = os.path.join(
            prepopulation_directory,
            'Agent' + str(0) + '_Pre_' + str(self.memory_size) + '.p')
    prepopulation_exists = os.path.isfile(PER_prepopulation_filename)

    # If it does, process it into the memory
    if prepopulation_exists:
        if self.PER_activated:
            print("Previous Experience: Found. Loading into agents")
            for idx, agent in self.Agents.items():
                PER_prepopulation_filename = os.path.join(
                    prepopulation_directory,
                    'Agent' + str(idx) + '_PERPre_' + str(self.memory_size) + '.p')
                memory = pickle.load(open(PER_prepopulation_filename, 'rb'))
                print("Previous Experience: Successfully loaded file from:")
                print(PER_prepopulation_filename)
                for s, a, r, ns, d in memory:
                    agent.remember(s, a, r, ns, d)
                # Calculate the importance-sampling weights
                update_priority_weights(agent, self.memory_size)
        else:
            for idx, agent in self.Agents.items():
                PER_prepopulation_filename = os.path.join(
                    prepopulation_directory,
                    'Agent' + str(idx) + '_Pre_' + str(self.memory_size) + '.p')
                agent.memory = pickle.load(open(PER_prepopulation_filename, 'rb'))
        return

    # Otherwise, generate the experience now
    print("Experience file not found. Generating now...")

    # Keep count of the number of transitions in each agent's memory
    agents_memory = {}
    for idx, agent in self.Agents.items():
        agents_memory[idx] = []

    # 10000 is an arbitrary number chosen so that the simulation speed is quick enough
    self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                           self.Model_dictionnary, self.actions_set, self.Random_Seed,
                           timesteps_per_second=self.timesteps_per_second, mode='training',
                           delete_results=True, verbose=True, vissim=vissim)

    memory_full = False
    # Time counter
    number_of_action_taken = 0

    start_state = self.env.get_state()
    actions = {}
    for idx, s in start_state.items():
        actions[idx] = int(self.Agents[idx].choose_action(s))

    while not memory_full:
        SARSDs = self.env.step_to_next_action(actions)

        if number_of_action_taken % 1000 == 0:
            for idx, memory in agents_memory.items():
                print("After {} actions taken by the Agents, Agent {} memory is {} percent full"
                      .format(number_of_action_taken, idx,
                              np.round(100 * len(memory) / self.memory_size, 2)))

        actions = dict()
        for idx, sarsd in SARSDs.items():
            s, a, r, ns, d = sarsd
            #print(sarsd)
            self.Agents[idx].remember(s, a, r, ns, d)
            agents_memory[idx].append([s, a, r, ns, d])
            # The next action must be chosen from "next_state", because that
            # is the state the simulator is currently in.
            actions[idx] = int(self.Agents[idx].choose_action(ns))
        number_of_action_taken += 1

        # Check if all the agents have their memory full
        memory_full = True
        for idx, memory in agents_memory.items():
            if len(memory) < self.memory_size:
                memory_full = False

        # Start a new episode if the current one is over
        if self.env.done:
            self.env.reset()
            start_state = self.env.get_state()  # refresh the start state after the reset
            actions = {}
            for idx, s in start_state.items():
                actions[idx] = self.Agents[idx].choose_action(s)

    for idx, agent in self.Agents.items():
        if self.PER_activated:
            update_priority_weights(agent, self.memory_size)
            PER_prepopulation_filename = os.path.join(
                prepopulation_directory,
                'Agent' + str(idx) + '_PERPre_' + str(self.memory_size) + '.p')
            # Dump the random transitions into a pickle file for later prepopulation of the PER
            print("Memory filled. Saving as: " + PER_prepopulation_filename)
            pickle.dump(agents_memory[idx], open(PER_prepopulation_filename, 'wb'))
        else:
            PER_prepopulation_filename = os.path.join(
                prepopulation_directory,
                'Agent' + str(idx) + '_Pre_' + str(self.memory_size) + '.p')
            print("Memory filled. Saving as: " + PER_prepopulation_filename)
            pickle.dump(agents_memory[idx], open(PER_prepopulation_filename, 'wb'))
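# Hedged sketch, not part of the original file: update_priority_weights() used
# above is defined elsewhere in the repo. For standard prioritised experience
# replay, the importance-sampling weight of a transition sampled with
# probability p_i from a buffer of size N is w_i = (N * p_i)^(-beta),
# normalised by max(w). The function below computes these weights from raw
# priorities; its name and the default beta are assumptions for illustration.
def _per_is_weights_sketch(priorities, beta=0.4):
    """Importance-sampling weights from PER priorities (illustrative only)."""
    import numpy as np  # local import to keep the sketch self-contained
    p = np.asarray(priorities, dtype=float)
    probs = p / p.sum()                 # sampling probabilities
    weights = (len(p) * probs) ** (-beta)
    return weights / weights.max()      # normalise so the largest weight is 1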
def test(self, vissim=False):
    """
    Function to test the agents on one episode with all the metrics:
    queues over time, delays, and the average reward of the agents.
    """
    if vissim is not False:
        vissim.Simulation.Stop()
    self.env = None
    self.env = environment(self.model_name, self.vissim_working_directory, self.sim_length,
                           self.Model_dictionnary, actions_set='default_actions',
                           Random_Seed=self.Random_Seed,
                           timesteps_per_second=self.timesteps_per_second, mode='test',
                           delete_results=True, verbose=True, vissim=vissim)

    # Counter to change the demand during the test
    demand_counter = 0
    ## ATTENTION HERE. DEMAND CHANGES DEACTIVATED.
    #self.env.change_demand(self.env.vehicle_demand[demand_counter])

    # Initialisation of the metrics
    self.Episode_Queues = {}                   # Queues at each junction
    self.Cumulative_Episode_Delays = {}        # Delay at each junction
    self.Cumulative_Episode_stop_Delays = {}   # Stop delay at each junction
    self.Cumulative_Totale_network_delay = [0]
    self.Cumulative_Totale_network_stop_delay = [0]

    queues = self.env.get_queues()
    for idx, junction_queues in queues.items():
        self.Episode_Queues[idx] = [junction_queues]
    delays = self.env.get_delays()
    for idx, junction_delay in delays.items():
        self.Cumulative_Episode_Delays[idx] = [junction_delay]
    stop_delays = self.env.get_stop_delays()
    for idx, junction_stop_delay in stop_delays.items():
        self.Cumulative_Episode_stop_Delays[idx] = [junction_stop_delay]

    for idx, agent in self.Agents.items():
        agent.reset()
        agent.epsilon = 0  # Set the exploration rate to 0

    # Initialisation: choose a first action for every agent from the start state
    start_state = self.env.get_state()
    actions = {}
    for idx, s in start_state.items():
        actions[idx] = self.Agents[idx].choose_action(s)

    # Simulation
    while not self.env.done:
        # Make the environment take a step
        SARSDs = self.env.step(actions)

        # Read the queues and store them
        queues = self.env.get_queues()
        for idx, junction_queues in queues.items():
            self.Episode_Queues[idx].append(junction_queues)
        # Do the same with the global delays
        delays = self.env.get_delays()
        for idx, junction_delay in delays.items():
            self.Cumulative_Episode_Delays[idx].append(self.Cumulative_Episode_Delays[idx][-1] + junction_delay)
        # And again with the stop delays
        stop_delays = self.env.get_stop_delays()
        for idx, junction_stop_delay in stop_delays.items():
            self.Cumulative_Episode_stop_Delays[idx].append(self.Cumulative_Episode_stop_Delays[idx][-1] + junction_stop_delay)
        self.Cumulative_Totale_network_delay.append(self.Cumulative_Totale_network_delay[-1] + self.env.get_delay_timestep())
        self.Cumulative_Totale_network_stop_delay.append(self.Cumulative_Totale_network_stop_delay[-1] + self.env.get_stop_delay_timestep())

        # Whenever an action is required
        if self.env.action_required:
            actions = dict()
            for idx, sarsd in SARSDs.items():
                s, a, r, ns, d = sarsd
                self.Agents[idx].remember(s, a, r, ns, d)
                # The next action must be chosen from "next_state", because that
                # is the state the simulator is currently in.
                actions[idx] = int(self.Agents[idx].choose_action(ns))

        ## ATTENTION: CHANGE DEMAND DEACTIVATED
        #if self.env.global_counter % 360 == 0:
        #    demand_counter += 1
        #    self.env.change_demand(self.env.vehicle_demand[demand_counter])

    # Stop the simulation without erasing the database
    self.env.Stop_Simulation(delete_results=False)
    self.env = None
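# Hedged sketch, not part of the original file: the deactivated block above
# advances a piecewise-constant demand profile every 360 global timesteps.
# Equivalently, the demand index at any timestep can be computed directly;
# the function name and the default period are assumptions for illustration.
def _demand_index_sketch(global_counter, period=360):
    """Index into env.vehicle_demand for the given timestep (illustrative only)."""
    return global_counter // period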