def run(self, fitness_function, trafic, SUMO, n=None):
    """
    Runs NEAT's genetic algorithm for at most n generations. If n
    is None, run until a solution is found or complete extinction occurs.

    The user-provided fitness_function is called once per genome (in parallel
    worker processes) and must take three arguments:
        1. A (genome id, genome) tuple.
        2. The current configuration object.
        3. The current episode number.

    The return value of the fitness function is ignored, but it must assign
    a Python float to the `fitness` member of each genome.

    The fitness function is free to maintain external state, perform
    evaluations in parallel, etc.

    It is assumed that the fitness function does not modify the list of genomes,
    the genomes themselves (apart from updating the fitness member),
    or the configuration object. A minimal sketch of a compatible fitness
    function is given after this method.
    """
    number_episodes = n
    send_iterator = 0

    if self.config.no_fitness_termination and (n is None):
        raise RuntimeError("Cannot have no generational limit with no fitness termination")

    """Initialise simulation ======================================================================================="""
    traci.route.add(trafic.vehicle_ego.RouteID, trafic.vehicle_ego.Route)
    if trafic.vehicle2_exist:
        traci.route.add(trafic.vehicle_2.RouteID, trafic.vehicle_2.Route)
    if trafic.vehicle3_exist:
        traci.route.add(trafic.vehicle_3.RouteID, trafic.vehicle_3.Route)
    traci.vehicletype.setSpeedFactor(typeID='traffic_vehicle', factor=5.0)

    # NOTE: these per-episode arrays assume a finite number of episodes (n is not None).
    cum_reward = np.zeros((number_episodes, 1))
    best_cum_reward = -1000000
    reward_mean100 = np.zeros((number_episodes, 1))
    length_episode = np.zeros((number_episodes, 1))
    data_export = np.zeros((number_episodes, 2))
    restart_step = 0  # counter for calculating the reset timing when the simulation time gets close to 24 days
    cum_reward_evaluation = [0]  # list for the cumulative reward of evaluation episodes
    evaluation = False
    sub_ego = {}
    # traci.vehicle.setSpeedMode(trafic.vehicle_ego.ID, 16)
    if trafic.training:
        trafic.vehicle_ego.depart_speed = np.random.randint(0, 30, size=number_episodes)
    else:
        trafic.vehicle_ego.depart_speed = ego_depart_speed
    traci.trafficlight.setProgram(tlsID='junction1', programID=TLS_ID)

    k = 0
    while n is None or k < n:
        k += 1
        episode = k
        try:  # for keyboard interrupt
            """Check if total simulation time is close to 24 days ======================================================"""
            # TraCI time inputs have a maximum value of ~24 days --> restart SUMO to reset the time
            if np.sum(length_episode[restart_step:]) * SUMO.sim['timestep'] > 2000000:
                print('Almost 24 days of simulation time reached! Restarting SUMO and continuing with the next episode...')
                traci.close()
                traci.start(['sumo', '-c', 'SUMO_config.sumocfg'])
                traci.route.add(trafic.vehicle_ego.RouteID, trafic.vehicle_ego.Route)
                if trafic.vehicle2_exist:
                    traci.route.add(trafic.vehicle_2.RouteID, trafic.vehicle_2.Route)
                if trafic.vehicle3_exist:
                    traci.route.add(trafic.vehicle_3.RouteID, trafic.vehicle_3.Route)
                restart_step = episode

            print('Episode: ', episode, '/', number_episodes)
            send_iterator += 1
            if send_iterator == 10:
                msg = 'Episode: ' + str(episode) + '/' + str(number_episodes)
                andIsendtomyself(msg)
                send_iterator = 0

            """Initialise episode =================================================================================="""
            # SUMO.init_vars_episode()
            # dynamics_ego.reset_variables()
            # if controller == 'DQN' or controller == 'DDPG' or controller == 'hybrid_a' or controller == 'DDPG_v':
            #     nn_controller.reset_variables()
            # if controller == 'ACC' or controller == 'hybrid_a':
            #     acc_controller.create_mode_map()
            # if exploration_policy == 'ACC':
            #     explo_policy.create_mode_map()
            # if (controller == 'DDPG' or controller == 'hybrid_a' or controller == 'DDPG_v') and ((episode+1) % 5 == 0):
            #     # perform an evaluation episode (without exploration noise) every x episodes to observe the cumulative reward progress
            #     evaluation = True

            """Note: If 'training = True', several variations of the traffic scenario are defined here for the
            training episodes. If 'training = False' or 'evaluation = True' (evaluation episodes under identical
            boundary conditions), an episode with identical boundary conditions (e.g. the speed profile of the
            preceding vehicle) is always used."""
            # if trafic.evaluation:
            #     traci.vehicle.add(trafic.vehicle_ego.ID, trafic.vehicle_ego.RouteID, departSpeed='0',
            #                       typeID='ego_vehicle')  # Ego vehicle
            #     traci.trafficlight.setPhase('junction1', 0)  # set traffic light phase to 0 for evaluation (same conditions)
            # else:
            #     traci.vehicle.add(trafic.vehicle_ego.ID, trafic.vehicle_ego.RouteID,
            #                       departSpeed=np.array2string(trafic.vehicle_ego.depart_speed[episode]),
            #                       typeID='ego_vehicle')  # Ego vehicle
            if not trafic.training:
                traci.trafficlight.setPhase('junction1', 0)

            if trafic.training and not evaluation and trafic.vehicle3_exist:
                trafic.vehicle3 = np.random.choice([True, False], p=[0.95, 0.05])
                traci.lane.setMaxSpeed('gneE01_0', np.random.choice([8.33, 13.89, 19.44, 25.]))
                traci.lane.setMaxSpeed('gneE02_0', np.random.choice([8.33, 13.89, 19.44, 25.]))
                traci.lane.setMaxSpeed('startedge_0', np.random.choice([8.33, 13.89, 19.44, 25.]))
                SUMO.create_v_profile_prec(a=SUMO.prec_train_amplitude[episode - 1], c=SUMO.prec_train_mean[episode - 1])
            else:
                trafic.vehicle3 = vehicle3_exist
                traci.lane.setMaxSpeed('startedge_0', 13.89)  # 13.89
                traci.lane.setMaxSpeed('gneE01_0', 19.44)  # 19.44
                traci.lane.setMaxSpeed('gneE02_0', 13.89)  # 13.89
                traci.lane.setMaxSpeed('stopedge_0', 8.33)  # 8.33
            trafic.episoden_variante = np.random.rand() * 240.
            # if trafic.vehicle2_exist:
            #     traci.vehicle.add(vehicle_2.ID, vehicle_2.RouteID, typeID='traffic_vehicle')  # preceding vehicle 1
            # if trafic.vehicle3:
            #     traci.vehicle.add(trafic.vehicle_3.ID, trafic.vehicle_3.RouteID, typeID='traffic_vehicle')  # preceding vehicle 2
            # if trafic.training and not evaluation:
            #     traci.vehicle.moveTo(trafic.vehicle_3.ID, 'gneE01_0', np.random.rand() * 240.)
            # else:
            #     traci.vehicle.moveTo(trafic.vehicle_3.ID, 'gneE01_0', 0.)
            # traci.simulationStep()  # to spawn vehicles
            # if controller != 'SUMO':
            #     traci.vehicle.setSpeedMode(trafic.vehicle_ego.ID, 16)  # only emergency stopping at red traffic lights --> episode ends
            # if trafic.vehicle2_exist:
            #     traci.vehicle.setSpeedMode(trafic.vehicle_2.ID, 17)
            # if trafic.vehicle3:
            #     traci.vehicle.setSpeedMode(trafic.vehicle_3.ID, 17)
            #
            # SUMO.currentvehiclelist = traci.vehicle.getIDList()
            #
            # # SUMO subscriptions
            # traci.vehicle.subscribeLeader(trafic.vehicle_ego.ID, 10000)
            # traci.vehicle.subscribe(trafic.vehicle_ego.ID, [traci.constants.VAR_SPEED, traci.constants.VAR_BEST_LANES,
            #                                                 traci.constants.VAR_FUELCONSUMPTION, traci.constants.VAR_NEXT_TLS,
            #                                                 traci.constants.VAR_ALLOWED_SPEED, traci.constants.VAR_LANE_ID])

            # self.reporters.start_generation(self.generation)
            # print(self.population[49+k])

            # Evaluate all genomes using the user-provided function.
            # A context manager is used so the worker processes are released again after every generation.
            with Pool(processes=os.cpu_count()) as pool:
                pool.starmap(fitness_function,
                             zip(list(iteritems(self.population)), repeat(self.config), repeat(episode)))
            # print(self.fitness)

            # Gather and report statistics.
            best = None
            for g in itervalues(self.population):
                if best is None or g.fitness > best.fitness:
                    best = g
            # print(best.fitness, best.size(), self.species.get_species_id(best.key), best.key)
            self.reporters.post_evaluate(self.config, self.population, self.species, best)

            # Track the best genome ever seen.
            if self.best_genome is None or best.fitness > self.best_genome.fitness:
                self.best_genome = best

            if not self.config.no_fitness_termination:
                # End if the fitness threshold is reached.
                fv = self.fitness_criterion(g.fitness for g in itervalues(self.population))
                if fv >= self.config.fitness_threshold:
                    self.reporters.found_solution(self.config, self.generation, best)
                    break

            # Create the next generation from the current generation.
            self.population = self.reproduction.reproduce(self.config, self.species,
                                                          self.config.pop_size, self.generation)

            # Check for complete extinction.
            if not self.species.species:
                self.reporters.complete_extinction()

                # If requested by the user, create a completely new population,
                # otherwise raise an exception.
                if self.config.reset_on_extinction:
                    self.population = self.reproduction.create_new(self.config.genome_type,
                                                                   self.config.genome_config,
                                                                   self.config.pop_size)
                else:
                    raise CompleteExtinctionException()

            # Divide the new population into species.
            self.species.speciate(self.config, self.population, self.generation)

            self.reporters.end_generation(self.config, self.population, self.species)

            self.generation += 1

            if self.config.no_fitness_termination:
                self.reporters.found_solution(self.config, self.generation, self.best_genome)

            # print('Cumulative Reward:', cum_reward[episode])
            # if evaluation:
            #     cum_reward_evaluation.append(cum_reward[episode])
            #     evaluation = False
            # if cum_reward[episode] > best_cum_reward:
            #     nn_controller.save_models(savefile_best_actor + '_' + str(episode), savefile_best_critic + '_' + str(episode))
            #     best_cum_reward = cum_reward[episode]
            #
            # if training and (controller == 'DQN' or controller == 'hybrid_a' or controller == 'DDPG' or controller == 'DDPG_v') and liveplot:
            #     reward_mean100[episode] = nn_controller.reward_mean_100_running(cum_reward, episode)
            #     nn_controller.weight_observer(episode)
            #     plot_running(reward_mean100, episode, cum_reward_evaluation)
            # data_export[:, 0] = cum_reward[:, 0]
            # data_export[:, 1] = length_episode[:, 0]
            # if training:
            #     try:
            #         if (episode + 1) % 25 == 0:  # save rewards every 25 episodes
            #             np.savetxt(savefile_reward, data_export)
            #         if (episode + 1) % 25 == 0:  # save model every 25 episodes
            #             nn_controller.save_models(savefile_model_actor, savefile_model_critic)
            #     except OSError:
            #         print('File saving failed')
            #         pass
            #
            # if acc_controller:
            #     acc_controller.reset_integral_error()

        except KeyboardInterrupt:
            print('Manual interrupt')
            break

    traci.close()
    # traci.start(['sumo-gui', '-c', 'SUMO_config.sumocfg'])
    # sbr.result(self.best_genome, self.config, trafic)
    # sbr.eval_genomes(self.best_genome, self.config, 0, SUMO, trafic)
    now = datetime.now()
    nowstr = now.strftime('%Y%m%d%H%M%S')
    with open('H:\\MT\\Python\\NEAT und SUMO\\saved models\\' + 'best_genome_neat' + nowstr, 'wb') as f:
        pickle.dump(self.best_genome, f)
    return self.best_genome
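
# --- Illustrative sketch (assumption, not part of the original project code) ---------------------------------------
# The pool.starmap call in run() above invokes the user-provided fitness function once per genome with three
# arguments: a (genome_id, genome) tuple, the NEAT config, and the current episode number. The function below is a
# minimal sketch of a compatible signature; its body is a placeholder (the real project evaluates each genome by
# driving the ego vehicle through a SUMO episode). Because starmap runs the function in worker processes, the genome
# it receives is a pickled copy, so the fitness assigned here also needs to be communicated back to the parent
# process (e.g. via the return value) to become visible to run().
def example_fitness_function(genome_item, config, episode):
    import neat  # local import so the sketch stays self-contained
    genome_id, genome = genome_item
    net = neat.nn.FeedForwardNetwork.create(genome, config)  # build the phenotype network for this genome
    cumulative_reward = 0.0  # placeholder: accumulate the episode reward from the SUMO rollout here
    genome.fitness = float(cumulative_reward)  # run() reads genome.fitness
    return genome_id, genome.fitness
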
nn = 1
k = 0
while number_episodes is None or k < number_episodes:
    k += 1
    episode = k
    error = True
    error_ref = True
    try:  # for keyboard interrupt
        print('\n Episode: ', episode, '/', number_episodes)
        send_iterator += 1
        save_iterator += 1
        if send_iterator == 30:
            msg = 'Episode: ' + str(episode) + '/' + str(number_episodes)
            andIsendtomyself(msg)
            send_iterator = 0
        # p.reporters.start_generation(p.generation)

        # Assign each genome a worker/SUMO-instance index in round-robin fashion over the available CPU cores.
        y = 0  # round-robin core index (assumed to be reset at the start of every episode)
        sim_id = []
        for sims in range(1, len(p.population) + 1):
            if y == os.cpu_count() - 1:
                y = 0
            else:
                y += 1
            sim_id.append(y)
        pop_input = list(iteritems(p.population))
if trafic.training and liveplot:
    fig_running, ax_running_1, ax_running_2, ax_running_3, ax_running_4 = plot_running_init(training)

"""run simulation =============================================================================================="""
start = timeit.default_timer()
sim_liste = []
for core in range(1, os.cpu_count()):
    sim_liste.append('sim' + str(core))
if trafic.training or sample_generation:
    for sim in sim_liste:
        traci.start(['sumo', '-c', 'SUMO_config.sumocfg', '--no-warnings'], label=sim)
else:
    traci.start(['sumo-gui', '-c', 'SUMO_config.sumocfg'])
best_genome = run(config_path, trafic, SUMO)
andIsendtomyself('Geschafft!')
traci.close()
# cum_reward = run_control()

"""Postprocessing ==============================================================================================="""
# ### save keras model ###
# if training:
#     nn_controller.save_models(savefile_model_actor, savefile_model_critic)

### postprocess SUMO data for plotting ###
SUMO.postproc_v()

stop = timeit.default_timer()
print('Calculation time: ', stop - start)
# if training and (controller == 'DQN' or controller == 'DDPG'):
#     reward_mean100 = nn_controller.reward_mean100(cum_reward)  # calculate the mean of the last 100 episodes
# else:
#     reward_mean100 = []
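
# --- Illustrative sketch (assumption, not part of the original project code) ---------------------------------------
# The block above starts one labelled SUMO instance per additional CPU core ('sim1', 'sim2', ...). A worker that has
# been handed one of these labels can make it the active TraCI connection before stepping "its" simulation. The
# helper name below and the way labels reach the workers are assumptions; only traci.start(..., label=...),
# traci.switch(label) and traci.getConnection(label) are standard TraCI calls.
def _activate_sumo_instance(label):
    """Switch the default TraCI connection to the SUMO instance started under `label` and return its handle."""
    traci.switch(label)  # select the connection created by traci.start(..., label=label)
    return traci.getConnection(label)  # the explicit handle can be used instead of the default connection
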
format_string = '{:0' + str(len(str(number_episodes))) + '.0f}'
nn = 1
k = 0
while number_episodes is None or k < number_episodes:
    k += 1
    episode = k
    error = True
    try:  # for keyboard interrupt
        print('\nEpisode: ', episode, '/', number_episodes)
        send_iterator += 1
        save_iterator += 1
        if send_iterator == 50:
            msg = 'Episode: ' + str(episode) + '/' + str(number_episodes)
            andIsendtomyself(msg)
            send_iterator = 0
        # p.reporters.start_generation(p.generation)

        # Assign each genome a worker/SUMO-instance index in round-robin fashion over the available CPU cores.
        y = 0  # round-robin core index (assumed to be reset at the start of every episode)
        sim_id = []
        for sims in range(1, len(p.population) + 1):
            if y == os.cpu_count() - 1:
                y = 0
            else:
                y += 1
            sim_id.append(y)
        pop_input = list(iteritems(p.population))
        while error: