def static_control(track_select='SS'):
    # Scripted demo: drive every car through a fixed cycle of
    # (velocity, steering) goals. Assumes the module-level globals
    # `track`, `cars`, and `dt`, plus the Environment and GUI modules
    # imported at the top of this file (Python 2: print / raw_input).
    Environment.track_generator(track, track_select=track_select)
    env = Environment.Environment(track, 10000)
    gui = GUI.GUI(track, cars, trace=True)
    car_objects = [Environment.Car(c) for c in cars]
    env.compute_interaction(car_objects)
    GOALS = ((0.33, 0), (0.14, 0.6), (0.4, 0), (0.148, -0.6))
    while True:
        for goal in GOALS:
            for car in car_objects:
                car.set_velocity(goal[0])
                car.set_steering(goal[1])
            # The step loop originally reused `i` for both the step count and
            # the car index; renamed to avoid the shadowing.
            for _ in range(100):  # hold each goal for 100 simulation steps
                debug_data = ''
                for i, car in enumerate(car_objects):
                    if car.state == 'collided':
                        debug_data += 'Car ' + str(i) + '\nCollided!\n\n'
                        continue
                    car.update(dt)
                    s_r = car.get_sensor_reading()
                    gui.update(i, car.get_state())
                    debug_data += ('Car ' + str(i) + '\nSensor readings:' +
                                   ', '.join(['{:.2f}'.format(x) for x in s_r]) +
                                   '\nCar score=' +
                                   '{:.2f}'.format(car.score) + '\n')
                env.compute_interaction(car_objects)
                gui.update_debug_info(debug_data)
                gui.refresh()
def user_control(track_select='SS'):
    # Manual demo: read w/s/a/d commands from stdin and apply the mapped
    # (velocity, steering) pair to every car; any other input stops them.
    Environment.track_generator(track, track_select=track_select)
    env = Environment.Environment(track, 10000)
    gui = GUI.GUI(track, cars, trace=True)
    car_objects = [Environment.Car(c) for c in cars]
    env.compute_interaction(car_objects)
    key_map = {  # hoisted out of the loop; was rebuilt every iteration
        'w': [0.5, 0.0],
        's': [-0.3, 0.0],
        'a': [0.1, 0.6],
        'd': [0.1, -0.6]
    }
    while True:
        try:
            v, s = key_map[raw_input()]
        except (KeyError, EOFError):  # unmapped key or end of input: stop
            v, s = 0, 0
        for agent in car_objects:
            agent.set_velocity(v)
            agent.set_steering(s)
        for _ in range(10):  # apply each command for 10 simulation steps
            debug_data = ''
            for i, car in enumerate(car_objects):
                if car.state == 'collided':
                    debug_data += 'Car ' + str(i) + '\nCollided!\n\n'
                    continue
                car.update(dt)
                s_r = car.get_sensor_reading()
                gui.update(i, car.get_state())
                debug_data += ('Car ' + str(i) + '\nSensor readings:' +
                               ', '.join(['{:.2f}'.format(x) for x in s_r]) +
                               '\nCar score=' +
                               '{:.2f}'.format(car.score) + '\n')
            env.compute_interaction(car_objects)
            gui.update_debug_info(debug_data)
            gui.refresh()
def reinforcement_neural_network_control(load_weights=None,
                                         run_only=False,
                                         track_select='SS',
                                         random_seed=None,
                                         rl_prams=None):
    # Train (or replay) a Q-learning neural-network controller for car 0.
    # `rl_prams` is the RL hyper-parameter dict and must include 'max_steps'.
    # (Function name de-typoed from `reinfrocement_...`; the body also
    # referred to an undefined `rl_parameters` once, fixed to `rl_prams`.)
    run = run_only
    weights_save_dir = "./weights/"
    if not os.path.exists(weights_save_dir):
        os.makedirs(weights_save_dir)
    Environment.track_generator(track, track_select=track_select)
    env = Environment.Environment(track, rl_prams['max_steps'])
    gui = GUI.GUI(track, cars, trace=True)
    car_objects = [Environment.Car(c) for c in cars]
    rl = RL.QLearning_NN(rl_prams, weights_save_dir=weights_save_dir)
    rl.generate_nn()
    if load_weights is not None:
        if load_weights == 'all':
            run = True
        else:
            rl.load_weights(load_weights)
    if random_seed is not None:
        rl.random_seed(random_seed)
    weight_names = sorted(glob.glob(weights_save_dir + '*'))
    weight_names_index = 0

    def initialize(run_state):
        # Reset all cars and put the GUI/environment into either
        # testing (run_state=True) or training mode.
        env.compute_interaction(car_objects)
        for car in car_objects:
            car.reset()
            car.get_sensor_reading()
        if run_state:
            env.set_max_steps(1500)
            gui.remove_traces()
            gui.disable_trace()
            gui.set_run_select(gui.runs[1])
            gui.update_debug_info('[Testing]\n' +
                                  'Currently learned weights loaded')
        else:
            env.set_max_steps(rl_prams['max_steps'])
            gui.enable_trace()
            gui.set_run_select(gui.runs[0])
            gui.update_debug_info('[Training]\n')

    def check_run_button(current_state):
        # Switch between training and testing when the GUI toggle changes;
        # returns the new run state, or None if nothing changed.
        if gui.get_run_select() == gui.runs[0] and current_state:
            print '\n\n\nLearning\n'
            initialize(run_state=False)
            return False
        if gui.get_run_select() == gui.runs[1] and not current_state:
            print '\n\n\nRun only\n'
            initialize(run_state=True)
            return True
        return None

    initialize(run_state=run)
    while True:
        new_run_state = check_run_button(current_state=run)
        if new_run_state is not None:
            run = new_run_state
        if run:
            # Testing: step every car with the learned policy; when car 0
            # finishes an episode, optionally load the next weight snapshot.
            for i, car in enumerate(car_objects):
                terminal = rl.run_step(car, env, dt)
                if terminal is not None:
                    print 'Car', i, ':', terminal
                    if i == 0:
                        if (load_weights == 'all' and
                                weight_names_index < len(weight_names)):
                            rl.load_weights(weight_names[weight_names_index])
                            gui.update_debug_info(
                                '[Testing]\n' + 'Weights loaded:\n' +
                                weight_names[weight_names_index])
                            weight_names_index += 1
                gui.update(i, car.get_state())
            env.compute_interaction(car_objects)
            gui.refresh()
        else:
            # Training: one Q-learning step for car 0; plot stats at the
            # end of each episode, redraw the car every 100 epochs.
            terminal, debug, epoch, avg_loss, final_score, cross_score = \
                rl.learn_step(car_objects[0], env, dt)
            if terminal is not None:
                if debug is not None:
                    gui.update_debug_info(debug)
                    gui.update_graph(epoch, avg_loss, gui.graphs[0])
                    gui.update_graph(epoch, final_score, gui.graphs[1])
                    gui.update_graph(epoch, cross_score, gui.graphs[2])
                    gui.refresh()
                gui.update(0, terminal, draw_car=False, force_end_line=True)
                gui.refresh()
            if rl.epoch % 100 == 0:
                gui.update(0, car_objects[0].get_state(), draw_car=True)
                gui.refresh()
            else:
                gui.update(0, car_objects[0].get_state(), draw_car=False)
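# A minimal usage sketch of the three entry points above, assuming the
# module-level globals `track`, `cars`, `dt`, and an `rl_parameters`
# hyper-parameter dict (with at least a 'max_steps' key) are defined
# earlier in this file; the argument values shown are illustrative only.
if __name__ == '__main__':
    # static_control(track_select='SS')   # scripted (velocity, steering) demo
    # user_control(track_select='SS')     # manual w/s/a/d driving via stdin
    reinforcement_neural_network_control(track_select='SS',
                                         random_seed=0,
                                         rl_prams=rl_parameters)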