replace_target_iter=100, memory_size=2000, e_greedy_increment=0.001,)
# NOTE(review): the line above is the tail of an RL-agent constructor call that
# opens before this chunk — presumably a DQN-style agent; confirm against the
# preceding lines of the file.

total_steps = 0     # cumulative environment steps across all episodes; gates learning below
ep_rhistory = []    # NOTE(review): declared here but not appended to in this view —
                    # presumably collects per-episode rewards later in the file

# Main training loop: run 500 episodes, storing each transition in the agent's
# replay memory and training once enough steps have been collected.
for i_episode in range(500):
    observation = env.reset()
    ep_r = 0  # reward accumulated over the current episode
    while True:
        # env.render()
        # Flatten the observation before handing it to the agent — assumes the
        # agent's network expects a 1-D state vector (TODO confirm).
        action = RL.choose_action(observation.flatten())
        observation_, reward, done, info = env.step(action)
        # the smaller theta and closer to center the better
        # print(observation_)
        # x, x_dot, theta, theta_dot = observation_
        # r1 = (env.x_threshold - abs(x))/env.x_threshold - 0.8
        # r2 = (env.theta_threshold_radians - abs(theta))/env.theta_threshold_radians - 0.5
        # reward = r1 + r2
        # (The commented block above is an optional shaped reward for a
        # cart-pole-style environment; the raw env reward is used instead.)
        RL.store_transition(observation.flatten(), action, reward, observation_.flatten())
        ep_r += reward
        # Warm-up: only start learning once the replay memory has seen >1000 steps.
        if total_steps > 1000:
            RL.learn()
        # NOTE(review): this chunk is truncated here — the loop body presumably
        # continues (break on `done`, `observation = observation_`,
        # `total_steps += 1`) beyond this view; verify against the full file.
#Initializing cross = crossroads_map(x, y) visual = Visual() obs = [] for xx in x: for yy in y: lab = str(xx) + str(yy) obs = np.concatenate( (obs, cross[lab].car_nums, cross[lab].light_state), axis=None) #Training steps for steps in range(200000): visual.visual_before(cross, x, y, times, b, bias, bias_t) action = RL.choose_action(obs) action_set = [[0 for i in range(grid_y + 1)] for j in range(grid_x + 1)] peri_cars = [[([0] * 4) for i in range(grid_y + 1)] for j in range(grid_x + 1)] in_cars = [[([0] * 4) for i in range(grid_y + 1)] for j in range(grid_x + 1)] #light state changes, cars numbers change, interactions between crossroads and peripherals for xx in x: for yy in y: lab = str(xx) + str(yy) #10->binary coding for action(1 value), like if action=128, 9 bits binary coding #of it is 010000000, indicating a 3*3 grid with each crossroad having action of #'0''1''0''0''0''0''0''0''0'(storing in action set), each action is either '0' or '1', #for 'change state' or 'keep on'. The binary number is set to be (grid_x*grid_y) bits,