if xx + 1 <= grid_x and in_cars[xx + 1][yy][3] > 0: cross_ = cross[lab] cross_.car_nums[3] += in_cars[xx + 1][yy][3] cross[lab] = cross_ #in the same diagram as the above 'visual_before', showing the incoming cars visual.visual_peri(peri_cars, x, y, times, b, bias, bias_, bias_t, grid_x, grid_y) reward = 0 for xx in x: for yy in y: lab = str(xx) + str(yy) for i in range(4): reward = reward - cross[lab].car_nums[i]**2 #show the result of state tranformation in another diagram 'visual_after', i.e. the result of 'visual_before' visual.visual_after(cross, x, y, times, b, bias, bias_t) time.sleep(10) obs_ = [] for xx in x: for yy in y: lab = str(xx) + str(yy) obs_ = np.concatenate( (obs_, cross[lab].car_nums, cross[lab].light_state), axis=None) RL.store_transition(obs, action, reward, obs_) if steps > 200: RL.learn() if steps % 50 == 0:
# NOTE(review): this chunk arrived with its line breaks and indentation
# stripped. The nesting below is reconstructed from the statements themselves;
# the enclosing training loop (which binds steps, action1, action2, ...) is
# outside this chunk, so re-indent to match it when splicing back in.

# Advance both intersections with their chosen actions and show the traffic
# each one emitted.
peri_cars1, in_cars1 = cross1.state_change(action1)
peri_cars2, in_cars2 = cross2.state_change(action2)
print(peri_cars1, in_cars1, peri_cars2, in_cars2)
visual.visual_peri(peri_cars1, peri_cars2)

reward = 0
for i in range(4):
    # Each intersection receives the other's in_cars on every approach whose
    # q_states flag is 1. cross2 is updated before cross1, as in the original.
    for junction, inflow in ((cross2, in_cars1), (cross1, in_cars2)):
        if junction.q_states[i] == 1:
            junction.car_nums[i] += inflow
    # Penalise the squared queue length of both intersections.
    reward -= cross1.car_nums[i] ** 2
    reward -= cross2.car_nums[i] ** 2

visual.visual_after(cross1, cross2)

# Next observation: queue lengths and light state of both intersections,
# flattened into one vector.
obs_ = np.concatenate(
    (
        cross1.car_nums,
        cross1.light_state,
        cross2.car_nums,
        cross2.light_state,
    ),
    axis=None,
)
RL.store_transition(obs, action, reward, obs_)

# Learning only starts once more than 200 steps have been taken.
if steps > 200:
    RL.learn()

# Every 50th step, log the reward and record it for the final plot.
if steps % 50 == 0:
    print(steps, reward)
    reward_set.append(reward)
    step_set.append(steps)
    # plt.scatter(steps, reward)

obs = obs_

# window.mainloop()
# NOTE(review): presumably this runs once, after the training loop has ended.
# Confirm its indentation level against the enclosing loop.
plt.plot(step_set, reward_set)