def reset(self, Date, path):
    """Re-initialize episode state for a new run.

    Builds a fresh environment, draws a random initial action vector,
    and obtains the initial observation from the environment.

    Args:
        Date: identifier forwarded to ``envirment.Env`` — presumably selects
            the input data set for the run; TODO confirm against Env.
        path: output directory path forwarded to ``envirment.Env``.
    """
    self.env = envirment.Env(Date, path)

    # Random initial action: each of the a_dim entries is switched on
    # independently with probability 0.2, then flattened to a 1-D vector.
    self.a = np.zeros((1, self.a_dim))
    for i in range(self.a_dim):
        if np.random.uniform() < 0.2:
            self.a[0, i] = 1
    self.a = np.ravel(self.a)

    # The "previous action" starts as the all-zeros vector; it is passed to
    # the environment's first update below.
    self.a_last = np.ravel(np.zeros((1, self.a_dim)))

    # Initial state comes from the environment. NOTE: the original code
    # first filled self.s / self.s_ with zero vectors, but those values were
    # immediately overwritten here (the fresh Env cannot read them), so the
    # dead zero-assignments have been removed.
    self.s = self.env.update(self.a, self.a_last, 0, 0)
    self.s_ = self.s
    self.a_last = self.a
    self.s = np.transpose(self.s)
    self.s_ = np.transpose(self.s_)
# --- One-off experiment setup (top-level script code) ---

# Create the per-run output directory tree. exist_ok is deliberately left at
# its default so a rerun into the same `path` fails fast instead of silently
# mixing results with a previous run. The six copy-pasted makedirs calls of
# the original are collapsed into one loop over the subdirectory names.
for _sub in ('car_L1_reward_figure', 'AdaptSpeed', 'ConvergenceRate',
             'log', 'result_images', 'SystemPerformance'):
    os.makedirs(path + '/' + _sub)

Date = 1001  # run/data-set identifier passed to the environment

''' main program '''
# dqn = dqn.DQN()
Agent = control_group.GreedyPolicy.Greedy(a_dim)  # greedy control-group baseline
ddpg = PDDPG.PDDPG(a_dim, s_dim)                  # learning agent under test
# ddpg = ddpg.DDPG(a_dim, s_dim)
env = envirment.Env(Date, path)
# retarder = Queue()

# Metric / figure collectors that record results over the run.
sys_per = output.SystemPerformance(a_dim)
line_fig = output.LineFigures(a_dim)
conv_rate = output.ConvergenceRate(MAX_EPISODES, SLOTNUM, a_dim, s_dim)
adapt_speed = output.AdaptSpeed(SLOTNUM, a_dim)

EPSILON = 0.95  # exploration threshold — presumably epsilon-greedy; confirm at use site

# Random initial action: each of the a_dim entries is 1 with probability 0.2,
# then flattened to a 1-D vector (mirrors the agent's reset()).
a = np.zeros((1, a_dim))
for i in range(a_dim):
    if np.random.uniform() < 0.2:
        a[0, i] = 1
a = np.ravel(a)