def __init__(self, tau=True):
    self.action_num = 4                      # number of maintenance actions per component
    self.shape = [7, 7]                      # 7 components x (6 condition states + 1 time column)
    self.time = 0
    self.state = self.reset()                # reset() returns the initial one-hot state matrix
    self.components = 7
    # Recover each component's integer condition state from its one-hot row
    _, state_num = np.where(self.state[:, 0:6] == 1)
    self.state_num = state_num
    if tau:
        plot_state(self.state, X=7, Y=7)     # optionally visualize the initial state
def randomint(self, stau=False):
    """Jump to a uniformly random state and timestep (useful for exploration)."""
    s = np.random.randint(0, 6, 7)                   # random condition state per component
    self.time = np.random.randint(0, 100, 1)[0]      # random timestep within the horizon
    raw_s = np.zeros([7, 7], dtype=np.int32)
    raw_s[:, 0:6] = processsa(s, 6)                  # one-hot encode the condition states
    raw_s[:, 6] = time_encoder(self.time)            # encode the timestep in the last column
    if stau:
        plot_state(raw_s, 7, 7)
    self.state = raw_s
    self.state_num = s
    return self.state, self.time
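# The helpers used above, processsa and time_encoder, are defined elsewhere in
# the repository. The sketch below is an assumption about their behavior, inferred
# only from the shapes used here: processsa(s, 6) must return a (7, 6) one-hot
# matrix and time_encoder(t) a length-7 column; the actual implementations may differ.
import numpy as np

def processsa(s, num_states):
    # One-hot encode each component's integer condition state (assumed semantics).
    one_hot = np.zeros([len(s), num_states], dtype=np.int32)
    one_hot[np.arange(len(s)), s] = 1
    return one_hot

def time_encoder(t):
    # Encode the timestep as 7 binary digits; 7 bits cover t in [0, 128),
    # enough for the 100-step horizon (assumed semantics).
    return np.array([(t >> i) & 1 for i in range(7)], dtype=np.int32)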
# Roll out greedy-policy episodes, plotting each state and logging trajectories.
actions = np.zeros([num_episodes, 100, 7], dtype=np.int32)
states = np.zeros([num_episodes, 101, 7], dtype=np.int32)
path1 = simulation_path + '/training-step' + str(step)
if not os.path.exists(path1):
    os.mkdir(path1)
for i_episode in range(num_episodes):
    t = 0
    rAll = 0
    done = False
    current_state = env.reset()
    states[i_episode, 0] = env.state_num
    while not done:
        plt.figure(1)
        plot_state(current_state, 7, 7)
        plt.xlabel('states', **font)
        plt.ylabel('components', **font)
        # Greedy action from the main Q-network for the flattened 7x7 state
        state = np.reshape(current_state, [49])
        action = sess.run(mainQN.predict, feed_dict={mainQN.scalarInput: [state]})[0]
        next_state, next_reward, done = env.step(action)
        next_reward = next_reward / 600              # rescale rewards for readability
        rAll += np.sum(next_reward)
        actions[i_episode, t] = action
        states[i_episode, t + 1] = env.state_num
        plt.title('action:' + str(action) + ' time:' + str(env.time) + '\n'
                  + 'reward:' + np.array2string(next_reward, precision=2)
                  + ' total costs:' + str("%.2f" % rAll), **font)
        plt.savefig(path1 + '/' + str(t) + '.eps')
        # plt.savefig(c_path + '/' + str(t) + '.tif')
        # plt.savefig(path1 + '/' + str(t) + '.png')
        current_state = next_state                   # advance to the next state
        t += 1                                       # advance the within-episode step counter
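# After the rollout loop, the collected trajectories can be persisted for later
# analysis. A minimal sketch; the file names below are illustrative assumptions,
# not taken from the original code.
np.save(path1 + '/actions.npy', actions)
np.save(path1 + '/states.npy', states)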
def render(self):
    plot_state(self.state, 7, 7)     # draw the current 7x7 state matrix
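# Example usage, assuming the class above is named Environment (the actual
# class name is not shown in this excerpt):
# env = Environment(tau=False)      # build the environment without plotting
# state, t = env.randomint()        # jump to a random state/timestep
# env.render()                      # visualize the current state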