def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        s = self.env.reset()
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.
        for t in range(MAX_EP_STEP):
            if self.name == 'w0':
                self.env.render()
            a = self.lnet.choose_action(v_wrap(s[None, :]))
            s_, r, done, _ = self.env.step(a.clip(-2, 2))
            if t == MAX_EP_STEP - 1:
                done = True
            ep_r += r
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append((r + 8.1) / 8.1)    # normalize
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                # update global and assign to local net
                # sync
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_a, buffer_r, GAMMA)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    # done and print information
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
            s = s_
            total_step += 1
    self.res_queue.put(None)
def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        s = self.env.reset()
        buffer_s, buffer_action, buffer_reward = [], [], []
        ep_r = 0.
        while True:
            if self.name == 'w00':
                self.env.render()
            a = self.lnet.choose_action(v_wrap(s[None, :]))
            s_, r, done, _ = self.env.step(a)
            if done:
                r = -1
            ep_r += r
            buffer_action.append(a)
            buffer_s.append(s)
            buffer_reward.append(r)
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                # update global and assign to local net
                # sync
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_action, buffer_reward, GAMMA)
                buffer_s, buffer_action, buffer_reward = [], [], []
                if done:
                    # done and print information
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
            s = s_
            total_step += 1
    self.res_queue.put(None)
def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        s = self.env.reset()
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.
        while True:
            if self.name == 'w00':
                self.env.render()
            a = self.lnet.choose_action(v_wrap(s[None, :]))
            s_, r, done, _ = self.env.step(a)
            if done:
                r = -1
            ep_r += r
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append(r)
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_a, buffer_r, GAMMA)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
            s = s_
            total_step += 1
    self.res_queue.put(None)
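Every variant above leans on the same undefined helpers: v_wrap, push_and_pull, and record. For reference, here is a minimal sketch of those helpers, modeled on the widely used pytorch-A3C utilities this code appears to follow; the exact signatures, the gradient hand-off via gp._grad, and the 0.99/0.01 running-average constants are assumptions, not taken from these snippets.

# Minimal sketch of the shared helpers, assuming the common pytorch-A3C layout.
import numpy as np
import torch

def v_wrap(np_array, dtype=np.float32):
    # wrap a numpy array as a torch tensor of the expected dtype
    if np_array.dtype != dtype:
        np_array = np_array.astype(dtype)
    return torch.from_numpy(np_array)

def push_and_pull(opt, lnet, gnet, done, s_, bs, ba, br, gamma):
    # bootstrap value of the last state; 0 if the episode ended
    v_s_ = 0. if done else lnet.forward(v_wrap(s_[None, :]))[-1].data.numpy()[0, 0]
    buffer_v_target = []
    for r in br[::-1]:                  # n-step discounted returns, computed backwards
        v_s_ = r + gamma * v_s_
        buffer_v_target.append(v_s_)
    buffer_v_target.reverse()
    loss = lnet.loss_func(
        v_wrap(np.vstack(bs)),
        v_wrap(np.array(ba), dtype=np.int64) if ba[0].dtype == np.int64 else v_wrap(np.vstack(ba)),
        v_wrap(np.array(buffer_v_target)[:, None]))
    # push: local gradients into the global net; pull: global weights back
    opt.zero_grad()
    loss.backward()
    for lp, gp in zip(lnet.parameters(), gnet.parameters()):
        gp._grad = lp.grad
    opt.step()
    lnet.load_state_dict(gnet.state_dict())

def record(global_ep, global_ep_r, ep_r, res_queue, name):
    # bump the shared episode counter and a smoothed running reward
    with global_ep.get_lock():
        global_ep.value += 1
    with global_ep_r.get_lock():
        if global_ep_r.value == 0.:
            global_ep_r.value = ep_r
        else:
            global_ep_r.value = global_ep_r.value * 0.99 + ep_r * 0.01
    res_queue.put(global_ep_r.value)
    print(name, "Ep:", global_ep.value, "| Ep_r: %.0f" % global_ep_r.value)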
def run(self):
    total_step = 1
    while self.g_ep.value < self.args.MAXEPS:
        # print(self.g_ep.value)
        s = self.env.reset()
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0
        for t in range(self.args.MAXSTEP):
            if self.name == 'worker0':
                self.env.render()
            a = self.lnet.choose_action(v_wrap(s[None, :]))
            s_, r, done, _ = self.env.step(a.clip(-2, 2))
            if t == self.args.MAXSTEP - 1:
                done = True
            ep_r += r
            buffer_s.append(s)
            buffer_a.append(a)
            buffer_r.append((r + 8.1) / 8.1)
            if total_step % self.args.updateperiod == 0 or done:
                # print(total_step)
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_a, buffer_r, self.args.gamma)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    print('*')
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
            s = s_
            total_step += 1
    self.res_queue.put(None)
def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        try:
            mapID = np.random.randint(1, 6)          # choose a map ID at random from the 5 maps in the Maps folder
            posID_x = np.random.randint(MAP_MAX_X)   # choose a random initial x position for the agent
            posID_y = np.random.randint(MAP_MAX_Y)   # choose a random initial y position for the agent
            # Create a request that initializes the map, the initial position,
            # the initial energy, and the maximum number of steps of the agent
            request = ("map" + str(mapID) + "," + str(posID_x) + "," + str(posID_y) + ",50,100")
            # Send the request to the game environment (GAME_SOCKET_DUMMY.py)
            self.env.send_map_info(request)
            self.env.reset()
            s = self.env.get_state()
            print(s.shape)
            buffer_s, buffer_a, buffer_r = [], [], []
            ep_r = 0.
            for t in range(MAX_EP_STEP):
                a = self.lnet.choose_action(v_wrap(s[None, :]), .5)
                self.env.step(str(a))
                s_ = self.env.get_state()
                r = self.env.get_reward()
                done = self.env.check_terminate()
                if t == MAX_EP_STEP - 1:
                    done = True
                ep_r += r
                buffer_a.append(a)
                buffer_s.append(s)
                buffer_r.append(r)
                if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                    # update global and assign to local net
                    # sync
                    push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                                  buffer_s, buffer_a, buffer_r, GAMMA)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    if done:
                        # done and print information
                        record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                        break
                s = s_
                total_step += 1
        except Exception as e:
            import traceback
            traceback.print_exc()
            break
    self.res_queue.put(None)
def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        # restart the game
        s = step(0, 1, self.port)[0]
        # update the video frame for matplotlib
        self.frame.refresh_plot(s)
        # print("img_array", s)
        s = feature_vec(s)
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.0
        while True:
            # have the model make a decision
            a = self.lnet.choose_action(s)
            # drive the vehicle
            s_, r, done = step(a, 0, self.port)
            # update the video frame for matplotlib
            self.frame.refresh_plot(s_)
            s_ = feature_vec(s_)  # fixed: was feature_vec(s), which featurized the old state instead of the new one
            # "-1" is one character longer than "1" or "0", so right-align the action
            # to two characters to keep the console output from shaking
            print("{}, action = {:>2d}, reward = {}, episode reward = {}, restart = {}"
                  .format(self.name, a - 1, round(r, 2), round(ep_r, 2), done))
            ep_r += r
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append(r)
            # update global and assign to local net
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                # sync
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_a, buffer_r, GAMMA)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    # done and print information
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
            s = s_
            total_step += 1
    self.res_queue.put(None)
def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        s = reset()
        print("img_array", s)
        s = feature_vec(s)
        print("feat", s)
        # s = self.env.reset()
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.
        while True:
            # if self.name == 'w0':
            #     self.env.render()
            a = self.lnet.choose_action(s)  # feature_vec
            # a = self.lnet.choose_action(v_wrap(s[None, :]))
            s_, r, done = step(a)
            # s_, r, done, _ = self.env.step(a)
            print("a", a)
            print("s_", s_)
            print("r", r)
            print("done", done)
            if done:
                r = -1
            ep_r += r
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append(r)
            # update global and assign to local net
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                # sync
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_a, buffer_r, GAMMA)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    # done and print information
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
            s = s_
            total_step += 1
    self.res_queue.put(None)
def run(self):
    ep = 0
    while self.g_ep.value < 100:
        # total_step = 1
        s = self.env.reset()
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = [0. for i in range(self.agent_num)]
        for step in range(1000):
            # print(ep)
            # if self.name == 'w00' and self.g_ep.value % 10 == 0:
            #     path = "/Users/xue/Desktop/temp/temp%d" % self.g_ep.value
            #     if not os.path.exists(path):
            #         os.mkdir(path)
            #     self.env.render(path)
            s0 = s[0]
            a0, prob0 = self.lnet[0].choose_action(v_wrap(s0[None, :]), True)
            a0 = [a0]
            # append the influencer's action to every other agent's observation
            s = [np.concatenate((s[i], np.array(a0)), -1) for i in range(1, self.agent_num)]
            s = [s0] + s
            a = [self.lnet[i].choose_action(v_wrap(s[i][None, :]), True) for i in range(1, self.agent_num)]
            prob = [elem[1] for elem in a]
            a = a0 + [elem[0] for elem in a]
            s_, r, done, _ = self.env.step(a, need_argmax=False)
            # print(a)
            # if done[0]: r = -1
            ep_r = [ep_r[i] + r[i] for i in range(self.agent_num)]
            x = self._influencer_reward(r[0], self.lnet[1:], prob0, a0, s[1:], prob)
            r = [float(i) for i in r]
            r[0] += x.numpy()
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append(r)
            if step % 5 == 0 and step != 0:
                # update global and assign to local net
                _s0 = s_[0]
                a0 = self.lnet[0].choose_action(v_wrap(_s0[None, :]), False)
                a0 = [a0]
                _s = [np.concatenate((s_[i], np.array(a0)), -1) for i in range(1, self.agent_num)]
                _s = [_s0] + _s
                # sync
                done = [False for i in range(self.agent_num)]
                [push_and_pull(self.opt[i], self.lnet[i], self.gnet[i], done[i], _s[i],
                               buffer_s, buffer_a, buffer_r, self.GAMMA, i)
                 for i in range(self.agent_num)]
                [self.scheduler_lr[i].step() for i in range(self.agent_num)]
                buffer_s, buffer_a, buffer_r = [], [], []
            # if ep == 999:
            #     # done and print information
            #     record(self.g_ep, self.g_ep_r, sum(ep_r), self.res_queue, self.name)
            #     break
            s = s_
            # total_step += 1
        print('ep%d' % ep, self.name, sum(ep_r))
        ep += 1
        if self.name == "w00":
            self.sender.send([sum(ep_r), ep])
    self.res_queue.put(None)
def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        s = np.transpose(self.env.reset(), (2, 0, 1)) / 255.0  # HWC -> CHW, scaled to [0, 1]
        lives = self.lives_sum
        buffer_s, buffer_a, buffer_r = [], [], []
        self.ep_r = 0.
        actions = []
        while True:
            total_step += 1
            self.env.render()
            a = self.lnet.choose_action(v_wrap(s[None, :]))
            # a = np.random.randint(low=0, high=8)
            actions.append(str(a))
            s_, r, done, info = self.env.step(a)
            s_ = np.transpose(s_, (2, 0, 1)) / 255.0
            livesLeft = info['ale.lives']
            # punish the agent every time it loses a life
            if livesLeft != lives:
                r = DIE_PENALTY
                lives = livesLeft
            self.ep_r += r
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append(r)
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                # update global and assign to local net
                # sync
                # if self.name == 'w0':
                #     self.env.render()
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_a, buffer_r, GAMMA)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    # done and print information
                    record(self.g_ep, self.ep_r, self.res_queue, self.name, self.lives_sum, DIE_PENALTY)
                    break
            s = s_
    self.res_queue.put(None)
def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        ################################
        #        Initial State         #
        ################################
        a = np.array([100, 0])
        s = torch.tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]).reshape((1, N_S)).unsqueeze(0)
        real_state = np.array([])
        #########################
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.
        for t in range(MAX_EP_STEP):
            a = self.lnet.choose_action(s)
            r, real_state_, s_ = self.env(a, real_state, t)
            r = np.expand_dims(np.expand_dims(r, 0), 0)
            s_ = s_.reshape((1, N_S)).unsqueeze(0).float()
            ep_r += r
            buffer_a.append(np.array(a))
            buffer_s.append(s.squeeze().numpy())
            buffer_r.append(r.squeeze())
            done = False
            if t == MAX_EP_STEP - 1:
                done = True
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                # update global and assign to local net
                # sync
                push_and_pull(self.opt, self.lnet, self.gnet, done, s_,
                              buffer_s, buffer_a, buffer_r, GAMMA)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    # done and print information
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
            s = s_
            real_state = real_state_
            total_step += 1
    self.res_queue.put(None)
def run(self):
    total_step = 1
    while self.global_episode.value < self.max_episodes:
        s = self.env.reset()
        episode_reward = 0.0
        s_list, a_list, r_list = [], [], []
        while True:
            s_tensor = to_tensor(expand_dim(s))
            a = self.local_net.choose_action(s_tensor)
            s_, r, done, _ = self.env.step(a)
            # shape the reward from the resulting state; was modify_reward(s),
            # which scored the pre-step state and so ignored the action's effect
            r = modify_reward(s_)
            episode_reward += r
            s_list.append(s)
            a_list.append(a)
            r_list.append(r)
            # update the global net and its local net
            if total_step % self.update_global_iter == 0 or done:
                push_and_pull(self.global_net, self.local_net, self.global_opt, s_,
                              done, s_list, a_list, r_list, self.gamma)
                s_list, a_list, r_list = [], [], []
                if done:
                    self.update_global_values(episode_reward)
                    print('%s: episode: %d, reward: %.2f'
                          % (self.name, self.global_episode.value, self.global_rewards.value))
                    break
            s = s_
            total_step += 1
    self.result_queue.put('done')
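This variant swaps the raw environment reward for modify_reward, whose body is not shown anywhere in these snippets. A plausible sketch, assuming the CartPole shaping commonly used with this worker pattern (thresholds taken from the CartPole spec; everything here is an illustration, not the author's code):

# Hypothetical sketch of modify_reward, assuming a CartPole-style state
# [x, x_dot, theta, theta_dot]; rewards staying centered and upright.
def modify_reward(s, x_threshold=2.4, theta_threshold=0.2095):
    x, x_dot, theta, theta_dot = s
    r1 = (x_threshold - abs(x)) / x_threshold - 0.8              # cart near the center
    r2 = (theta_threshold - abs(theta)) / theta_threshold - 0.5  # pole near vertical
    return r1 + r2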
def run(self):
    ep = 0
    while self.g_ep.value < 100:
        # total_step = 1
        s = self.env.reset()
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = [0. for i in range(self.agent_num)]
        for step in range(1000):
            # print(ep)
            # if self.name == 'w00' and self.g_ep.value % 10 == 0:
            #     path = "/Users/xue/Desktop/temp/temp%d" % self.g_ep.value
            #     if not os.path.exists(path):
            #         os.mkdir(path)
            #     self.env.render(path)
            a = [self.lnet[i].choose_action(v_wrap(s[i][None, :])) for i in range(self.agent_num)]
            s_, r, done, _ = self.env.step(a, need_argmax=False)
            # print(a)
            # if done[0]: r = -1
            ep_r = [ep_r[i] + r[i] for i in range(self.agent_num)]
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append(r)
            if step % 5 == 0:
                # update global and assign to local net
                # sync
                done = [False for i in range(self.agent_num)]
                [push_and_pull(self.opt[i], self.lnet[i], self.gnet[i], done[i], s_[i],
                               buffer_s, buffer_a, buffer_r, self.GAMMA, i)
                 for i in range(self.agent_num)]
                [self.scheduler_lr[i].step() for i in range(self.agent_num)]
                buffer_s, buffer_a, buffer_r = [], [], []
            # if ep == 999:
            #     # done and print information
            #     record(self.g_ep, self.g_ep_r, sum(ep_r), self.res_queue, self.name)
            #     break
            s = s_
            # total_step += 1
        print('ep%d' % ep, self.name, sum(ep_r))
        ep += 1
        if self.name == "w00":
            self.sender.send([sum(ep_r), ep])
    self.res_queue.put(None)
def run(self):
    episode_size = 1000
    num_episode = 0
    num_block = 0
    num_request = 0
    ep_block = 0
    blocking_rate_list = []
    r_a = []
    r_s = []
    # instantiate the Visdom window class
    viz = Visdom()
    # create the window and initialize it
    viz.line([0.], [0], win='train_loss', opts=dict(title='train_loss'))
    buffer_s, buffer_a, buffer_r, buffer_b, buffer_n = [], [], [], [], []
    while self.g_ep.value < MAX_EP:
        # while num_episode < 10:
        time_to = 1
        for i in range(len(buffer_n)):
            buffer_n[i] += 1
        flag = self.release_request(time_to)
        if flag == 1:  # rearrangement
            for i in range(len(self.service_list)):
                g = data_set(self.service_list[i], self.G)
                # for src, dst in edge_list:
                #     g.edges[src, dst].data['h'] = torch.sum(
                #         torch.index_select(g.ndata['feat'], dim=0,
                #                            index=torch.tensor([src, dst])), dim=0).view(1, state_dim)
                # inputs = g.edata['h']
                # inputs = g.ndata['feat']
                inputs = g.edata['feat']
                buffer_s.append([g, inputs])
                action = self.lnet.get_action(g, inputs)
                block, edge_state = self.attempt(self.service_list[i], action)
                # block = self.attempt_n(self.service_list[i], action)
                # print('block:', block)
                r_a.append(action.numpy())
                r_s.append(edge_state)
                # r_s.append([self.service_list[i][1:3]])
                buffer_a.append(action)
                buffer_b.append(block)
                buffer_r.append(-block)
                buffer_n.append(1)
        num_request += 1
        # print(num_request)
        mode = random.randint(0, 2)
        # mode = 0
        if mode == 0:  # a multicast session first appears
            source, destination, bandwidth, t = self.random_request()
            # source, destination, bandwidth, t = 0, [1, 2], 1, random.randint(1, 100)
            path_tree = self.method(self.G, source, destination, bandwidth)
            if len(path_tree) == 0:
                num_block += 1
                for i in range(len(buffer_b)):
                    buffer_b[i] += 1
                ep_block += 1
            else:
                self.service_list.append([path_tree, source, destination, bandwidth, t])
                self.update_request(path_tree)
        elif mode == 1:  # a new member d wants to join
            i, d = self.node_join()
            if i != -1:
                tree, source, destination, bandwidth, _ = self.service_list[i]
                flag = 0
                for path, _, _ in tree:
                    if d == path[0] or d == path[-1]:
                        flag = 1
                if flag == 1:
                    self.service_list[i][2].append(d)
                else:
                    p = []
                    for u in ([source] + destination):
                        # p.append(self.path_map[u, d])
                        p.append(KSP_FA(self.G, self.K_path_map[u][d], bandwidth))
                    p = sorted(p, key=lambda x: (x[2], x[3]))
                    path, start_f, _, len_path = p[0]
                    # path = p[0][0]
                    # len_path = p[0][1]
                    # len_path = RM.get_path_len(self.G, path)
                    len_fs = RM.modulation_level(bandwidth, len_path)
                    # start_f = RM.SP_FF(self.G, path, len_fs)
                    if start_f == -1:
                        num_block += 1
                        for i in range(len(buffer_b)):
                            buffer_b[i] += 1
                        ep_block += 1
                    else:
                        self.update_fs(path, len_fs, start_f)
                        self.service_list[i][0].append([path, len_fs, start_f])
                        self.service_list[i][2].append(d)
        else:  # a member d wants to leave
            i, d = self.node_leave()
            if i != -1:
                tree, source, destination, bandwidth, _ = self.service_list[i]
                flag = 0
                for path, _, _ in tree:
                    if path[0] == d:
                        flag = 1
                if flag == 1:  # d has downstream members
                    self.service_list[i][2].remove(d)
                else:
                    self.service_list[i][2].remove(d)
                    for path, len_fs, start_f in tree:
                        if path[-1] == d:
                            self.release_fs(path, len_fs, start_f)
                            self.service_list[i][0].remove([path, len_fs, start_f])
        if len(buffer_s) >= 2 * self.batch_size - 1:
            # for i in range(self.batch_size):
            #     buffer_b[i] = buffer_b[i] / buffer_n[i]
            loss = push_and_pull(self.opt, self.lnet, self.gnet,
                                 buffer_s[:self.batch_size], buffer_a[:self.batch_size],
                                 buffer_r[:self.batch_size], GAMMA)
            # record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
            viz.line([[float(loss), np.mean(buffer_r[:self.batch_size])]],
                     [self.g_ep.value], win='train_loss', update='append')
            del buffer_s[:self.batch_size]
            del buffer_a[:self.batch_size]
            del buffer_r[:self.batch_size]
            del buffer_b[:self.batch_size]
            del buffer_n[:self.batch_size]
            with self.g_ep.get_lock():
                self.g_ep.value += 1
            time.sleep(0.5)
        if num_request % episode_size == 0:
            num_episode += 1
            print("Ep: {}, Blocking P: {}, Ep Bp: {}".format(
                num_episode, num_block / num_request, ep_block / episode_size))
            blocking_rate_list.append(num_block / num_request)
            ep_block = 0
    self.release_request(1000)
    self.res_queue.put(None)
    np.save("training_logs/r_a" + self.name, r_a)
    np.save("training_logs/r_s" + self.name, r_s)
    return num_block / num_request
def run(self):
    episode_size = 1000
    num_episode = 0
    num_block = 0
    num_request = 0
    ep_block = 0
    blocking_rate_list = []
    t = 0
    num_session = 0
    num_session_ep = 0
    num_rerouting = 0
    num_rerouting_ep = 0
    occup_rate = []
    fragment = []
    r_a = []
    r_s = []
    # instantiate the Visdom window class
    viz = Visdom()
    # create the window and initialize it
    # viz.line([0.], [0], win='train_loss', opts=dict(title='train_loss'))
    buffer_s, buffer_a, buffer_r, buffer_b, buffer_n = [], [], [], [], []
    b = 0
    n = 0
    while self.g_ep.value < MAX_EP:
        # while num_episode < 10:
        time_to = round(np.random.exponential(30))
        t += time_to
        flag = self.release_request(time_to)
        # if flag == 1:  # rearrangement
        if t >= 1000:
            if len(buffer_r) != 0:
                buffer_r[-1] = 1 - b / n
                b = 0
                n = 0
            state, q_list, q_list_new = self.get_state(self.service_list, self.G)
            action = self.lnet.choose_action(torch.tensor(state, dtype=torch.float32))
            num_r = self.conduct(action, q_list, q_list_new)
            num_rerouting += num_r
            num_rerouting_ep += num_r
            buffer_s.append(state)
            buffer_a.append(action)
            buffer_r.append(0)
            # buffer_b.append(0)
            # buffer_n.append(0)
            s_, _, _ = self.get_state(self.service_list, self.G)
            t -= 1000
        num_request += 1
        n += 1
        block = self.new_request()
        b += block
        # for i in range(len(buffer_b)):
        #     buffer_b[i] += block
        #     buffer_n[i] += 1
        num_session += (1 - block)
        num_session_ep += (1 - block)
        num_block += block
        ep_block += block
        self.request_join()
        self.request_leave()
        if len(buffer_s) >= 2 * self.batch_size - 1:
            # for i in range(self.batch_size):
            #     buffer_r[i] = 1 - buffer_b[i] / buffer_n[i]
            br = discount_reward(buffer_r, GAMMA, self.batch_size)
            loss = push_and_pull(self.opt, self.lnet, self.gnet, s_,
                                 buffer_s[:self.batch_size], buffer_a[:self.batch_size], br, GAMMA)
            # record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
            viz.line([float(loss)], [self.g_ep.value], win='train_loss', update='append',
                     opts=dict(title='train_loss', legend=['train_loss']))
            viz.line([np.mean(buffer_r[:self.batch_size])], [self.g_ep.value], win='reward',
                     update='append', opts=dict(title='reward', legend=['reward']))
            del buffer_s[:self.batch_size]
            del buffer_a[:self.batch_size]
            del buffer_r[:self.batch_size]
            del buffer_b[:self.batch_size]
            del buffer_n[:self.batch_size]
            with self.g_ep.get_lock():
                self.g_ep.value += 1
            time.sleep(0.5)
        if num_request % episode_size == 0:
            num_episode += 1
            print("Ep: {}, Blocking P: {}, Ep Bp: {}".format(
                num_episode, num_block / num_request, ep_block / episode_size))
            blocking_rate_list.append(num_block / num_request)
            o, f = self.statistical()
            occup_rate.append(o)
            fragment.append(f)
            print(" occupancy rate: {}, Ep: {}".format(np.mean(occup_rate), o))
            print(" fragment: {}, Ep: {}".format(np.mean(fragment), f))
            print(" num_rerouting: {}, Ep: {}".format(
                num_rerouting / num_session, num_rerouting_ep / num_session_ep))
            # print(" r_0 r_1: {}".format(self.agent.sta_0_1(r_a)))
            # r_a.clear()
            ep_block = 0
            num_rerouting_ep = 0
            num_session_ep = 0
    self.release_request(1000)
    self.res_queue.put(None)
    # np.save("training_logs/r_a" + self.name, r_a)
    # np.save("training_logs/r_s" + self.name, r_s)
    return num_block / num_request
def run(self):
    total_step = 1
    while self.g_ep.value < MAX_EP:
        # get video -----------------------------
        while True:
            video_random = random.random()
            videoName = ""
            for i in range(len(self.videoList)):
                if video_random < self.videoList[i][1]:
                    videoName = self.videoList[i - 1][0]
                    break
            if videoName == "":
                videoName = self.videoList[-1][0]
            else:
                break
        # get video -----------------------------
        busyList = self.get_busyTrace()
        bandwidth_fileName, rtt = self.getBandwidthFile()
        reqBI = self.client.init(videoName, bandwidth_fileName, rtt, self.bwType)
        # mask ---------------------------
        mask = [1] * A_DIM
        randmCachedBICount = random.randint(1, 5)
        BI = [0, 1, 2, 3, 4]
        randomCachedBI = random.sample(BI, randmCachedBICount)
        for bIndex in range(5):
            if bIndex not in randomCachedBI:
                mask[bIndex] = 0
        # mask ---------------------------
        segNum = 0
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.
        busy = busyList[segNum % len(busyList)]
        state_ = np.zeros(S_LEN)
        state = state_.copy()
        # state = [reqBitrate, lastBitrate, buffer, hThroughput, mThroughput, busy, mask]
        reqBitrate, lastBitrate, buffer, hThroughput, mThroughput, reward, reqBI, done, segNum, busy = [0] * 10
        # start one epoch **********************************
        while True:
            if sum(mask) == 1:
                a = mask.index(1)
                break
            # lnet.choose_action ****************************
            a, logits = self.lnet.choose_action(mask, v_wrap(state[None, :]))
            # lnet.choose_action ****************************
            # print --------------------------------------------
            if platform.system() == "Linux":
                if random.randint(0, 1000) == 1:
                    print("reqb=", reqBitrate, "lb=", lastBitrate, "buffer=", int(buffer),
                          "hT=", int(hThroughput), "mT=", int(mThroughput), "busy=", round(busy, 2),
                          "mask=", mask, "action=", a, "reqBI=", reqBI,
                          "reward=", round(reward, 2), "logits=", logits)
            else:
                print("reqb=", reqBitrate, "lb=", round(lastBitrate, 2), "buffer=", int(buffer),
                      "hT=", int(hThroughput), "mT=", int(mThroughput), "busy=", round(busy, 2),
                      "mask=", mask, "action=", a, "reqBI=", reqBI,
                      "reward=", round(reward, 2), "logits=", logits)
            # print --------------------------------------------
            busy = busyList[segNum % len(busyList)]
            # client.run ****************************
            if a == 5:
                hitFlag = False
            else:
                hitFlag = True
            reqBitrate, lastBitrate, buffer, hThroughput, mThroughput, reward, reqBI, done, segNum = \
                self.client.run(a, busy, hitFlag)
            # client.run ****************************
            state_[0] = reqBitrate / BITRATES[-1]
            state_[1] = lastBitrate / BITRATES[-1]
            state_[2] = (buffer / 1000 - 30) / 10
            state_[3] = (hThroughput - throughput_mean) / throughput_std
            state_[4] = (mThroughput - throughput_mean) / throughput_std
            print(state)
            # state_[5] = (busy - busy_mean) / busy_std
            reward = reward / 5
            ep_r += reward
            buffer_a.append(a)
            buffer_s.append(state)
            buffer_r.append(reward)
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                # update global and assign to local net
                # sync
                push_and_pull(self.opt, self.lnet, self.gnet, done, state_,
                              buffer_s, buffer_a, buffer_r, GAMMA)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    # done and print information
                    record(self.g_ep, self.g_ep_r, ep_r, self.res_queue, self.name)
                    break
            state = state_.copy()
            total_step += 1
    self.res_queue.put(None)
    print("end run")
def run(self):
    ptitle('Training Agent: {}'.format(self.rank))
    config = self.config
    check_point_episodes = config["check_point_episodes"]
    check_point_folder = os.path.join(config["check_point_folder"], config["env"])
    setup_worker_logging(self.log_queue)
    self.env = create_env(config["env"], self.seed)
    observation_space = self.env.observation_space
    action_space = IdToAct(self.env.action_space)
    with open(os.path.join("data", f"{config['env']}_action_space.npz"), 'rb') as f:
        archive = np.load(f)
        action_space.init_converter(all_actions=archive[archive.files[0]])
    self.action_space = action_space
    all_actions = np.array(action_space.all_actions)
    self.local_net = Net(self.state_size, self.action_mappings, self.action_line_mappings)  # local network
    self.local_net = cuda(self.gpu_id, self.local_net)
    total_step = 1
    l_ep = 0
    while self.g_ep.value < self.num_episodes:
        self.print(f"{self.env.name} - {self.env.chronics_handler.get_name()}")
        if isinstance(self.env, MultiMixEnvironment):
            obs = self.env.reset(random=True)
        else:
            obs = self.env.reset()
        maintenance_list = obs.time_next_maintenance + obs.duration_next_maintenance
        s = self.convert_obs(observation_space, obs)
        s = v_wrap(s[None, :])
        s = cuda(self.gpu_id, s)
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.
        ep_step = 0
        ep_agent_num_dmd = 0
        ep_agent_num_acts = 0
        while True:
            rho = obs.rho.copy()
            rho[rho == 0.0] = 1.0
            lines_overload = rho > config["danger_threshold"]
            expert_act = expert_rules(self.name, maintenance_list, ep_step, action_space, obs)
            if expert_act is not None:
                a = np.where(all_actions == expert_act)[0][0]
                choosen_actions = np.array([a])
                # print(f"Expert act: {a}")
            elif not np.any(lines_overload):
                choosen_actions = np.array([0])
            else:
                lines_overload = cuda(self.gpu_id, torch.tensor(lines_overload.astype(int)).float())
                attention = torch.matmul(lines_overload.reshape(1, -1), self.action_line_mappings)
                attention[attention > 1] = 1
                choosen_actions = self.local_net.choose_action(s, attention, self.g_num_candidate_acts.value)
                ep_agent_num_dmd += 1
            obs_previous = obs
            a, obs_forecasted, obs_do_nothing = forecast_actions(
                choosen_actions, self.action_space, obs, min_threshold=0.95)
            logging.info(f"{self.name}_act|||{a}")
            act = self.action_space.convert_act(a)
            obs, r, done, info = self.env.step(act)
            r = lreward(a, self.env, obs_previous, obs_do_nothing, obs_forecasted,
                        obs, done, info, threshold_safe=0.85)
            if a > 0:
                if r > 0:
                    print("+", end="")
                elif r < 0:
                    print("-", end="")
                elif len(choosen_actions) > 0:
                    print("*", end="")
                else:
                    print("x", end="")
            else:
                if len(choosen_actions) > 0:
                    print("o", end="")
                else:
                    print("0", end="")
            if r > 0:
                ep_agent_num_acts += 1
            s_ = self.convert_obs(observation_space, obs)
            s_ = v_wrap(s_[None, :])
            s_ = cuda(self.gpu_id, s_)
            ep_r += r
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append(r)
            if total_step % self.update_global_iter == 0 or done:
                # update global and assign to local net
                # sync
                # if len(buffer_r) > 0 and np.mean(np.abs(buffer_r)) > 0:
                buffer_a = cuda(self.gpu_id, torch.tensor(buffer_a, dtype=torch.long))
                buffer_s = cuda(self.gpu_id, torch.cat(buffer_s))
                push_and_pull(self.opt, self.local_net, check_point_episodes, check_point_folder,
                              self.g_ep, l_ep, self.name, self.rank, self.global_net, done, s_,
                              buffer_s, buffer_a, buffer_r, self.gamma, self.gpu_id)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:
                    # done and print information
                    print("")
                    record(config["starting_num_candidate_acts"], config["num_candidate_acts_decay_iter"],
                           self.g_ep, self.g_step, self.g_num_candidate_acts, self.g_ep_r, ep_r,
                           self.res_queue, self.name, ep_step, ep_agent_num_dmd, ep_agent_num_acts)
                    break
            s = s_
            total_step += 1
            ep_step += 1
        l_ep += 1
    self.res_queue.put(None)
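All of these run() methods live on a Worker subclass of mp.Process and assume a main process that builds the shared global net, optimizer, counters, and result queue, then drains the queue until a worker posts its None sentinel. A minimal sketch of that wiring, following the common pytorch-A3C pattern; Net, N_S, N_A, and SharedAdam are assumed to be defined elsewhere, as in that reference code.

# Sketch of the process wiring these workers assume (names are assumptions).
import gym
import torch.multiprocessing as mp

class Worker(mp.Process):
    def __init__(self, gnet, opt, global_ep, global_ep_r, res_queue, name):
        super().__init__()
        self.name = 'w%02i' % name
        self.g_ep, self.g_ep_r, self.res_queue = global_ep, global_ep_r, res_queue
        self.gnet, self.opt = gnet, opt
        self.lnet = Net(N_S, N_A)                 # per-process local network
        self.env = gym.make('CartPole-v0').unwrapped

    # run(self) is one of the variants above

if __name__ == "__main__":
    gnet = Net(N_S, N_A)
    gnet.share_memory()                           # weights visible to all workers
    opt = SharedAdam(gnet.parameters(), lr=1e-4)  # optimizer with shared state
    global_ep, global_ep_r = mp.Value('i', 0), mp.Value('d', 0.)
    res_queue = mp.Queue()
    workers = [Worker(gnet, opt, global_ep, global_ep_r, res_queue, i)
               for i in range(mp.cpu_count())]
    [w.start() for w in workers]
    res = []
    while True:                                   # drain until the None sentinel
        r = res_queue.get()
        if r is None:
            break
        res.append(r)
    [w.join() for w in workers]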