def main():
    """Connect to the configured server and step the environment until done.

    Reads ``config.ini`` for the server address (``server.ip`` /
    ``server.port``), the file name handed to the I/O thread (``file.file``),
    and the ``env.buffer_size`` / ``env.time`` values.  An ``io_thread`` is
    started on the connected socket, then the environment is stepped with an
    empty action list; the reward, receive-buffer size and count returned by
    every non-terminal step are printed.

    The socket is always closed on exit, including when a step raises.
    """
    cfg = ConfigParser()
    cfg.read('config.ini')

    ip = cfg.get('server', 'ip')
    port = cfg.getint('server', 'port')
    filename = cfg.get('file', 'file')
    size = cfg.getint('env', 'buffer_size')
    interval = cfg.getfloat('env', 'time')

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect((ip, port))
        fd = sock.fileno()

        io = io_thread(sock=sock, filename=filename, buffer_size=size)
        mpsched.persist_state(fd)
        io.start()

        my_env = env(fd=fd, buff_size=size, time=interval,
                     k=4, l=0.01, n=0.03, p=0.05)
        # The initial observation is not used by this driver; reset() is
        # still called before the first step.
        my_env.reset()

        while True:
            # A fresh empty action is passed on every step.
            action = []
            _, reward, count, recv_buff_size, done = my_env.step(action)
            if done:
                break
            print(reward)
            print(recv_buff_size)
            print(count)

        io.join()
    finally:
        # Release the connection even if the loop above raised.
        sock.close()
def __init__(self, fd, cfg, memory, explore=True):
    """Set up the thread with its agent, exploration noise and environment.

    Args:
        fd: Descriptor passed to ``mpsched.persist_state`` and to ``Env``.
        cfg: Config object; must provide ``get``/``getint``/``getfloat`` for
            the ``nafcnn.agent`` option and the ``env`` options ``time``,
            ``k``, ``alpha``, ``b`` and ``c``.
        memory: Stored on the instance as ``self.memory``.
        explore: Stored on the instance as ``self.explore``.
    """
    threading.Thread.__init__(self)

    self.fd = fd
    self.cfg = cfg
    self.memory = memory
    self.explore = explore

    # NOTE(review): torch.load unpickles the file named in the config, so
    # that path should only ever point at a trusted checkpoint.
    agent_path = cfg.get('nafcnn', 'agent')
    self.agent = torch.load(agent_path)
    self.ounoise = OUNoise(action_dimension=1)

    mpsched.persist_state(fd)

    # Collect the Env arguments in the same order they were originally read.
    env_kwargs = {'fd': self.fd}
    env_kwargs['time'] = self.cfg.getfloat('env', 'time')
    env_kwargs['k'] = self.cfg.getint('env', 'k')
    for option in ('alpha', 'b', 'c'):
        env_kwargs[option] = self.cfg.getfloat('env', option)
    self.env = Env(**env_kwargs)
def reset(self):
    """Sample ``self.k`` sub-info snapshots and return the initial state.

    After calling ``mpsched.persist_state`` and sleeping one second, a
    baseline snapshot is taken.  Each of the ``self.k`` following snapshots
    appends, per entry ``j``: the change in field 0 to ``self.tp[j]``, the
    change in field 1 to ``self.rtt[j]`` and field 2 as-is to
    ``self.cwnd[j]``, sleeping ``self.time`` seconds between snapshots.
    Finally the meta info is read into ``self.recv_buff_size`` and
    ``self.rr``.

    Returns:
        A two-element list; element ``i`` is
        ``tp[i] + rtt[i] + cwnd[i] + [recv_buff_size, rr]``.
    """
    mpsched.persist_state(self.fd)
    time.sleep(1)
    self.last = mpsched.get_sub_info(self.fd)

    for _ in range(self.k):
        snapshot = mpsched.get_sub_info(self.fd)
        for idx, sub in enumerate(snapshot):
            self.tp[idx].append(sub[0] - self.last[idx][0])
            self.rtt[idx].append(sub[1] - self.last[idx][1])
            self.cwnd[idx].append(sub[2])
        # The snapshot just read becomes the baseline for the next delta.
        self.last = snapshot
        time.sleep(self.time)

    meta = mpsched.get_meta_info(self.fd)
    self.recv_buff_size = meta[0]
    self.rr = meta[1]

    # Only the first two entries are reported.
    return [
        self.tp[i] + self.rtt[i] + self.cwnd[i]
        + [self.recv_buff_size, self.rr]
        for i in (0, 1)
    ]
def reset(self):
    """Sample ``self.k`` sub-info snapshots and return the initial state.

    A baseline snapshot is taken right after ``mpsched.persist_state``; its
    fields 3 and 4 for entries 0 and 1 are appended to ``self.rr`` and
    ``self.in_flight``.  Each of the ``self.k`` following snapshots then
    records, per entry ``j``: the change in field 0 (``self.tp[j]``),
    field 1 as-is (``self.rtt[j]``), the change in field 1
    (``self.dRtt[j]``), field 2 (``self.cwnd[j]``), and overwrites
    ``self.rr[j]`` / ``self.in_flight[j]`` with fields 3 and 4.  The method
    sleeps ``self.time`` seconds between snapshots.

    Returns:
        A two-element list; element ``i`` is
        ``tp[i] + rtt[i] + cwnd[i] + [rr[i], in_flight[i]]``.
    """
    mpsched.persist_state(self.fd)
    # An initial time.sleep() was disabled here in the original code.
    self.last = mpsched.get_sub_info(self.fd)

    # NOTE(review): these appends grow rr / in_flight on every reset unless
    # the lists are cleared elsewhere; only indices 0 and 1 are read below.
    for idx in (0, 1):
        self.rr.append(self.last[idx][3])
    for idx in (0, 1):
        self.in_flight.append(self.last[idx][4])

    for _ in range(self.k):
        snapshot = mpsched.get_sub_info(self.fd)
        for idx, sub in enumerate(snapshot):
            self.tp[idx].append(sub[0] - self.last[idx][0])
            self.rtt[idx].append(sub[1])
            self.dRtt[idx].append(sub[1] - self.last[idx][1])
            self.cwnd[idx].append(sub[2])
            self.rr[idx] = sub[3]
            self.in_flight[idx] = sub[4]
        # The snapshot just read becomes the baseline for the next delta.
        self.last = snapshot
        time.sleep(self.time)

    return [
        self.tp[i] + self.rtt[i] + self.cwnd[i]
        + [self.rr[i], self.in_flight[i]]
        for i in (0, 1)
    ]
def _build_arg_parser():
    """Return the command-line parser for the agent hyper-parameters."""
    parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
    parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
                        help='discount factor for reward (default: 0.99)')
    parser.add_argument('--tau', type=float, default=0.001, metavar='G',
                        help='discount factor for model (default: 0.001)')
    parser.add_argument('--noise_scale', type=float, default=0.3, metavar='G',
                        help='initial noise scale (default: 0.3)')
    parser.add_argument('--final_noise_scale', type=float, default=0.3,
                        metavar='G',
                        help='final noise scale (default: 0.3)')
    parser.add_argument('--exploration_end', type=int, default=100,
                        metavar='N',
                        help='number of episodes with noise (default: 100)')
    parser.add_argument('--hidden_size', type=int, default=128, metavar='N',
                        help='number of hidden size (default: 128)')
    parser.add_argument('--replay_size', type=int, default=1000000,
                        metavar='N',
                        help='size of replay buffer (default: 1000000)')
    parser.add_argument('--updates_per_step', type=int, default=5,
                        metavar='N',
                        help='model updates per simulator step (default: 5)')
    # The help text previously advertised 128 although the default is 64.
    parser.add_argument('--batch_size', type=int, default=64, metavar='N',
                        help='batch size (default: 64)')
    return parser


def _run_training_episode(i_episode, args, my_env, agent, memory, ounoise):
    """Run one noisy episode, updating the agent from replay; return its reward."""
    state = my_env.reset()

    # Interpolate the noise scale from noise_scale down to final_noise_scale
    # over the first `exploration_end` episodes.
    remaining = max(0, args.exploration_end - i_episode)
    ounoise.scale = ((args.noise_scale - args.final_noise_scale)
                     * remaining / args.exploration_end
                     + args.final_noise_scale)
    ounoise.reset()
    print(state)

    episode_reward = 0
    while True:
        state = torch.FloatTensor(state)
        action = agent.select_action(state, ounoise)
        next_state, reward, _count, _recv_buff_size, done = my_env.step(action)
        episode_reward += reward

        action = torch.FloatTensor(action)
        mask = torch.Tensor([not done])
        next_state = torch.FloatTensor(next_state)
        reward = torch.FloatTensor([float(reward)])
        memory.push(state, action, mask, next_state, reward)
        state = next_state

        # Start learning only once the buffer holds several batches.
        if len(memory) > args.batch_size * 5:
            for _ in range(args.updates_per_step):
                transitions = memory.sample(args.batch_size)
                batch = Transition(*zip(*transitions))
                agent.update_parameters(batch)

        if done:
            break
    return episode_reward


def _run_test_episode(my_env, agent):
    """Run one noise-free episode; return ``(episode_reward, elapsed_seconds)``."""
    state = my_env.reset()
    episode_reward = 0
    start_time = time.time()
    while True:
        state = torch.FloatTensor(state)
        action = agent.select_action(state)
        # step() yields five values, exactly as in the training loop; the
        # previous four-name unpacking here raised ValueError.
        next_state, reward, _count, _recv_buff_size, done = my_env.step(action)
        episode_reward += reward
        state = next_state
        if done:
            break
    return episode_reward, time.time() - start_time


def main():
    """Train the agent for 90% of the episodes, then time the remaining ones.

    Connection and environment settings come from ``config.ini``
    (``server.ip``, ``server.port``, ``file.file``, ``env.buffer_size``,
    ``env.time``, ``env.episode``); agent hyper-parameters come from the
    command line.  Every episode starts its own ``io_thread`` on the shared
    socket and joins it when the episode ends.  The wall-clock duration of
    each testing episode is written to ``times.txt``, one value per line.

    The socket is always closed on exit, including when an episode raises.
    """
    # Parse first so that `--help` and argument errors need no server.
    args = _build_arg_parser().parse_args()

    cfg = ConfigParser()
    cfg.read('config.ini')
    ip = cfg.get('server', 'ip')
    port = cfg.getint('server', 'port')
    filename = cfg.get('file', 'file')
    size = cfg.getint('env', 'buffer_size')
    interval = cfg.getfloat('env', 'time')
    episodes = cfg.getint('env', 'episode')

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect((ip, port))
        fd = sock.fileno()

        my_env = env(fd=fd, buff_size=size, time=interval,
                     k=8, l=0.01, n=0.03, p=0.05)
        mpsched.persist_state(fd)

        agent = NAF_CNN(args.gamma, args.tau, args.hidden_size,
                        my_env.observation_space.shape[0],
                        my_env.action_space)
        memory = ReplayMemory(args.replay_size)
        ounoise = OUNoise(my_env.action_space.shape[0])

        rewards = []
        times = []
        for i_episode in range(episodes):
            io = io_thread(sock=sock, filename=filename, buffer_size=size)
            io.start()
            if i_episode < 0.9 * episodes:
                # Training phase.
                rewards.append(_run_training_episode(
                    i_episode, args, my_env, agent, memory, ounoise))
            else:
                # Testing phase.
                episode_reward, elapsed = _run_test_episode(my_env, agent)
                rewards.append(episode_reward)
                times.append(str(elapsed) + "\n")
            io.join()

        # The original wrote an undefined name `lines`; the collected test
        # timings are what is meant to be saved.
        with open("times.txt", "w") as fo:
            fo.writelines(times)
    finally:
        sock.close()