def main():
    """Spawn the central coordinator and the worker agents, then wait.

    When ``mp_util.MP_ENABLED`` is set, a second trace set
    (``TRAIN_TRACES_LTE``) is loaded and each agent receives dictionaries
    keyed by ``'wifi'`` and ``'lte'``; otherwise each agent receives the
    plain ``TRAIN_TRACES`` data. Only the coordinator process is joined.
    """
    np.random.seed(RANDOM_SEED)

    # the action-dimension sanity check is skipped when MP_ENABLED is set
    if not mp_util.MP_ENABLED:
        assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication: one pair of size-1 queues per agent
    net_params_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]
    exp_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]

    # coordinator and agents run as processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)

    if mp_util.MP_ENABLED:
        all_cooked_time_lte, all_cooked_bw_lte, _ = load_trace.load_trace(
            TRAIN_TRACES_LTE)
        both_cooked_time = {'wifi': all_cooked_time,
                            'lte': all_cooked_time_lte}
        both_cooked_bw = {'wifi': all_cooked_bw,
                          'lte': all_cooked_bw_lte}
        trace_time, trace_bw = both_cooked_time, both_cooked_bw
    else:
        trace_time, trace_bw = all_cooked_time, all_cooked_bw

    agents = [
        mp.Process(target=agent,
                   args=(idx, trace_time, trace_bw,
                         net_params_queues[idx], exp_queues[idx]))
        for idx in xrange(NUM_AGENTS)
    ]
    for worker in agents:
        worker.start()

    # wait until training is done
    coordinator.join()
def main():
    """Run the tabular Q-learning training loop against the simulator.

    Each iteration fetches one video chunk from ``env.Environment``,
    computes the reward for the bit rate that was just used, discretizes the
    observation into a 4-element state ``[bw, bf, br, c]`` and updates the
    ``Tabular_Q`` table. Every ``TEST_INTERVAL`` steps ``testing`` is called
    and the Q-table is written to disk. The loop has no exit condition.
    """
    np.random.seed(42)

    # delete any existing file at TEST_LOG_PATH (shells out to `rm`)
    os.system('rm ' + TEST_LOG_PATH)

    ta_q = Tabular_Q()

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()

    epoch = 0  # number of chunks processed so far
    time_stamp = 0

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    # discretized state: [bandwidth, buffer, bit rate, chunks remaining]
    state = [0, 0, 0, 0]

    while True:
        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward = quality - rebuffer penalty - bit-rate switch penalty
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        epoch += 1

        # throughput observed while downloading the last chunk
        bw = float(video_chunk_size) / float(
            delay) / M_IN_K * BITS_IN_BYTE  # Mbit/sec
        # round down to a multiple of the bin width and cap at the maximum
        bw = min(int(bw / D_BW) * D_BW, BW_MAX)
        bf = min(int(buffer_size / D_BF) * D_BF, BF_MAX)
        br = bit_rate
        c = min(video_chunk_remain, N_CHUNK - 1)
        next_state = [bw, bf, br, c]

        ta_q.train_q(state, bit_rate, reward, next_state, end_of_video)

        state = next_state
        last_bit_rate = bit_rate
        bit_rate = ta_q.get_q_action(state)

        if end_of_video:
            # start the next video from the default quality and a zero state
            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY
            state = [0, 0, 0, 0]

        if epoch % TEST_INTERVAL == 0:
            testing(ta_q, epoch)
            np.save(TEST_LOG_PATH + '_q_table.npy', ta_q.q_table)
def main():
    """Start the coordinator and ``NUM_AGENTS`` agents on one random trace.

    A single trace index is drawn with ``np.random.randint(1, 65)`` and the
    resulting time/bandwidth data is handed to every agent. Only the
    coordinator process is joined.
    """
    np.random.seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # one pair of size-1 queues per agent for parameters and experience
    net_params_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]
    exp_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]

    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    trace_index = np.random.randint(1, 65)
    all_cooked_time, all_cooked_bw = load_trace.load_trace(trace_index)

    agents = [
        mp.Process(target=agent,
                   args=(idx, all_cooked_time, all_cooked_bw,
                         net_params_queues[idx], exp_queues[idx]))
        for idx in xrange(NUM_AGENTS)
    ]
    for worker in agents:
        worker.start()

    # wait until training is done
    coordinator.join()
def __init__(self):
    """Load the test traces, initialise the parent class and reset counters.

    Trace data comes from ``load_trace.load_trace(self.args.test_bw_trace)``;
    the remaining attributes are per-run bookkeeping that starts at zero or
    empty.
    """
    self.args = EnvArgs()
    cooked_time, cooked_bw, file_names = load_trace.load_trace(
        self.args.test_bw_trace)

    super().__init__(
        all_cooked_time=cooked_time,
        all_cooked_bw=cooked_bw,
        random_seed=self.args.random_seed,
        VIDEO_SIZE_FILE=self.args.test_video_size_files,
        logfile_path='./log/',
        Debug=False,
    )

    # state info for past gops
    gop_state_shape = (self.args.s_gop_info, self.args.s_gop_len)
    self.state_gop = np.zeros(gop_state_shape)

    self.last_bit_rate = 0
    self.reward_gop = 0
    self.last_reward_gop = 0
    self.action_map = self._set_action_map()

    # per-frame histories, each starting as its own empty list
    for history_name in ('time_intervals', 'send_data_sizes', 'frame_types',
                         'frame_time_lens', 'real_qualitys', 'buffer_sizes',
                         'end_delays'):
        setattr(self, history_name, [])

    self.rebuf_time = 0
    self.call_time = 0
    self.switch_num = 0
    self.gop_sizes = [[0] * 17 for _ in range(2)]

    # info for traces
    self.traces_len = len(file_names)
def main():
    """Start the coordinator and ``NUM_AGENTS`` agents on user-position data.

    The first value returned by ``load_trace.load_trace(TRAIN_TRACES)`` is
    passed to every agent. Only the coordinator process is joined.
    """
    np.random.seed(RANDOM_SEED)

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # one pair of size-1 queues per agent for parameters and experience
    net_params_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]
    exp_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]

    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    all_user_pos, _ = load_trace.load_trace(TRAIN_TRACES)

    agents = [
        mp.Process(target=agent,
                   args=(idx, all_user_pos,
                         net_params_queues[idx], exp_queues[idx]))
        for idx in xrange(NUM_AGENTS)
    ]
    for worker in agents:
        worker.start()

    # wait until training is done
    coordinator.join()
def __init__(self, random_seed=RANDOM_SEED):
    """Seed numpy, build the simulator from the default traces and reset.

    Args:
        random_seed: seed given to ``np.random.seed`` and forwarded to
            ``abrenv.Environment``.
    """
    np.random.seed(random_seed)

    cooked_time, cooked_bw, _ = load_trace.load_trace()
    self.net_env = abrenv.Environment(
        all_cooked_time=cooked_time,
        all_cooked_bw=cooked_bw,
        random_seed=random_seed,
    )

    # starting values; reset() is called once they are in place
    self.last_bit_rate = DEFAULT_QUALITY
    self.buffer_size = 0.
    self.state = np.zeros((S_INFO, S_LEN))
    self.reset()
def main():
    """Read optional command-line settings, then launch training processes.

    Command-line arguments (all optional, read positionally as strings):
    ``alpha``, ``bg_traffic_pattern``, ``actor_learning_rate``,
    ``link_capacity``. If fewer than four are supplied, a message listing the
    values in effect is printed. After that the coordinator and
    ``NUM_AGENTS`` agent processes are started and the coordinator is joined.
    """
    alpha = CNO_PARA_LOSS_RATE
    actor_learning_rate = ACTOR_LR_RATE
    bg_traffic_pattern = 0.0  # model_1
    link_capacity = 20000000  # 20Mbps

    # NOTE(review): these four values are only reported here; nothing below
    # in this function passes them on to the coordinator or the agents.
    try:
        alpha = sys.argv[1]
        bg_traffic_pattern = sys.argv[2]
        actor_learning_rate = sys.argv[3]
        link_capacity = sys.argv[4]
    except IndexError:
        # argument order matches the labels: alpha, bg_tp, a_lr, lc
        print("Not all inputs has set via cmd -> alpha[{0}] bg_tp[{1}] a_lr[{2}] lc[{3}]".format(
            alpha, bg_traffic_pattern, actor_learning_rate, link_capacity))

    np.random.seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication: one pair of size-1 queues per agent
    net_params_queues = [mp.Queue(1) for _ in range(NUM_AGENTS)]
    exp_queues = [mp.Queue(1) for _ in range(NUM_AGENTS)]

    # create a coordinator and multiple agent processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)

    agents = [
        mp.Process(target=agent,
                   args=(i, all_cooked_time, all_cooked_bw,
                         net_params_queues[i], exp_queues[i]))
        for i in range(NUM_AGENTS)
    ]
    for worker in agents:
        worker.start()

    # wait until training is done
    coordinator.join()
def __init__(self, random_seed=RANDOM_SEED):
    """Set up the action/observation spaces and the simulator, then reset.

    Args:
        random_seed: seed passed to ``np.random.seed``. Defaults to
            ``RANDOM_SEED``.
    """
    # seed with the caller's value, not unconditionally with RANDOM_SEED
    np.random.seed(random_seed)

    self.action_space = spaces.Discrete(A_DIM)
    self.observation_space = spaces.Box(0, 10.0, [S_INFO, S_LEN],
                                        dtype=np.float32)

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
    self.net_env = env.Environment(all_cooked_time=all_cooked_time,
                                   all_cooked_bw=all_cooked_bw)

    self.last_bit_rate = DEFAULT_QUALITY
    self.state = np.zeros((S_INFO, S_LEN))
    self.reset()
def __init__(self, time, bandwidth, random_seed=RANDOM_SEED):
    """Seed numpy, pick a random trace and a random start point in it.

    Args:
        time: not read by this constructor.
        bandwidth: not read by this constructor.
        random_seed: seed passed to ``np.random.seed``.

    NOTE(review): ``time`` and ``bandwidth`` are accepted but the trace is
    always reloaded from ``load_trace`` -- confirm this is intended.
    """
    np.random.seed(random_seed)

    self.video_chunk_current, self.buffer_current = 1, 0

    # pick a random trace file
    trace_id = np.random.randint(1, 65)
    self.trace_index = trace_id
    trace_time, trace_bw = load_trace.load_trace(trace_id)
    self.time = trace_time
    self.bandwidth = trace_bw

    # random position in the trace, at least 1 so the previous sample exists
    n_samples = len(trace_bw)
    start = np.random.randint(1, n_samples)
    self.trace_ptr = start
    self.last_time = trace_time[start - 1]

    print(n_samples)
def __init__(self, random_seed=RANDOM_SEED):
    """Seed numpy, build the simulator from the default traces and reset.

    Args:
        random_seed: seed given to ``np.random.seed`` and forwarded to
            ``abrenv.Environment``. Defaults to ``RANDOM_SEED``.
    """
    np.random.seed(random_seed)

    # self.action_space = spaces.Box(
    #     low=0., high=60., shape=(2,), dtype=np.float32)
    # self.observation_space = spaces.Box(
    #     0, 10.0, (S_LEN * S_INFO,), dtype=np.float32)

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
    # forward the caller's seed so a non-default random_seed also reaches
    # the simulator instead of the module constant
    self.net_env = abrenv.Environment(all_cooked_time=all_cooked_time,
                                      all_cooked_bw=all_cooked_bw,
                                      random_seed=random_seed)

    self.last_bit_rate = DEFAULT_QUALITY
    self.buffer_size = 0.
    self.state = np.zeros((S_INFO, S_LEN))
    self.reset()
def main(arglist):
    """Launch training and print the elapsed wall-clock time.

    Args:
        arglist: object whose ``model_type`` attribute is forwarded to the
            coordinator and to every agent.

    The coordinator is joined first, then every agent; afterwards the
    elapsed time since entry is printed.
    """
    started_at = datetime.now()

    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # one pair of size-1 queues per agent for parameters and experience
    net_params_queues = [mp.Queue(1) for _ in range(NUM_AGENTS)]
    exp_queues = [mp.Queue(1) for _ in range(NUM_AGENTS)]

    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(
        target=central_agent,
        args=(net_params_queues, exp_queues, arglist.model_type))
    coordinator.start()

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)

    agents = [
        mp.Process(target=agent,
                   args=(idx, all_cooked_time, all_cooked_bw,
                         net_params_queues[idx], exp_queues[idx],
                         arglist.model_type))
        for idx in range(NUM_AGENTS)
    ]
    for worker in agents:
        worker.start()

    # wait until training is done
    coordinator.join()
    for worker in agents:
        worker.join()

    print(str(datetime.now() - started_at))
def main():
    """Seed the RNG, create the queues and launch coordinator and agents.

    Steps: fix the random seed; create the queues used to pass network
    parameters and experience between the central agent and the worker
    agents; start the central agent and the workers in separate processes,
    handing each worker the network trace data loaded from file. Only the
    coordinator process is joined.
    """
    np.random.seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues: one size-1 queue per agent in
    # each direction
    net_params_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]
    exp_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]

    # the coordinator runs central_agent with both queue lists
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    # load the trace data
    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)

    # each worker runs agent with its id, the trace data and its own queues
    agents = [
        mp.Process(target=agent,
                   args=(idx, all_cooked_time, all_cooked_bw,
                         net_params_queues[idx], exp_queues[idx]))
        for idx in xrange(NUM_AGENTS)
    ]
    for worker in agents:
        worker.start()

    # wait until training is done
    coordinator.join()
def main():
    """Launch coordinator and agents on the traces under ``NETWORK_TRACE``.

    Traces are loaded from ``./dataset/network_trace/<NETWORK_TRACE>/`` and
    every agent receives the times, bandwidths and file names. Only the
    coordinator process is joined.
    """
    # seed numpy's global random number generator
    np.random.seed(RANDOM_SEED)
    # fail early if the bit-rate table and the action dimension disagree
    assert len(BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues, created here in the parent process
    net_params_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]
    exp_queues = [mp.Queue(1) for _ in xrange(NUM_AGENTS)]

    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    network_trace_dir = './dataset/network_trace/' + NETWORK_TRACE + '/'
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        network_trace_dir)
    # all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)

    agents = [
        mp.Process(target=agent,
                   args=(idx, all_cooked_time, all_cooked_bw, all_file_names,
                         net_params_queues[idx], exp_queues[idx]))
        for idx in xrange(NUM_AGENTS)
    ]
    for worker in agents:
        worker.start()

    # wait until training is done
    coordinator.join()
def main():
    """Offline dynamic program over (chunk, time, buffer, bit rate) states.

    Uses the first loaded trace only. The trace is resampled onto a grid of
    step ``DT``, repeated so it is long enough to finish the video, and then
    a forward DP over states ``(n, t, b, m)`` (chunk index, time index,
    buffer index, bit-rate index) records the best accumulated reward per
    state and the predecessor that produced it. Finally the best reward for
    the last chunk is printed, followed by the chain of predecessors.

    This is Python 2 code (``xrange``, ``print`` statements).
    """
    all_cooked_time, all_cooked_bw = load_trace.load_trace()

    # video_size[bitrate] is the list of chunk sizes read from
    # VIDEO_SIZE_FILE + str(bitrate), first whitespace-separated field per line
    video_size = {}  # in bytes
    for bitrate in xrange(BITRATE_LEVELS):
        video_size[bitrate] = []
        with open(VIDEO_SIZE_FILE + str(bitrate)) as f:
            for line in f:
                video_size[bitrate].append(int(line.split()[0]))

    # assert len(all_cooked_time) == len(all_cooked_bw)
    # for cooked_data_idx in xrange(len(all_cooked_time))

    # only the first trace is processed
    cooked_time = all_cooked_time[0]
    cooked_bw = all_cooked_bw[0]

    # -----------------------------------------
    # step 1: quantize the time and bandwidth
    # -----------------------------------------
    total_time_pt = int(np.ceil(cooked_time[-1] / DT))

    quan_time = np.linspace(np.floor(cooked_time[0]),
                            np.ceil(cooked_time[-1]),
                            total_time_pt + 1)
    quan_bw = np.zeros(len(quan_time))

    # for each grid point take the bandwidth of the first trace sample whose
    # time is not before it (or the last sample once the trace is exhausted)
    curr_time_idx = 0
    for i in xrange(len(quan_bw)):
        while curr_time_idx < len(cooked_time) - 1 and \
                cooked_time[curr_time_idx] < quan_time[i]:
            curr_time_idx += 1
        quan_bw[i] = cooked_bw[curr_time_idx]

    # ----------------------------------------
    # step 2: cap the max time and max buffer
    # ----------------------------------------
    max_video_contents = np.sum(video_size[BITRATE_LEVELS - 1])  # in bytes
    total_bw = np.sum(quan_bw) * DT  # in MBit

    # ratio of the largest-bit-rate video size to what one pass of the trace
    # can deliver
    t_portion = max_video_contents / (total_bw * B_IN_MB * PACKET_PAYLOAD_PORTION / BITS_IN_BYTE)

    t_max = int(np.ceil(np.ceil(cooked_time[-1]) * t_portion))
    t_max_idx = int(np.ceil(t_max / DT))
    b_max_idx = t_max_idx

    full_quan_time = quan_time
    full_quan_bw = quan_bw

    # append shifted copies of the trace until it covers t_max
    for i in xrange(int(np.ceil(t_portion))):
        full_quan_time = np.append(full_quan_time,
                                   (quan_time[1:] + full_quan_time[-1]))
        full_quan_bw = np.append(full_quan_bw, quan_bw[1:])

    quan_time = full_quan_time
    quan_bw = full_quan_bw

    assert quan_time[-1] >= t_max

    # -----------------------------------------------------------
    # (optional) step 3: pre-compute the download time of chunks
    # download_time(chunk_idx, quan_time, bit_rate)
    # -----------------------------------------------------------
    # left empty here and handed to restore_or_compute_download_time below
    all_download_time = {}
    # print "Pre-compute the download time table"
    # all_download_time = get_download_time(total_video_chunks=TOTAL_VIDEO_CHUNCK,
    #                                       quan_time=quan_time,
    #                                       quan_bw=quan_bw,
    #                                       dt=DT,
    #                                       video_size=video_size,
    #                                       bitrate_levels=BITRATE_LEVELS)

    # -----------------------------
    # step 4: dynamic programming
    # -----------------------------
    # both dicts are keyed by (chunk idx, time idx, buffer idx, bit-rate idx)
    total_reward = {}
    last_dp_pt = {}

    # initialization, take default quality at start off
    download_time = \
        restore_or_compute_download_time(
            all_download_time, 0, 0, DEFAULT_QUALITY,
            quan_time, quan_bw, DT, video_size)
    first_chunk_finish_time = download_time + LINK_RTT / M_IN_K
    first_chunk_finish_idx = int(np.floor(first_chunk_finish_time / DT))
    buffer_size = int(VIDEO_CHUNCK_LEN / M_IN_K / DT)

    # the whole first download is charged as rebuffering
    total_reward[(0, first_chunk_finish_idx, buffer_size, DEFAULT_QUALITY)] = \
        VIDEO_BIT_RATE[DEFAULT_QUALITY] / M_IN_K \
        - REBUF_PENALTY * first_chunk_finish_time

    # (0, 0, 0, 0) is the sentinel that ends the backtrack in step 5
    last_dp_pt[(0, first_chunk_finish_idx, buffer_size, DEFAULT_QUALITY)] = (0, 0, 0, 0)

    for n in xrange(1, TOTAL_VIDEO_CHUNCK):
        print n, TOTAL_VIDEO_CHUNCK
        for t in xrange(t_max_idx):
            for b in xrange(b_max_idx):
                for m in xrange(BITRATE_LEVELS):
                    # expand only states reached for the previous chunk
                    if (n - 1, t, b, m) in total_reward:
                        for new_bit_rate in xrange(BITRATE_LEVELS):
                            download_time = \
                                restore_or_compute_download_time(
                                    all_download_time, n, t, new_bit_rate,
                                    quan_time, quan_bw, DT, video_size)

                            # buffer level for index b, read off the time grid
                            buffer_size = quan_time[b]
                            rebuf = np.maximum(download_time - buffer_size, 0.0)

                            r = VIDEO_BIT_RATE[new_bit_rate] / M_IN_K \
                                - REBUF_PENALTY * rebuf \
                                - SMOOTH_PENALTY * np.abs(
                                    VIDEO_BIT_RATE[new_bit_rate] -
                                    VIDEO_BIT_RATE[m]) / M_IN_K

                            # drain the buffer during the download, then add
                            # the chunk that was just fetched
                            buffer_size = np.maximum(
                                buffer_size - download_time, 0.0)
                            buffer_size += VIDEO_CHUNCK_LEN / M_IN_K
                            buffer_idx = int(buffer_size / DT)

                            new_time_idx = int(
                                np.floor((quan_time[t] + download_time +
                                          LINK_RTT / M_IN_K) / DT))

                            new_total_reward = total_reward[(n - 1, t, b, m)] + r

                            # keep the best reward seen for the successor state
                            if (n, new_time_idx, buffer_idx, new_bit_rate) not in total_reward:
                                total_reward[(n, new_time_idx, buffer_idx, new_bit_rate)] = \
                                    new_total_reward
                                last_dp_pt[(n, new_time_idx, buffer_idx, new_bit_rate)] = \
                                    (n - 1, t, b, m)
                            else:
                                if new_total_reward > total_reward[(
                                        n, new_time_idx, buffer_idx, new_bit_rate)]:
                                    total_reward[(n, new_time_idx, buffer_idx, new_bit_rate)] = \
                                        new_total_reward
                                    last_dp_pt[(n, new_time_idx, buffer_idx, new_bit_rate)] = \
                                        (n - 1, t, b, m)

    # ---------------------------------
    # step 5: get the max total reward
    # ---------------------------------
    optimal_total_reward = -np.inf
    end_dp_pt = None
    for k in total_reward:
        if k[0] == TOTAL_VIDEO_CHUNCK - 1:
            if total_reward[k] > optimal_total_reward:
                optimal_total_reward = total_reward[k]
                # note: this stores the predecessor of the best final state
                end_dp_pt = last_dp_pt[k]

    print optimal_total_reward

    # walk the predecessor chain back to the sentinel, printing each state
    if end_dp_pt is not None:
        while end_dp_pt != (0, 0, 0, 0):
            print end_dp_pt
            end_dp_pt = last_dp_pt[end_dp_pt]
def main(self, args, net_env=None, policy=None):
    """Stream videos through the simulator using a decision-tree policy.

    Args:
        self: not read by this function.
        args: options object; ``traces``, ``log`` and ``qoe_metric`` are read.
        net_env: simulator to use. When given, the function runs in "viper"
            mode: it plays a single video and returns the rollout. When
            ``None``, an ``env.Environment`` is built from ``args.traces``.
        policy: object with a ``predict`` method. When ``None`` it is
            unpickled from ``DTModel``.

    Returns:
        In viper mode, the list of ``(state, bit_rate, serialized_state)``
        tuples collected up to the first end of video. Otherwise the sum of
        rewards after ``len(net_env.all_file_names)`` videos.
    """
    viper_flag = True
    assert len(VIDEO_BIT_RATE) == A_DIM

    log_f = LOG_FILE

    if net_env is None:
        viper_flag = False
        all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(args.traces)
        net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw,
                                  all_file_names=all_file_names)

    # A log file is only opened outside viper mode, so every later write
    # must use the same condition; checking args.log alone would touch an
    # unassigned log_file when a caller supplies net_env with args.log set.
    log_enabled = bool(args.log) and not viper_flag
    if log_enabled:
        log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
        log_file = open(log_path, 'wb')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    # s_batch always holds at least this initial all-zero state
    s_batch = [np.zeros((S_INFO, S_LEN))]
    rollout = []

    video_count = 0
    reward_sum = 0

    # load dt policy
    if policy is None:
        # NOTE(review): pickle executes code from the file it loads; DTModel
        # must point at a trusted file.
        with open(DTModel, 'rb') as f:
            policy = pk.load(f)

    while True:  # serve video forever
        delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric)
        reward_sum += reward
        last_bit_rate = bit_rate

        if log_enabled:
            log_file.write(bytes(str(time_stamp / M_IN_K) + '\t' +
                                 str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                                 str(buffer_size) + '\t' +
                                 str(rebuf) + '\t' +
                                 str(video_chunk_size) + '\t' +
                                 str(delay) + '\t' +
                                 str(reward) + '\n', encoding='utf-8'))
            log_file.flush()

        # select bit_rate according to decision tree:
        # start from a copy of the previous state and shift the history left
        state = np.array(s_batch[-1], copy=True)
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        serialized_state = serial(state)
        bit_rate = int(policy.predict(np.array(serialized_state).reshape(1, -1))[0])
        rollout.append((state, bit_rate, serialized_state))
        s_batch.append(state)

        if end_of_video:
            if log_enabled:
                log_file.write(bytes('\n', encoding='utf-8'))
                log_file.close()
                print("video count", video_count)

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            if viper_flag:
                return rollout

            video_count += 1
            if video_count >= len(net_env.all_file_names):
                break

            # the simulator has moved to the next trace; open its log
            if log_enabled:
                log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
                log_file = open(log_path, 'wb')

    return reward_sum
def main():
    """Evaluate the restored actor network on every test trace.

    One log file is written per trace. The reward mixes quality, rebuffering
    and smoothness with three weights that start at ``[0.2, 0.3, 0.5]`` and
    are redrawn (absolute values of a normal sample, L1-normalised) after
    each video. Actions are sampled from the network's output distribution.
    """
    torch.set_num_threads(1)
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_file = open(LOG_FILE + '_' + all_file_names[net_env.trace_idx], 'w')

    # all models have same actor network, so model_type can be anything
    net = ActorNetwork([S_INFO, S_LEN], A_DIM)

    # restore neural net parameters
    net.load_state_dict(torch.load(ACTOR_MODEL))
    print("Testing model restored.")

    time_stamp = 0
    video_count = 0
    last_bit_rate = bit_rate = DEFAULT_QUALITY

    state = torch.zeros((S_INFO, S_LEN))
    weights = np.array([0.2, 0.3, 0.5])

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        (delay, sleep_time, buffer_size, rebuf,
         video_chunk_size, next_video_chunk_sizes,
         end_of_video, video_chunk_remain) = net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        w_quality, w_rebuf, w_smooth = weights
        quality_term = w_quality * VIDEO_BIT_RATE[bit_rate] / M_IN_K
        rebuf_term = w_rebuf * REBUF_PENALTY * rebuf
        smooth_term = w_smooth * SMOOTH_PENALTY * np.abs(
            VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
        reward = quality_term - rebuf_term - smooth_term

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, rebuf, chunk size, delay,
        # reward as one tab-separated line
        fields = (time_stamp / M_IN_K, VIDEO_BIT_RATE[bit_rate], buffer_size,
                  rebuf, video_chunk_size, delay, reward)
        log_file.write('\t'.join(str(value) for value in fields) + '\n')
        log_file.flush()

        # shift the history one step along the last dimension
        state = torch.roll(state, -1, dims=-1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
            np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(
            delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = torch.tensor(
            next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = min(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        # sample the next action from the actor's output
        with torch.no_grad():
            probability = net.forward(state.unsqueeze(0))
            m = Categorical(probability)
            bit_rate = m.sample().item()

        if not end_of_video:
            continue

        # draw new reward weights and normalise them to sum to one
        weights = np.random.randn(3)
        weights = np.abs(weights) / np.linalg.norm(weights, ord=1)

        log_file.write('\n')
        log_file.close()

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY  # use the default action here
        state = torch.zeros((S_INFO, S_LEN))

        video_count += 1
        if video_count >= len(all_file_names):
            break

        log_file = open(LOG_FILE + '_' + all_file_names[net_env.trace_idx],
                        'w')
def main(self, args, net_env=None, policy=None):
    """Stream videos with an FSM-wrapped decision tree, optionally updating it.

    Args:
        self: not read by this function.
        args: options object; ``traces``, ``update``, ``log``, ``qoe_metric``
            and ``dt`` are read, and ``args`` is also passed to
            ``fsm.Trajectory``.
        net_env: simulator to use. When given, the function runs in "viper"
            mode: it plays a single video and returns the rollout. When
            ``None``, an ``env.Environment`` is built from ``args.traces``.
        policy: decision tree to wrap in ``fsm.FSM``. When ``None`` it is
            unpickled from ``args.dt``.

    Returns:
        In viper mode, the list of ``(state, bit_rate, serialized_state)``
        tuples collected up to the first end of video. Otherwise the sum of
        rewards after ``len(net_env.all_file_names)`` videos.
    """
    viper_flag = True
    assert len(VIDEO_BIT_RATE) == A_DIM

    log_f = LOG_FILE

    if net_env is None:
        viper_flag = False
        all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(args.traces)
        net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw,
                                  all_file_names=all_file_names)

    if args.update:
        log_f = log_f.replace('dt', 'du')

    # A log file is only opened outside viper mode, so every later write
    # must use the same condition; checking args.log alone would touch an
    # unassigned log_file when a caller supplies net_env with args.log set.
    log_enabled = bool(args.log) and not viper_flag
    if log_enabled:
        log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
        log_file = open(log_path, 'wb')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    # s_batch always holds at least this initial all-zero state
    s_batch = [np.zeros((S_INFO, S_LEN))]
    rollout = []

    video_count = 0
    reward_sum = 0
    # trajectories still being simulated, stored in groups of three
    # (one bit rate lower, chosen bit rate, one bit rate higher)
    in_compute = []

    # load dt policy
    if policy is None:
        # NOTE(review): pickle executes code from the file it loads; args.dt
        # must point at a trusted file.
        with open(args.dt, 'rb') as f:
            policy = pk.load(f)
    # wrap the tree in the FSM interface used by the update logic below
    policy = fsm.FSM(policy)

    def horizon_reward(traj):
        # total reward of one trajectory over its first HORIZON chunks
        return sum(get_reward(traj.quality[i], traj.rebuf[i], traj.last_bitrate[i], args.qoe_metric)
                   for i in range(HORIZON))

    while True:  # serve video forever
        delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric)
        reward_sum += reward
        last_bit_rate = bit_rate

        if log_enabled:
            log_file.write(bytes(str(time_stamp / M_IN_K) + '\t' +
                                 str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                                 str(buffer_size) + '\t' +
                                 str(rebuf) + '\t' +
                                 str(video_chunk_size) + '\t' +
                                 str(delay) + '\t' +
                                 str(reward) + '\n', encoding='utf-8'))
            log_file.flush()

        # select bit_rate according to decision tree:
        # start from a copy of the previous state and shift the history left
        state = np.array(s_batch[-1], copy=True)
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        serialized_state = serial(state)
        bit_rate = int(policy.predict(np.array(serialized_state).reshape(1, -1))[0])
        rollout.append((state, bit_rate, serialized_state))
        s_batch.append(state)

        if args.update:
            chunk_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain - 1)
            # remember which tree leaf decided this chunk
            policy.chunk_leaf[chunk_index] = policy.tree.apply(np.array(serialized_state).reshape(1, -1))

            # start three look-ahead trajectories while at least HORIZON
            # chunks remain
            if chunk_index < CHUNK_TIL_VIDEO_END_CAP - HORIZON:
                # NOTE(review): the upper clamp is the literal 5; confirm it
                # equals the highest valid bit-rate index (A_DIM - 1).
                in_compute.append(fsm.Trajectory(chunk_index, max(0, bit_rate - 1), buffer_size - CHUNK_LEN,
                                                 last_bit_rate, state, args))
                in_compute.append(fsm.Trajectory(chunk_index, bit_rate, buffer_size - CHUNK_LEN,
                                                 last_bit_rate, state, args))
                in_compute.append(fsm.Trajectory(chunk_index, min(5, bit_rate + 1), buffer_size - CHUNK_LEN,
                                                 last_bit_rate, state, args))

            # feed the newly observed chunk size and delay to every pending
            # trajectory; while a trajectory reports CHUNK_SWITCH, pick its
            # next bit rate with the policy and continue with its own message
            for traj in in_compute:
                this_chunk_size = video_chunk_size
                this_delay = delay
                while True:
                    if traj.apply(this_chunk_size, this_delay) == CHUNK_SWITCH:
                        new_bitrate = int(policy.predict(np.array(serial(traj.states)).reshape(1, -1))[0])
                        traj.next_chunk(new_bitrate)
                        this_chunk_size, this_delay = traj.trans_msg
                    else:
                        break

            # once the oldest group of three has finished, compare the
            # rewards and report the outcome to the policy
            while len(in_compute) > 1 and in_compute[0].end and in_compute[1].end and in_compute[2].end:
                r_below = horizon_reward(in_compute[0])
                r_normal = horizon_reward(in_compute[1])
                r_above = horizon_reward(in_compute[2])
                best = max(r_below, r_normal, r_above)

                # NOTE(review): "normal is best" maps to -1 and "below is
                # best" maps to 0; confirm against fsm.FSM.update.
                if r_above == best:
                    policy.update(in_compute[0].chunk_index, 1)
                elif r_normal == best:
                    policy.update(in_compute[0].chunk_index, -1)
                else:
                    policy.update(in_compute[0].chunk_index, 0)

                del in_compute[:3]

        if end_of_video:
            if log_enabled:
                log_file.write(bytes('\n', encoding='utf-8'))
                log_file.close()
                print("video count", video_count)

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            # pending look-ahead trajectories do not carry over to the next video
            in_compute = []

            if viper_flag:
                return rollout

            video_count += 1
            if video_count >= len(net_env.all_file_names):
                break

            # the simulator has moved to the next trace; open its log
            if log_enabled:
                log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
                log_file = open(log_path, 'wb')

    return reward_sum
def main():
    """Evaluate a restored ``a3c.ActorCritic`` model on every loaded trace.

    One binary log file is written per trace. At each step the reward for
    the bit rate just used is logged, the state tensor is updated, and the
    next bit rate is sampled from the model output. When ``islstm`` is
    non-zero the recurrent tensors ``hx``/``cx`` are fed back into the model.
    The loop ends after ``len(all_file_names)`` videos.
    """
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    # evaluation only: no gradients are recorded
    with torch.no_grad():
        model = a3c.ActorCritic(state_dim=[S_INFO, S_LEN], action_dim=A_DIM, learning_rate=[ACTOR_LR_RATE, CRITIC_LR_RATE],islstm = islstm)

        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            model.load_state_dict(torch.load(nn_model, map_location=torch.device('cpu')))
            print("Model restored.")

        state = torch.zeros(S_INFO, S_LEN)
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = torch.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [torch.zeros(S_INFO, S_LEN)]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0
        # recurrent state passed to the model when islstm is non-zero
        cx = torch.zeros(1, 128)
        hx = torch.zeros(1, 128)

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write((str(time_stamp / M_IN_K) + '\t' +
                            str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                            str(buffer_size) + '\t' +
                            str(rebuf) + '\t' +
                            str(video_chunk_size) + '\t' +
                            str(delay) + '\t' +
                            str(reward) + '\n').encode("utf-8"))
            log_file.flush()

            # retrieve previous state
            # NOTE(review): s_batch is re-seeded with one element after every
            # video, so this branch looks unreachable; if it ran, `state`
            # would be a list, not a tensor.
            if len(s_batch) == 0:
                state = [torch.zeros((S_INFO, S_LEN))]

            # NOTE(review): torch.roll is called without `dims`; per the
            # PyTorch docs that rolls the flattened tensor, so values can
            # move between rows. Confirm whether dims=-1 was intended.
            # NOTE(review): `state` is not reset at end_of_video, so history
            # from one video carries into the next -- confirm intended.
            state = torch.roll(state, -1)

            # Fill in the state vector with normalization
            # last_bit_rate was set to bit_rate above, so this is the bit
            # rate of the chunk just downloaded
            state[0, -1] = torch.Tensor([VIDEO_BIT_RATE[last_bit_rate] / float(max(VIDEO_BIT_RATE))])  # last quality
            state[1, -1] = torch.Tensor([buffer_size / BUFFER_NORM_FACTOR])  # buffer size
            state[2, -1] = torch.Tensor([float(video_chunk_size) / float(delay) / M_IN_K])  # kilo byte / ms
            state[3, -1] = torch.Tensor([float(delay) / M_IN_K / BUFFER_NORM_FACTOR])  # /10 sec
            state[4, :A_DIM] = torch.Tensor([next_video_chunk_sizes]) / M_IN_K / M_IN_K  # mega byte
            # remaining chunk number
            state[5, -1] = torch.Tensor([min(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)])

            if islstm == 0:
                logits, value = model(state.unsqueeze(dim=0))
            else:
                logits, value, hx, cx = model((state.unsqueeze(dim=0),hx,cx))

            # print(f"index {index}, state {state}, logits {logits}, value {value}",sep="\n")
            # print(state,logits)
            try:
                # the first positional argument of Categorical is `probs`, so
                # `logits` is treated as probabilities here
                cate = Categorical(logits)
                bit_rate = cate.sample().item()
            except Exception as e:
                # print diagnostics and stop the program
                print(e)
                print(f"walking into an error of all null distribution")
                print(logits, state)
                exit()

            policy = logits
            log_policy = torch.log(logits)
            # NOTE(review): this is sum(p * log p), i.e. the negative of the
            # Shannon entropy; it is only recorded, never read, in this
            # function.
            entropy = (policy * log_policy).sum(1, keepdim=True)

            s_batch.append(state)

            entropy_record.append(entropy)

            if end_of_video:
                log_file.write('\n'.encode("utf-8"))
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                # hx/cx keep their values across videos; only detached
                cx = cx.detach()
                hx = hx.detach()

                action_vec = torch.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(torch.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                print ("video count", video_count)
                video_count += 1

                if video_count >= len(all_file_names):
                    break

                # the log name is taken from the simulator's current trace_idx
                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')
def main():
    """Evaluate the restored comyco actor greedily on every test trace.

    For each chunk the environment is stepped with the current bitrate, the
    QoE reward (linear or log, selected by ``QOE_METRIC``) is computed and a
    tab-separated record is appended to the per-trace log.  The next bitrate
    is the arg-max of the actor's output.  Stops after one video per trace.
    """
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = \
        load_trace.load_trace(TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    with tf.Session() as sess:
        actor = libcomyco.libcomyco(sess, S_INFO, S_LEN, A_DIM, LR_RATE=1e-4)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        bit_rate = DEFAULT_QUALITY
        last_bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the bitrate used here was decided on the previous iteration
            (delay, sleep_time, buffer_size, rebuf,
             video_chunk_size, next_video_chunk_sizes,
             end_of_video, video_chunk_remain) = \
                net_env.get_video_chunk(int(bit_rate))

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            if QOE_METRIC == 'lin':
                # -- lin scale reward --
                REBUF_PENALTY = REBUFF_PENALTY_LIN
                reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                    - REBUF_PENALTY * rebuf \
                    - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                              VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            else:
                # -- log scale reward --
                REBUF_PENALTY = REBUFF_PENALTY_LOG
                base_rate = float(VIDEO_BIT_RATE[0])
                log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / base_rate)
                log_last_bit_rate = np.log(
                    VIDEO_BIT_RATE[last_bit_rate] / base_rate)

                reward = log_bit_rate \
                    - REBUF_PENALTY * rebuf \
                    - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            fields = (time_stamp / M_IN_K,
                      VIDEO_BIT_RATE[bit_rate],
                      buffer_size,
                      rebuf,
                      video_chunk_size,
                      delay,
                      reward)
            log_file.write('\t'.join(str(item) for item in fields) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / \
                float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / \
                float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob, _ = actor.predict(
                np.reshape(state, (-1, S_INFO, S_LEN)))
            # greedy decision: take the most probable action
            bit_rate = np.argmax(action_prob[0])

            s_batch.append(state)
            entropy_record.append(actor.compute_entropy(action_prob[0]))

            if not end_of_video:
                continue

            # blank line marks the end of a video in the log
            log_file.write('\n')
            log_file.close()

            bit_rate = DEFAULT_QUALITY  # use the default action here
            last_bit_rate = DEFAULT_QUALITY

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []

            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'w')
def main():
    """Run the robust-MPC bitrate selector over every loaded trace.

    Each iteration downloads one chunk at the current bitrate, logs the
    resulting QoE record, predicts the future bandwidth from the harmonic
    mean of recent throughput (discounted by the largest recent prediction
    error) and then exhaustively scores every bitrate plan for the next
    ``defaultFutureChunkCount`` chunks to choose the next bitrate.
    """
    # check the constant definition is valid or not
    assert len(bitRatesOptions) == bitRatesTypes

    # load the traces and set up the environment
    allCookedTime, allCookedBW, allFileNames = load_trace.load_trace()
    netEnvironment = env.Environment(all_cooked_time=allCookedTime,
                                     all_cooked_bw=allCookedBW)

    # open the output log file to write
    logName = outputFilePrefix + "_" + allFileNames[netEnvironment.trace_idx]
    logHandle = open(logName, "wb")

    # initialise the local variables
    timeStamp = 0
    lastBitRateOption = defaultBitRateOption
    currentBitRateOption = defaultBitRateOption
    videoCount = 0
    historyState = np.zeros((stateInfoLength, pastFramesLength))

    # enumerate all possible bitrate plans for the future chunks
    chunkOptionsSet.extend(
        itertools.product(range(bitRatesTypes),
                          repeat=defaultFutureChunkCount))

    # computing kernel:
    while True:
        # get the video chunk according to the current bitrate option
        assert currentBitRateOption >= 0
        (delay, sleepTime, currentBufferSize, rebuffer,
         currentVideoChunkSize, nextVideoChunkSize,
         endFlag, chunkRemainCount) = \
            netEnvironment.get_video_chunk(currentBitRateOption)

        # update the time stamp because of the delay and sleeping time
        timeStamp += delay + sleepTime  # ms

        # calculate the reward value according to the formula
        qualityValue = bitRatesOptions[currentBitRateOption] / bitsFactor  # kb to Mb
        smoothValue = np.abs(bitRatesOptions[currentBitRateOption]
                             - bitRatesOptions[lastBitRateOption]) / bitsFactor
        rewardValue = qualityValue \
            - rebufferFactor * rebuffer \
            - smoothFactor * smoothValue

        # write the output file
        logFields = (timeStamp / millsecondsPerSecond,
                     bitRatesOptions[currentBitRateOption],
                     currentBufferSize,
                     rebuffer,
                     currentVideoChunkSize,
                     delay,
                     rewardValue)
        logLine = '\t'.join(str(item) for item in logFields) + '\n'
        logHandle.write(logLine.encode('utf-8'))
        logHandle.flush()

        # update the bit rate option
        lastBitRateOption = currentBitRateOption

        # update the history state information like a sliding window
        historyState = np.roll(historyState, -1, axis=1)
        historyState[0, -1] = \
            bitRatesOptions[currentBitRateOption] / float(maxBitRate)
        historyState[1, -1] = currentBufferSize / bufferNormFactor
        historyState[2, -1] = rebuffer
        historyState[3, -1] = \
            float(currentVideoChunkSize) / float(delay) / bitsFactor
        historyState[4, -1] = np.minimum(
            chunkRemainCount,
            defaultChunkCountToEnd) / float(defaultChunkCountToEnd)

        # ---- MPC kernel begin ----
        # normalised error of the previous bandwidth estimate
        currentError = 0.
        if len(pastBWEsts) > 0:
            latestBW = historyState[3, -1]
            currentError = abs(pastBWEsts[-1] - latestBW) / float(latestBW)
        pastErrors.append(currentError)

        # harmonic mean of the last 5 measured bandwidths,
        # ignoring leading slots that have not been filled yet
        recentBW = historyState[3, -5:]
        while recentBW[0] == 0.0:
            recentBW = recentBW[1:]
        inverseSum = 0.0
        for sample in recentBW:
            inverseSum += (1 / float(sample))
        harmonicBW = 1.0 / (inverseSum / len(recentBW))

        # predicted bandwidth = harmonic mean discounted by the worst of the
        # last (up to) 5 prediction errors
        errorWindow = min(5, len(pastErrors))
        maxError = float(max(pastErrors[-errorWindow:]))
        currentPredBW = harmonicBW / (1 + maxError)
        pastBWEsts.append(currentPredBW)

        # chunk indices covered by this round of prediction
        currentLastIndex = totalChunksCount - chunkRemainCount
        horizon = min(chunkRemainCount, defaultFutureChunkCount)

        # enumerate all the possible plans and pick the best one
        bestReward = -INF
        bestSolution = ()
        finalOption = -1
        startBufferSize = currentBufferSize
        for plan in chunkOptionsSet:
            candidate = plan[0:horizon]
            planRebuffer = 0.0
            planBuffer = startBufferSize
            planBitRateSum = 0.
            planSmoothDiffs = 0.
            previousOption = currentBitRateOption

            # simulate downloading the future chunks of this plan
            for offset in range(0, horizon):
                option = candidate[offset]
                chunkIndex = currentLastIndex + offset + 1
                chunkBytes = getChunkSize(option, chunkIndex)
                downloadTime = (float(chunkBytes)
                                / (bitsFactor * bitsFactor)) / currentPredBW  # Bytes to MBytes
                if planBuffer < downloadTime:
                    planRebuffer += downloadTime - planBuffer
                    planBuffer = 0
                else:
                    planBuffer -= downloadTime
                # This 4 means the play speed
                planBuffer += 4
                planBitRateSum += bitRatesOptions[option]
                planSmoothDiffs += abs(bitRatesOptions[option]
                                       - bitRatesOptions[previousOption])
                previousOption = option

            planReward = float(planBitRateSum) / bitsFactor \
                - rebufferFactor * planRebuffer \
                - float(planSmoothDiffs) / bitsFactor

            # ties go to the plan enumerated later (both arms of the original
            # tie-break assigned the same value)
            if planReward >= bestReward:
                bestSolution = candidate
                bestReward = planReward

        if bestSolution != ():
            finalOption = bestSolution[0]
        currentBitRateOption = finalOption

        if endFlag:
            logHandle.write("\n".encode('utf-8'))
            logHandle.close()

            lastBitRateOption = defaultBitRateOption
            currentBitRateOption = defaultBitRateOption
            historyState = np.zeros((stateInfoLength, pastFramesLength))

            print("video count", videoCount)
            videoCount += 1

            if videoCount >= len(allFileNames):
                break

            # NOTE(review): later logs use the "_naive_" infix while the first
            # log above uses "_" only - confirm which name consumers expect.
            logName = outputFilePrefix + "_naive_" + \
                allFileNames[netEnvironment.trace_idx]
            logHandle = open(logName, "wb")
# randomize the start point of the video # note: trace file starts with time 0 self.mahimahi_ptr = np.random.randint(1, len(self.cooked_bw)) self.last_mahimahi_time = self.cooked_time[self.mahimahi_ptr - 1] next_video_chunk_sizes = [] for i in range(BITRATE_LEVELS): next_video_chunk_sizes.append( self.video_size[i][self.video_chunk_counter]) # delay - time from click until start to play # sleep_time - sleep when buffer size > buffer_tresh # buffer size # rebufed time return delay, \ sleep_time, \ return_buffer_size / MILLISECONDS_IN_SECOND, \ rebuf / MILLISECONDS_IN_SECOND, \ video_chunk_size, \ next_video_chunk_sizes, \ end_of_video, \ video_chunk_remain if __name__ == "__main__": all_cooked_time, all_cooked_bw, _ = load_trace() env = Environment(all_cooked_time, all_cooked_bw) env.get_video_chunk(0) env.get_video_chunk(1)
# ---- model checkpoint ----
# Path of a trained model to restore, or None to start without one.
NN_MODEL = None
# NN_MODEL = './a2c_results_test/nn_model_ep_91.ckpt'  # can load trained model

# ---- dataset selection ----
NETWORK_TRACE = 'fixed'
VIDEO_TRACE = 'AsianCup_China_Uzbekistan'
VIDEO_TRACE_list = [
    'AsianCup_China_Uzbekistan',
    'Fengtimo_2018_11_3',
    'game',
    'room',
    'sports',
]
network_trace_dir = './dataset/network_trace/{}/'.format(NETWORK_TRACE)
video_trace_prefix = './dataset/video_trace/{}/frame_trace_'.format(VIDEO_TRACE)

# ---- output locations ----
LOG_FILE_PATH = './log/'
SUMMARY_DIR = './L2AC_results'  # trained model path

# load the network trace
all_cooked_time, all_cooked_bw, all_file_names = \
    load_trace.load_trace(network_trace_dir)

# default setting
epoch_reward = 0
last_bit_rate = 0
bit_rate = 0
target_buffer = 0
state = np.zeros((S_DIM, S_LEN))
thr_record = np.zeros(8)

# plot info
idx = 0
id_list = []
bit_rate_record = []
buffer_record = []
throughput_record = []
parser.add_argument('-q', '--qoe-metric', choices=['lin', 'log', 'hd']) parser.add_argument('-l', '--log', action='store_true') parser.add_argument('-i', '--lin', action='store_true') parser.add_argument('-m', '--iters', type=int) parser.add_argument('-t', '--traces', choices=['norway', 'fcc', 'oboe']) args = parser.parse_args() n_batch_rollouts = 10 max_iters = args.iters max_pts = 200000 train_frac = 0.8 np.random.seed(RANDOM_SEED) states, actions, serials = [], [], [] precision = [] #trees = [] all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( args.traces) if args.abr == 'hotdash': net_env = env_hotdash.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, all_file_names=all_file_names) else: net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, all_file_names=all_file_names) if args.abr == 'pensieve': teacher = pensieve.Pensieve() student = pensilin.Pensilin() #test = pensieve.Pensieve() elif args.abr == 'robustmpc': teacher = robustmpc.RobustMPC()
def loopmain():
    """Roll out the tree actor against the simulator and imitate the expert.

    Per chunk: step the environment, build a state from the chunk VMAF, the
    buffer level and the throughput mean/variance, ask the environment for
    the expert ("optimal") action, submit the (state, expert action) pair to
    the sample pool and write a log record.  Every 10th finished video the
    actor is retrained from the pool, saved, and the external test and plot
    scripts are launched.  The loop never terminates on its own.
    """
    pool_ = pool.pool()

    # per-bitrate chunk sizes (bytes) and VMAF scores read from disk
    video_size = {}  # in bytes
    vmaf_size = {}
    for level in range(BITRATE_LEVELS):
        video_size[level] = []
        vmaf_size[level] = []
        with open(VIDEO_SIZE_FILE + str(level)) as size_file:
            video_size[level].extend(
                int(row.split()[0]) for row in size_file)
        with open(VMAF + str(BITRATE_LEVELS - level)) as vmaf_file:
            vmaf_size[level].extend(float(row) for row in vmaf_file)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TRAIN_TRACES)
    net_env = env.Environment(TRAIN_TRACES)

    with open(LOG_FILE + 'agent', 'w') as log_file:
        actor = a3c.ActorNetwork(state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 max_depth=6)

        bit_rate = DEFAULT_QUALITY
        last_chunk_vmaf = None

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        a_real_batch = [action_vec]
        r_batch = []

        time_stamp = 0
        throu_array, delay_array = [], []
        epoch = 0

        while True:
            # the environment exposes the step results as attributes
            net_env.get_video_chunk(int(bit_rate))
            delay = net_env.delay0
            sleep_time = net_env.sleep_time0
            buffer_size = net_env.return_buffer_size0
            rebuf = net_env.rebuf0
            video_chunk_size = net_env.video_chunk_size0
            end_of_video = net_env.end_of_video0
            video_chunk_remain = net_env.video_chunk_remain0
            video_chunk_vmaf = net_env.video_chunk_vmaf0

            upcoming = net_env.video_chunk_counter
            next_video_chunk_sizes = [
                video_size[i][upcoming] for i in range(A_DIM)]
            next_video_chunk_vmaf = [
                vmaf_size[i][upcoming] for i in range(A_DIM)]

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            if last_chunk_vmaf is None:
                last_chunk_vmaf = video_chunk_vmaf

            vmaf_delta = video_chunk_vmaf - last_chunk_vmaf
            reward = 0.8469011 * video_chunk_vmaf - 28.79591348 * rebuf + 0.29797156 * \
                np.abs(np.maximum(vmaf_delta, 0.)) - 1.06099887 * \
                np.abs(np.minimum(vmaf_delta, 0.)) - \
                2.661618558192494
            r_batch.append(reward)

            last_bit_rate = bit_rate
            last_chunk_vmaf = video_chunk_vmaf

            state = np.zeros([S_INFO, S_LEN])

            # sliding window of recent throughput / delay samples
            throughput = video_chunk_size / delay / M_IN_K
            throu_array.append(throughput)
            delay_array.append(delay)
            if len(throu_array) >= FUTURE_P:
                throu_array.pop(0)
                delay_array.pop(0)
            mean, var = mean_var(throu_array, delay_array)

            # this should be S_INFO number of terms
            state[0, -1] = video_chunk_vmaf
            state[1, -1] = buffer_size  # 10 sec
            state[2, -1] = mean
            state[3, -1] = var  # 10 sec

            action_prob = actor.predict(
                np.reshape(state, (-1, S_INFO, S_LEN)))

            # expert action for this step
            net_env.get_optimal(float(last_chunk_vmaf))
            action_real = int(net_env.optimal)

            # force robust
            if actor.compute_entropy(action_prob) > ENTROPY_THRES:
                action_cumsum = np.cumsum(action_prob)
                bit_rate = (action_cumsum > np.random.randint(
                    1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            else:
                bit_rate = np.random.randint(A_DIM)

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            action_real_vec = np.zeros(A_DIM)
            action_real_vec[action_real] = 1

            pool_.submit(state, action_real_vec)

            # log time_stamp, bit_rate, buffer_size, reward
            log_fields = (time_stamp,
                          VIDEO_BIT_RATE[bit_rate],
                          buffer_size,
                          rebuf,
                          video_chunk_size,
                          delay,
                          VIDEO_BIT_RATE[action_real],
                          reward)
            log_file.write('\t'.join(str(item) for item in log_fields) + '\n')
            log_file.flush()

            # flush the batches once a sequence is full or the video ended
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del a_real_batch[:]
                throu_array, delay_array = [], []

                # so that in the log we know where video ends
                log_file.write('\n')

            # store the state and action into batches
            if not end_of_video:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
                a_real_batch.append(action_vec)
                continue

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            last_chunk_vmaf = None

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1
            action_real_vec = np.zeros(A_DIM)
            action_real_vec[action_real] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            a_real_batch.append(action_real_vec)

            epoch += 1
            if epoch % 10 == 0:
                print(time.time())
                training_s_batch, training_a_batch = pool_.get()
                if training_s_batch.shape[0] > 0:
                    actor.train(np.array(training_s_batch),
                                np.array(training_a_batch))
                model_path = 'pitree/pitree' + str(epoch) + '.model'
                actor.save(model_path)
                os.system('python dt_test.py ' + model_path)
                os.system('python plot_results.py >> tab.log')
def test(user_id,ABR_NAME_,QoE_,NETWORK_TRACE_,VIDEO_TRACE_):
    """Run one ABR algorithm on the live-streaming simulator and log chunk QoE.

    Args:
        user_id: Not used by this function.
        ABR_NAME_: Algorithm to load: 'BBA', 'RBA', 'DYNAMIC', 'PDDQN',
            'PDDQN-R' or 'Pensieve'.
        QoE_: QoE profile selecting the reward weights ('al', 'ar', 'b',
            'hd', 'max'); any other value leaves every weight at zero.
        NETWORK_TRACE_: Sub-directory of ``./dataset/new_network_trace/``.
        VIDEO_TRACE_: Sub-directory of ``./dataset/video_trace/``.

    Returns:
        ``[reward_all_sum / trace_count, run_time / trace_count]``.

    Raises:
        ValueError: If ``ABR_NAME_`` is not one of the supported names.

    One ``[ABR_NAME, chunk_reward]`` row is written to
    ``./result/Startup/<network>/<abr>/QoE.csv`` per decision point.
    """
    ABR_NAME = ABR_NAME_
    QoE = QoE_
    NETWORK_TRACE = NETWORK_TRACE_
    VIDEO_TRACE = VIDEO_TRACE_

    # ---- algorithm selection (imported lazily, one module per algorithm) ----
    model_name = ""
    if ABR_NAME == 'BBA':
        import BBA as ABR
    elif ABR_NAME == 'RBA':
        import RBA as ABR
    elif ABR_NAME == 'DYNAMIC':
        import DYNAMIC as ABR
    elif ABR_NAME == 'PDDQN':
        model_name = "./PDDQN_models/PDDQN_b/"
        import PDDQN_ as ABR
    elif ABR_NAME == 'PDDQN-R':
        model_name = "./PDDQN_models/"+QoE+'/'
        import PDDQN_R as ABR
    elif ABR_NAME == 'Pensieve':
        model_name = "./Pensieve_models/"+QoE+'/'
        import Pensieve as ABR
    else:
        # previously this fell through and failed later on an unbound `ABR`
        raise ValueError("unknown ABR algorithm: %r" % (ABR_NAME,))

    # ---- reward weights per QoE profile ----
    # order: smooth, rebuffer, latency, skip, bitrate
    qoe_weights = {
        'al': (0.01, 1.5, 0.01, 1, 0.001),
        'ar': (0.0, 3, 0.0, 0.0, 0.001),
        'b': (0.02, 1.5, 0.005, 0.5, 0.001),
        'hd': (0.0, 0.5, 0.0, 0.0, 0.001),
        'max': (0, 0.0, 0.0, 0.0, 0.001),
    }
    SMOOTH_PENALTY, REBUF_PENALTY, LANTENCY_PENALTY, SKIP_PENALTY, \
        BITRATE_REWARD = qoe_weights.get(QoE, (0, 0.0, 0.0, 0.0, 0.0))

    # Only this path was ever effective; the earlier QoE-dependent names were
    # overwritten before use.
    FILE_NAME = './' + 'result/Startup/' + NETWORK_TRACE +'/'+ABR_NAME+ '/QoE.csv'

    # `with` closes (and flushes) the CSV on every exit path
    with open(FILE_NAME, 'w', newline='') as out:
        w = csv.writer(out)

        DEBUG = False
        LOG_FILE_PATH = './log/'
        # create result directory
        if not os.path.exists(LOG_FILE_PATH):
            os.makedirs(LOG_FILE_PATH)
        # -- End Configuration --

        network_trace_dir = './dataset/new_network_trace/' + NETWORK_TRACE + '/'
        video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_'

        # load the trace
        all_cooked_time, all_cooked_bw, all_file_names = \
            load_trace.load_trace(network_trace_dir)
        # mean of the first 10 bandwidth samples of the first trace, x1000
        start_avgbw = (sum(all_cooked_bw[0][0:10])/10) *1000

        random_seed = 2
        trace_count = 1
        frame_time_len = 0.04
        reward_all_sum = 0
        run_time = 0

        net_env = env.Environment(all_cooked_time=all_cooked_time,
                                  all_cooked_bw=all_cooked_bw,
                                  random_seed=random_seed,
                                  logfile_path=LOG_FILE_PATH,
                                  VIDEO_SIZE_FILE=video_trace_prefix,
                                  Debug=DEBUG)

        abr = ABR.Algorithm()
        abr_init = abr.Initial(model_name)

        BIT_RATE = [500.0, 850.0, 1200.0, 1850.0]  # kpbs

        cnt = 0
        # default setting
        last_bit_rate = 0
        bit_rate = 0
        target_buffer = 0
        latency_limit = 4

        # reward setting
        reward_frame = 0
        reward_all = 0

        # past_info setting: fixed-length per-frame histories
        past_frame_num = 200
        S_time_interval = [0] * past_frame_num
        S_send_data_size = [0] * past_frame_num
        S_chunk_len = [0] * past_frame_num
        S_rebuf = [0] * past_frame_num
        S_buffer_size = [0] * past_frame_num
        S_end_delay = [0] * past_frame_num
        S_chunk_size = [0] * past_frame_num
        S_play_time_len = [0] * past_frame_num
        S_decision_flag = [0] * past_frame_num
        S_buffer_flag = [0] * past_frame_num
        S_cdn_flag = [0] * past_frame_num
        S_skip_time = [0] * past_frame_num

        # params setting
        call_time_sum = 0
        reward_chunk = 0

        while True:
            reward_frame = 0
            time, time_interval, send_data_size, chunk_len, \
                rebuf, buffer_size, play_time_len, end_delay, \
                cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \
                buffer_flag, cdn_flag, skip_flag, end_of_video = \
                net_env.get_video_frame(bit_rate, target_buffer, latency_limit)

            # slide every history window by one frame (oldest out, newest in)
            for history, sample in (
                    (S_time_interval, time_interval),
                    (S_send_data_size, send_data_size),
                    (S_chunk_len, chunk_len),
                    (S_buffer_size, buffer_size),
                    (S_rebuf, rebuf),
                    (S_end_delay, end_delay),
                    (S_play_time_len, play_time_len),
                    (S_decision_flag, decision_flag),
                    (S_buffer_flag, buffer_flag),
                    (S_cdn_flag, cdn_flag),
                    (S_skip_time, skip_frame_time_len)):
                history.pop(0)
                history.append(sample)

            # QOE setting
            if not cdn_flag:
                reward_frame = frame_time_len * float(BIT_RATE[bit_rate]) * BITRATE_REWARD \
                    - REBUF_PENALTY * rebuf \
                    - LANTENCY_PENALTY * end_delay \
                    - SKIP_PENALTY * skip_frame_time_len
            else:
                reward_frame = -(REBUF_PENALTY * rebuf)

            if decision_flag or end_of_video:
                # the bitrate-switch penalty is charged at decision points
                reward_frame += -1 * SMOOTH_PENALTY * \
                    (abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000)
                reward_chunk += reward_frame
                w.writerow([ABR_NAME,reward_chunk])
                reward_chunk = 0
                last_bit_rate = bit_rate

                # ----------------- Your Algorithm ---------------------
                cnt += 1
                timestamp_start = tm.time()
                bit_rate, target_buffer, latency_limit = abr.run(
                    time,
                    S_time_interval,
                    S_send_data_size,
                    S_chunk_len,
                    S_rebuf,
                    S_buffer_size,
                    S_play_time_len,
                    S_end_delay,
                    S_decision_flag,
                    S_buffer_flag,
                    S_cdn_flag,
                    S_skip_time,
                    end_of_video,
                    cdn_newest_id,
                    download_id,
                    cdn_has_frame,
                    abr_init,
                    start_avgbw)
                # the start-up bandwidth hint is only passed on the first call
                start_avgbw = -1
                timestamp_end = tm.time()
                call_time_sum += timestamp_end - timestamp_start
                # -------------------- End --------------------------------
            else:
                reward_chunk += reward_frame

            if end_of_video:
                break
                # NOTE(review): everything below in this branch is unreachable
                # after the `break`, so only the first trace is simulated and
                # the returned totals stay at their initial values. Kept as
                # found - confirm whether multi-trace aggregation should be
                # restored.
                reward_all = reward_all/cnt
                reward_all_sum += reward_all
                run_time += call_time_sum / cnt
                if trace_count >= len(all_file_names):
                    break
                trace_count += 1
                cnt = 0

                call_time_sum = 0
                last_bit_rate = 0
                reward_all = 0
                bit_rate = 0
                target_buffer = 0

                S_time_interval = [0] * past_frame_num
                S_send_data_size = [0] * past_frame_num
                S_chunk_len = [0] * past_frame_num
                S_rebuf = [0] * past_frame_num
                S_buffer_size = [0] * past_frame_num
                S_end_delay = [0] * past_frame_num
                S_chunk_size = [0] * past_frame_num
                S_play_time_len = [0] * past_frame_num
                S_decision_flag = [0] * past_frame_num
                S_buffer_flag = [0] * past_frame_num
                S_cdn_flag = [0] * past_frame_num

            reward_all += reward_frame

    return [reward_all_sum / trace_count, run_time / trace_count]
def main(self, args, net_env=None, policy=None):
    """Roll out a decision-tree ABR policy on the simulator.

    Args:
        args: Parsed options; ``traces``, ``log``, ``qoe_metric`` and ``dt``
            are read here.
        net_env: Existing environment to roll out in.  When given, the
            function runs in "viper" mode: a single video is played, nothing
            is logged and the rollout is returned.  When ``None`` a fresh
            environment is built from ``args.traces`` and one video per
            trace file is played.
        policy: Object with a ``predict`` method.  When ``None`` the pickled
            tree at ``args.dt`` is loaded and wrapped in ``fsm.FSM``.

    Returns:
        In viper mode, the list of ``(state, bit_rate, serialized_state)``
        tuples collected so far; otherwise the summed reward over all videos.
    """
    np.random.seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM

    log_f = LOG_FILE

    viper_flag = net_env is not None
    if not viper_flag:
        all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
            args.traces)
        net_env = env.Environment(all_cooked_time=all_cooked_time,
                                  all_cooked_bw=all_cooked_bw,
                                  all_file_names=all_file_names)

    # Logging is only meaningful for standalone runs.  The log file used to
    # be opened under this condition but written whenever `args.log` was set,
    # which raised UnboundLocalError in viper mode with logging enabled.
    log_enabled = bool(args.log) and not viper_flag
    log_file = None
    if log_enabled:
        log_path = LOG_FILE + '_' + net_env.all_file_names[
            net_env.trace_idx] + '_' + args.qoe_metric
        log_file = open(log_path, 'wb')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    s_batch = [np.zeros((S_INFO, S_LEN))]
    r_batch = []
    rollout = []

    video_count = 0
    reward_sum = 0
    in_compute = []

    # load dt policy
    # NOTE(review): pickle executes arbitrary code on load - only point
    # args.dt at trusted files.
    if policy is None:
        with open(args.dt, 'rb') as f:
            policy = pk.load(f)
        policy = fsm.FSM(policy)

    try:
        while True:  # serve video forever
            delay, sleep_time, buffer_size, rebuf, video_chunk_size, \
                next_video_chunk_sizes, end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = get_reward(bit_rate, rebuf, last_bit_rate,
                                args.qoe_metric)
            r_batch.append(reward)
            reward_sum += reward
            last_bit_rate = bit_rate

            if log_enabled:
                # log time_stamp, bit_rate, buffer_size, reward
                log_file.write(
                    bytes(str(time_stamp / M_IN_K) + '\t' +
                          str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                          str(buffer_size) + '\t' + str(rebuf) + '\t' +
                          str(video_chunk_size) + '\t' + str(delay) + '\t' +
                          str(reward) + '\n',
                          encoding='utf-8'))
                log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = rebuf
            state[3, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[4, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            serialized_state = serial(state)
            bit_rate = int(policy.predict([serialized_state])[0])
            rollout.append((state, bit_rate, serialized_state))
            s_batch.append(state)

            if not end_of_video:
                continue

            if log_enabled:
                # blank line marks the end of a video in the log
                log_file.write(bytes('\n', encoding='utf-8'))
                log_file.close()
            print("video count", video_count)

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            r_batch = []
            in_compute = []

            if viper_flag:
                return rollout

            video_count += 1
            if video_count >= len(net_env.all_file_names):
                break
            if log_enabled:
                log_path = log_f + '_' + net_env.all_file_names[
                    net_env.trace_idx] + '_' + args.qoe_metric
                log_file = open(log_path, 'wb')
    finally:
        # closing an already-closed file is a no-op
        if log_file is not None:
            log_file.close()

    return reward_sum
def main():
    """Train the actor/critic networks in-process while BOLA picks the bitrate.

    Every loop iteration fetches one chunk from the simulated environment,
    computes its reward, rolls the state history forward and samples an index
    into ``BUFFER_PARAMETER`` from the actor's output.  The selected ``Vp`` is
    passed to a ``get_bitrate.Bola`` instance whose ``get_quality`` result
    becomes the bitrate of the next chunk.  Gradients are computed every
    ``TRAIN_SEQ_LEN`` rewards (or when a video ends) and applied once
    ``GRADIENT_BATCH_SIZE`` of them have been collected.

    The loop has no exit condition; the function runs until interrupted.
    """
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()
    load_trace.plot_bandwidth(all_cooked_time, all_cooked_bw, all_file_names)

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    with tf.Session() as sess, open(LOG_FILE, 'w') as log_file:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        summary_ops, summary_vars = a3c.build_summaries()

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)  # training monitor
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        epoch = 0
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        actor_gradient_batch = []
        critic_gradient_batch = []

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_counter, throughput, \
                video_chunk_remain = net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smooth penalty
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                - REBUF_PENALTY * rebuf \
                - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                          VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state; the fallback must be an array (not a
            # list wrapping one) so the 2-D indexing below stays valid
            if len(s_batch) == 0:
                state = np.zeros((S_INFO, S_LEN))
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)

            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state
            # and batch states.
            # A single draw feeds both the debug print and the decision, so the
            # printed index is the one that is actually used.
            rand = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
            above_threshold = action_cumsum > rand
            Vp_index = above_threshold.argmax()
            print(action_cumsum, above_threshold, Vp_index)

            # compute Vp and map it to a bitrate through BOLA
            Vp = BUFFER_PARAMETER[Vp_index]
            config = {
                'buffer_size': env.BUFFER_THRESH,
                'gp': GP,
                'Vp': Vp,
                'abr_osc': False,
                'abr_basic': False,
                'no_ibr': False
            }
            bola = get_bitrate.Bola(config=config)
            bit_rate = bola.get_quality(
                Vp, buffer_size * env.MILLISECONDS_IN_SECOND, last_bit_rate,
                throughput)

            # information about the chunk that was just downloaded
            # (last_bit_rate holds the bitrate it was fetched at)
            print(
                '[%d]:download time %.2fms,thrput=%.2f,chunk size %d,buffer=%.2fs,bitrate=%d'
                % (video_chunk_counter, delay, throughput, video_chunk_size,
                   buffer_size, last_bit_rate))

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_fields = (time_stamp, VIDEO_BIT_RATE[bit_rate], buffer_size,
                          rebuf, video_chunk_size, delay, reward)
            log_file.write('\t'.join(str(field) for field in log_fields) + '\n')
            log_file.flush()

            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:  # do training once

                # the first entry of each batch is skipped: it belongs to the
                # chunk we had no control over
                actor_gradient, critic_gradient, td_batch = \
                    a3c.compute_gradients(s_batch=np.stack(s_batch[1:], axis=0),
                                          a_batch=np.vstack(a_batch[1:]),
                                          r_batch=np.vstack(r_batch[1:]),
                                          terminal=end_of_video,
                                          actor=actor,
                                          critic=critic)
                td_loss = np.mean(td_batch)

                actor_gradient_batch.append(actor_gradient)
                critic_gradient_batch.append(critic_gradient)

                print("====")
                print("Epoch", epoch)
                print("TD_loss", td_loss, "Avg_reward", np.mean(r_batch),
                      "Avg_entropy", np.mean(entropy_record))
                print("====")

                summary_str = sess.run(summary_ops,
                                       feed_dict={
                                           summary_vars[0]: td_loss,
                                           summary_vars[1]: np.mean(r_batch),
                                           summary_vars[2]: np.mean(entropy_record)
                                       })

                writer.add_summary(summary_str, epoch)
                writer.flush()

                entropy_record = []

                if len(actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

                    assert len(actor_gradient_batch) == len(
                        critic_gradient_batch)

                    # gradients are applied one by one rather than summed first
                    for actor_grad, critic_grad in zip(actor_gradient_batch,
                                                       critic_gradient_batch):
                        actor.apply_gradients(actor_grad)
                        critic.apply_gradients(critic_grad)

                    actor_gradient_batch = []
                    critic_gradient_batch = []

                    epoch += 1
                    if epoch % MODEL_SAVE_INTERVAL == 0:
                        # Save the neural net parameters to disk.
                        save_path = saver.save(
                            sess, SUMMARY_DIR + "/nn_model_ep_" + str(epoch) +
                            ".ckpt")
                        print("Model saved in file: %s" % save_path)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                # NOTE(review): the one-hot action stored for training is the
                # bitrate returned by BOLA, not Vp_index (the index the actor
                # actually sampled) -- confirm this is intended.
                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
def main():
    """Replay every loaded trace with the robust-MPC selector, logging each chunk.

    For each downloaded chunk the function writes one tab-separated record
    (time, bitrate, buffer, rebuffer, chunk size, delay, reward) to a log file
    named ``LOG_FILE + '_' + <trace file name>``, then picks the next bitrate
    by exhaustively scoring the plans in ``CHUNK_COMBO_OPTIONS`` against a
    discounted harmonic-mean bandwidth estimate.  A new log file is started
    after each video; the function returns once as many videos as there are
    trace files have been played.

    Module-level ``CHUNK_COMBO_OPTIONS``, ``past_errors`` and
    ``past_bandwidth_ests`` are appended to and are not reset here.
    """
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    # text mode: every record written below is a str
    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    s_batch = [np.zeros((S_INFO, S_LEN))]

    video_count = 0

    # make chunk combination options: every 5-chunk plan over quality
    # levels 0..5
    CHUNK_COMBO_OPTIONS.extend(
        itertools.product([0, 1, 2, 3, 4, 5], repeat=5))

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, \
            end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty - smooth penalty
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
            - REBUF_PENALTY * rebuf \
            - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                      VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_fields = (time_stamp / M_IN_K, VIDEO_BIT_RATE[bit_rate],
                      buffer_size, rebuf, video_chunk_size, delay, reward)
        log_file.write('\t'.join(str(field) for field in log_fields) + '\n')
        log_file.flush()

        # retrieve previous state; the fallback must be an array (not a list
        # wrapping one) so the 2-D indexing below stays valid
        if len(s_batch) == 0:
            state = np.zeros((S_INFO, S_LEN))
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
            np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
        state[2, -1] = rebuf
        state[3, -1] = float(video_chunk_size) / float(
            delay) / M_IN_K  # kilo byte / ms
        state[4, -1] = np.minimum(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        # ================== MPC =========================
        # default assumes that this is the first request, so the error is 0
        # since we have never predicted bandwidth
        curr_error = 0
        if len(past_bandwidth_ests) > 0:
            curr_error = abs(past_bandwidth_ests[-1] -
                             state[3, -1]) / float(state[3, -1])
        past_errors.append(curr_error)

        # pick bitrate according to MPC
        # first get harmonic mean of last 5 bandwidths, ignoring the leading
        # zero entries of a history that has not filled up yet
        past_bandwidths = state[3, -5:]
        while past_bandwidths[0] == 0.0:
            past_bandwidths = past_bandwidths[1:]

        bandwidth_sum = sum(1 / float(past_val) for past_val in past_bandwidths)
        harmonic_bandwidth = 1.0 / (bandwidth_sum / len(past_bandwidths))

        # future bandwidth prediction
        # divide by 1 + max of last 5 (or up to 5) errors; slicing with -5
        # already yields the whole list when fewer than 5 errors exist
        max_error = float(max(past_errors[-5:]))
        future_bandwidth = harmonic_bandwidth / (1 + max_error)  # robustMPC here
        past_bandwidth_ests.append(harmonic_bandwidth)

        # future chunks length: the look-ahead horizon, shortened when fewer
        # chunks than that remain in the video
        last_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain)
        future_chunk_length = min(MPC_FUTURE_CHUNK_COUNT,
                                  TOTAL_VIDEO_CHUNKS - last_index)

        # all possible combinations of 5 chunk bitrates (6^5 options)
        # iterate over list and for each, compute reward and store max reward
        # combination
        max_reward = -100000000
        best_combo = ()
        start_buffer = buffer_size
        for full_combo in CHUNK_COMBO_OPTIONS:
            combo = full_combo[0:future_chunk_length]
            # calculate total rebuffer time for this combination (start with
            # start_buffer and subtract each download time and add the chunk
            # length in that order)
            curr_rebuffer_time = 0
            curr_buffer = start_buffer
            bitrate_sum = 0
            smoothness_diffs = 0
            last_quality = int(bit_rate)
            for position, chunk_quality in enumerate(combo):
                # e.g., if last chunk is 3, then first iter is 3+0+1=4
                index = last_index + position + 1
                download_time = (
                    get_chunk_size(chunk_quality, index) /
                    1000000.) / future_bandwidth  # this is MB/MB/s --> seconds
                if curr_buffer < download_time:
                    curr_rebuffer_time += (download_time - curr_buffer)
                    curr_buffer = 0
                else:
                    curr_buffer -= download_time
                # NOTE(review): 4 is presumably the chunk duration in
                # seconds -- confirm against the environment
                curr_buffer += 4
                bitrate_sum += VIDEO_BIT_RATE[chunk_quality]
                smoothness_diffs += abs(VIDEO_BIT_RATE[chunk_quality] -
                                        VIDEO_BIT_RATE[last_quality])
                last_quality = chunk_quality

            # compute reward for this combination (one reward per 5-chunk combo)
            # bitrates are in Mbits/s, rebuffer in seconds, and
            # smoothness_diffs in Mbits/s
            reward = (bitrate_sum / 1000.) - (
                REBUF_PENALTY * curr_rebuffer_time) - (smoothness_diffs / 1000.)

            # ">=" means a later plan wins a tie
            if reward >= max_reward:
                best_combo = combo
                max_reward = reward

        # first chunk of the best combo; 0 when no plan was selected or the
        # plan is empty (no chunks left to look ahead to)
        bit_rate = best_combo[0] if best_combo != () else 0
        # ================================================

        s_batch.append(state)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            s_batch.append(np.zeros((S_INFO, S_LEN)))

            # single-argument call form prints the same text on Python 2 and 3
            print("video count " + str(video_count))
            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'w')
def main(self, args, net_env=None):
    """Play videos with ``self.predict`` as the bitrate policy and collect a rollout.

    Args:
        args: settings object; the attributes read here are ``traces``,
            ``log`` and ``qoe_metric``.  It is also stored on ``self.args``.
        net_env: optional environment.  When omitted, one is built from
            ``args.traces`` and one video per trace file is played.  When
            supplied, a single video is played and no log file is written.

    Returns:
        A list of ``(state, bit_rate, serialized_state)`` tuples, one per
        downloaded chunk.
    """
    self.args = args

    np.random.seed(RANDOM_SEED)

    viper_flag = True
    assert len(VIDEO_BIT_RATE) == A_DIM

    if net_env is None:
        viper_flag = False
        all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
            args.traces)
        net_env = env.Environment(all_cooked_time=all_cooked_time,
                                  all_cooked_bw=all_cooked_bw,
                                  all_file_names=all_file_names)

    # A log is only opened when this function owns the environment.  Every
    # later write is gated on the handle itself, so supplying an environment
    # together with args.log no longer hits an undefined log_file.
    log_file = None
    if not viper_flag and args.log:
        log_path = LOG_FILE + '_' + net_env.all_file_names[
            net_env.trace_idx] + '_' + args.qoe_metric
        log_file = open(log_path, 'wb')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    s_batch = [np.zeros((S_INFO, S_LEN))]
    rollout = []

    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, video_chunk_size, \
            next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric)

        last_bit_rate = bit_rate

        if log_file is not None:
            # log time_stamp, bit_rate, buffer_size, reward
            log_fields = (time_stamp / M_IN_K, VIDEO_BIT_RATE[bit_rate],
                          buffer_size, rebuf, video_chunk_size, delay, reward)
            record = '\t'.join(str(field) for field in log_fields) + '\n'
            log_file.write(record.encode('utf-8'))
            log_file.flush()

        # retrieve previous state; the fallback must be an array (not a list
        # wrapping one) so the 2-D indexing below stays valid
        # NOTE(review): state is never appended to s_batch in this function,
        # so s_batch[-1] is always the all-zero initial state and no history
        # carries over between chunks -- confirm whether an
        # s_batch.append(state) is missing.
        if len(s_batch) == 0:
            state = np.zeros((S_INFO, S_LEN))
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
            np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
        state[2, -1] = rebuf
        state[3, -1] = float(video_chunk_size) / float(
            delay) / M_IN_K  # kilo byte / ms
        state[4, -1] = np.minimum(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        bit_rate = self.predict(state)

        # flattened copy of the state entries that are recorded alongside the
        # decision: rows 0-2 (latest value), row 3 (columns 0..4), row 4
        # (latest value)
        serialized_state = [state[0, -1], state[1, -1], state[2, -1]]
        serialized_state.extend(state[3, i] for i in range(5))
        serialized_state.append(state[4, -1])

        rollout.append((state, bit_rate, serialized_state))

        if end_of_video:
            if log_file is not None:
                log_file.write('\n'.encode('utf-8'))
                log_file.close()
                log_file = None
            print("video count", video_count)

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            s_batch.append(np.zeros((S_INFO, S_LEN)))

            if viper_flag:
                # a caller-supplied environment is played for one video only
                break

            video_count += 1
            if video_count >= len(net_env.all_file_names):
                break

            if args.log:
                log_path = LOG_FILE + '_' + net_env.all_file_names[
                    net_env.trace_idx] + '_' + args.qoe_metric
                log_file = open(log_path, 'wb')

    return rollout
def main():
    """Run the actor network over every test trace and log each chunk.

    The networks are built in a TensorFlow session and, when ``NN_MODEL`` is
    not ``None``, restored from that checkpoint.  For each downloaded chunk
    one tab-separated record (time, bitrate, buffer, rebuffer, chunk size,
    delay, reward) is written to ``LOG_FILE + '_' + <trace file name>`` and
    the next bitrate is sampled from the actor's output.  Every sampled
    bitrate is appended to the module-level ``DECISIONS`` list, and a count
    of the decisions is printed once all videos have been played.
    """
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    # text mode: every record written below is a str
    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        # The critic is never queried below.
        # NOTE(review): presumably it is built so that its variables exist
        # when the checkpoint is restored -- confirm before removing.
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        s_batch = [np.zeros((S_INFO, S_LEN))]

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                - REBUF_PENALTY * rebuf \
                - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                          VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_fields = (time_stamp / M_IN_K, VIDEO_BIT_RATE[bit_rate],
                          buffer_size, rebuf, video_chunk_size, delay, reward)
            log_file.write('\t'.join(str(field) for field in log_fields) + '\n')
            log_file.flush()

            # retrieve previous state; the fallback must be an array (not a
            # list wrapping one) so the 2-D indexing below stays valid
            if len(s_batch) == 0:
                state = np.zeros((S_INFO, S_LEN))
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state
            # and batch states
            threshold = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
            bit_rate = (action_cumsum > threshold).argmax()

            DECISIONS.append(bit_rate)

            s_batch.append(state)

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                s_batch.append(np.zeros((S_INFO, S_LEN)))

                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'w')

    # single-argument call form is valid on both Python 2 and Python 3
    print("Decisions: {}".format(Counter(DECISIONS)))