Example #1
def main():

    np.random.seed(RANDOM_SEED)
    if not mp_util.MP_ENABLED:
        assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues
    net_params_queues = []
    exp_queues = []
    for i in xrange(NUM_AGENTS):
        net_params_queues.append(mp.Queue(1))
        exp_queues.append(mp.Queue(1))

    # create a coordinator and multiple agent processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)
    if mp_util.MP_ENABLED:
        all_cooked_time_lte, all_cooked_bw_lte, _ = load_trace.load_trace(
            TRAIN_TRACES_LTE)

        both_cooked_time = {}
        both_cooked_time['wifi'] = all_cooked_time
        both_cooked_time['lte'] = all_cooked_time_lte

        both_cooked_bw = {}
        both_cooked_bw['wifi'] = all_cooked_bw
        both_cooked_bw['lte'] = all_cooked_bw_lte

    agents = []
    for i in xrange(NUM_AGENTS):
        if mp_util.MP_ENABLED:
            agents.append(
                mp.Process(target=agent,
                           args=(i, both_cooked_time, both_cooked_bw,
                                 net_params_queues[i], exp_queues[i])))
        else:
            agents.append(
                mp.Process(target=agent,
                           args=(i, all_cooked_time, all_cooked_bw,
                                 net_params_queues[i], exp_queues[i])))
    for i in xrange(NUM_AGENTS):
        agents[i].start()

    # wait until training is done
    coordinator.join()
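These multi-agent training entry points all rely on the same hand-off between central_agent and the per-worker agent functions, whose bodies are not shown here: the coordinator broadcasts the latest network parameters over net_params_queues and each worker returns its rollout experience over exp_queues. A minimal, runnable toy of that hand-off follows; the numeric "parameters" and "experience" are stand-ins for the real weights and batches, an assumption for illustration only.

import multiprocessing as mp

NUM_AGENTS = 2
EPOCHS = 3

def central_agent(net_params_queues, exp_queues):
    params = 0.0
    for _ in range(EPOCHS):
        for q in net_params_queues:
            q.put(params)                         # broadcast current parameters
        updates = [q.get() for q in exp_queues]   # wait for every worker's batch
        params += sum(updates) / len(updates)     # stand-in for the actual A3C update

def agent(agent_id, net_params_queue, exp_queue):
    for _ in range(EPOCHS):
        params = net_params_queue.get()           # sync with the coordinator
        exp_queue.put(1.0)                        # stand-in for rollout experience

if __name__ == '__main__':
    net_params_queues = [mp.Queue(1) for _ in range(NUM_AGENTS)]
    exp_queues = [mp.Queue(1) for _ in range(NUM_AGENTS)]
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()
    workers = [mp.Process(target=agent,
                          args=(i, net_params_queues[i], exp_queues[i]))
               for i in range(NUM_AGENTS)]
    for w in workers:
        w.start()
    coordinator.join()
    for w in workers:
        w.join()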
Example #2
def main():
    np.random.seed(42)

    os.system('rm ' + TEST_LOG_PATH)

    ta_q = Tabular_Q()

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()

    epoch = 0
    time_stamp = 0

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    state = [0, 0, 0, 0]

    while True:

        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        epoch += 1

        bw = float(video_chunk_size) / float(
            delay) / M_IN_K * BITS_IN_BYTE  # Mbit/sec
        bw = min(int(bw / D_BW) * D_BW, BW_MAX)
        bf = min(int(buffer_size / D_BF) * D_BF, BF_MAX)
        br = bit_rate
        c = min(video_chunk_remain, N_CHUNK - 1)
        next_state = [bw, bf, br, c]

        ta_q.train_q(state, bit_rate, reward, next_state, end_of_video)

        state = next_state
        last_bit_rate = bit_rate

        bit_rate = ta_q.get_q_action(state)

        if end_of_video:
            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY
            state = [0, 0, 0, 0]

        if epoch % TEST_INTERVAL == 0:
            testing(ta_q, epoch)
            np.save(TEST_LOG_PATH + '_q_table.npy', ta_q.q_table)
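The Tabular_Q class used above (q_table, train_q, get_q_action) is not included in this excerpt. Below is a dict-backed sketch of what that interface could look like; the action count, learning rate, discount factor, and exploration rate are assumed values, not the project's own constants.

import numpy as np

class Tabular_Q(object):
    def __init__(self, n_actions=6, lr=0.1, gamma=0.99, epsilon=0.1):
        self.n_actions = n_actions
        self.lr = lr              # assumed learning rate
        self.gamma = gamma        # assumed discount factor
        self.epsilon = epsilon    # assumed exploration rate
        self.q_table = {}         # maps tuple(state) -> array of action values

    def _q(self, state):
        key = tuple(state)
        if key not in self.q_table:
            self.q_table[key] = np.zeros(self.n_actions)
        return self.q_table[key]

    def train_q(self, state, action, reward, next_state, end_of_video):
        q = self._q(state)
        target = reward
        if not end_of_video:
            target += self.gamma * np.max(self._q(next_state))
        q[action] += self.lr * (target - q[action])   # standard Q-learning update

    def get_q_action(self, state):
        if np.random.rand() < self.epsilon:           # epsilon-greedy exploration
            return np.random.randint(self.n_actions)
        return int(np.argmax(self._q(state)))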
Example #3
File: multi_agent.py  Project: xgw/proj
def main():

    np.random.seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues
    net_params_queues = []
    exp_queues = []
    for i in xrange(NUM_AGENTS):
        net_params_queues.append(mp.Queue(1))
        exp_queues.append(mp.Queue(1))

    # create a coordinator and multiple agent processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    trace_index = np.random.randint(1, 65)
    all_cooked_time, all_cooked_bw = load_trace.load_trace(trace_index)
    agents = []
    for i in xrange(NUM_AGENTS):
        agents.append(
            mp.Process(target=agent,
                       args=(i, all_cooked_time, all_cooked_bw,
                             net_params_queues[i], exp_queues[i])))
    for i in xrange(NUM_AGENTS):
        agents[i].start()

    # wait until training is done
    coordinator.join()
Example #4
    def __init__(self):
        self.args = EnvArgs()
        all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
            self.args.test_bw_trace)
        super().__init__(all_cooked_time=all_cooked_time,
                         all_cooked_bw=all_cooked_bw,
                         random_seed=self.args.random_seed,
                         VIDEO_SIZE_FILE=self.args.test_video_size_files,
                         logfile_path='./log/',
                         Debug=False)

        self.state_gop = np.zeros(
            (self.args.s_gop_info,
             self.args.s_gop_len))  # state info for past gops
        self.last_bit_rate = 0
        self.reward_gop = 0
        self.last_reward_gop = 0
        self.action_map = self._set_action_map()

        self.time_intervals = []
        self.send_data_sizes = []
        self.frame_types = []
        self.frame_time_lens = []
        self.real_qualitys = []
        self.buffer_sizes = []
        self.end_delays = []
        self.rebuf_time = 0

        self.call_time = 0
        self.switch_num = 0

        self.gop_sizes = [[0] * 17, [0] * 17]
        # info for traces
        self.traces_len = len(all_file_names)
Example #5
def main():

    np.random.seed(RANDOM_SEED)

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues
    net_params_queues = []
    exp_queues = []
    for i in xrange(NUM_AGENTS):
        net_params_queues.append(mp.Queue(1))
        exp_queues.append(mp.Queue(1))

    # create a coordinator and multiple agent processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    all_user_pos, _ = load_trace.load_trace(TRAIN_TRACES)
    agents = []
    for i in xrange(NUM_AGENTS):
        agents.append(
            mp.Process(target=agent,
                       args=(i, all_user_pos, net_params_queues[i],
                             exp_queues[i])))
    for i in xrange(NUM_AGENTS):
        agents[i].start()

    # wait until training is done
    coordinator.join()
Example #6
    def __init__(self, random_seed=RANDOM_SEED):
        np.random.seed(random_seed)
        all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
        self.net_env = abrenv.Environment(all_cooked_time=all_cooked_time,
                                          all_cooked_bw=all_cooked_bw,
                                          random_seed=random_seed)

        self.last_bit_rate = DEFAULT_QUALITY
        self.buffer_size = 0.
        self.state = np.zeros((S_INFO, S_LEN))
        self.reset()
Example #7
def main():

    alpha = CNO_PARA_LOSS_RATE
    actor_learning_rate = ACTOR_LR_RATE
    bg_traffic_pattern = 0.0    # model_1
    link_capacity = 20000000    # 20Mbps

    try:
        alpha               = sys.argv[1]
        bg_traffic_pattern  = sys.argv[2]
        actor_learning_rate = sys.argv[3]
        link_capacity       = sys.argv[4]
    except Exception as ex:
        print ("Not all inputs has set via cmd -> alpha[{0}] bg_tp[{1}] a_lr[{2}] lc[{3}]".format(alpha,
                                                                                                  actor_learning_rate,
                                                                                                  bg_traffic_pattern,
                                                                                                  link_capacity))

    np.random.seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM
    
    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues
    net_params_queues = []
    exp_queues = []
    for i in range(NUM_AGENTS):
        net_params_queues.append(mp.Queue(1))
        exp_queues.append(mp.Queue(1))

    # create a coordinator and multiple agent processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    
    coordinator.start()
    
    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)
    agents = []
    
    for i in range(NUM_AGENTS):
        agents.append(mp.Process(target=agent,
                                 args=(i, all_cooked_time, all_cooked_bw,
                                       net_params_queues[i],
                                       exp_queues[i])))
    
    for i in range(NUM_AGENTS):
        agents[i].start()
    
    # wait until training is done
    coordinator.join()
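The sys.argv handling above leaves every override as a string; if numeric types are needed downstream, an argparse version could replace it. The option names and defaults below are illustrative, not taken from the project.

import argparse

def parse_cno_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--alpha', type=float, default=0.0)             # illustrative default
    parser.add_argument('--bg-traffic-pattern', type=float, default=0.0)
    parser.add_argument('--actor-lr', type=float, default=1e-4)         # illustrative default
    parser.add_argument('--link-capacity', type=int, default=20000000)  # 20 Mbps
    return parser.parse_args()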
Example #8
    def __init__(self, random_seed=RANDOM_SEED):
        np.random.seed(RANDOM_SEED)
        self.action_space = spaces.Discrete(A_DIM)
        self.observation_space = spaces.Box(0,
                                            10.0, [S_INFO, S_LEN],
                                            dtype=np.float32)
        all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
        self.net_env = env.Environment(all_cooked_time=all_cooked_time,
                                       all_cooked_bw=all_cooked_bw)

        self.last_bit_rate = DEFAULT_QUALITY
        self.state = np.zeros((S_INFO, S_LEN))
        self.reset()
Example #9
    def __init__(self, time, bandwidth, random_seed=RANDOM_SEED):
        np.random.seed(random_seed)

        self.video_chunk_current = 1
        self.buffer_current = 0

        ## pick a random trace file
        self.trace_index = np.random.randint(1, 65)
        self.time, self.bandwidth = load_trace.load_trace(self.trace_index)

        self.trace_ptr = np.random.randint(1, len(self.bandwidth))
        self.last_time = self.time[self.trace_ptr - 1]
        print(len(self.bandwidth))
Example #10
File: abr.py  Project: zchao520/Zwei
    def __init__(self, random_seed=RANDOM_SEED):
        np.random.seed(random_seed)
        # self.action_space = spaces.Box(
        #     low=0., high=60., shape=(2,), dtype=np.float32)
        # self.observation_space = spaces.Box(
        #     0, 10.0, (S_LEN * S_INFO,), dtype=np.float32)
        all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
        self.net_env = abrenv.Environment(all_cooked_time=all_cooked_time,
                                          all_cooked_bw=all_cooked_bw,
                                          random_seed=RANDOM_SEED)

        self.last_bit_rate = DEFAULT_QUALITY
        self.buffer_size = 0.
        self.state = np.zeros((S_INFO, S_LEN))
        self.reset()
Example #11
def main(arglist):

    time = datetime.now()
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues
    net_params_queues = []
    exp_queues = []
    for i in range(NUM_AGENTS):
        net_params_queues.append(mp.Queue(1))
        exp_queues.append(mp.Queue(1))

    # create a coordinator and multiple agent processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues,
                                   arglist.model_type))
    coordinator.start()

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)
    agents = []
    for i in range(NUM_AGENTS):
        agents.append(
            mp.Process(target=agent,
                       args=(i, all_cooked_time, all_cooked_bw,
                             net_params_queues[i], exp_queues[i],
                             arglist.model_type)))
    for i in range(NUM_AGENTS):
        agents[i].start()

    # wait until training is done
    coordinator.join()
    for i in range(NUM_AGENTS):
        agents[i].join()

    print(str(datetime.now() - time))
Example #12
def main():
    # fix the random seed
    # create the Queues that carry neural-network parameters and rollout data
    # (used to pass data between the central agent and the worker agents)
    # start the central and worker agents in separate processes and load the
    # network-condition data from the trace files
    np.random.seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues
    net_params_queues = []
    exp_queues = []
    for i in xrange(NUM_AGENTS):  # one queue pair per agent
        net_params_queues.append(mp.Queue(1))  # parameter queue for agent i
        exp_queues.append(mp.Queue(1))  # experience queue for agent i

    # create a coordinator and multiple agent processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(
        target=central_agent,
        args=(net_params_queues,
              exp_queues))  # central_agent is the function defined below; it takes the two queue lists
    coordinator.start()  # start the coordinator process

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(
        TRAIN_TRACES)  # load the network trace data
    agents = []
    for i in xrange(NUM_AGENTS):
        agents.append(
            mp.Process(
                target=agent,
                args=(i, all_cooked_time, all_cooked_bw, net_params_queues[i],
                      exp_queues[i]
                      )))  # agent is the function defined below; it takes the agent id, the trace data, and its queue pair
    for i in xrange(NUM_AGENTS):  # start the worker processes
        agents[i].start()

    # wait until training is done
    coordinator.join()
Example #13
def main():

    np.random.seed(RANDOM_SEED)  # seed the random number generator
    assert len(BIT_RATE) == A_DIM  # raises AssertionError if the bitrate list and action dimension mismatch

    # create result directory
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    # inter-process communication queues
    net_params_queues = []
    exp_queues = []
    for i in xrange(NUM_AGENTS):  # queues are created in the parent (main) process
        net_params_queues.append(mp.Queue(1))
        exp_queues.append(mp.Queue(1))

    # create a coordinator and multiple agent processes
    # (note: threading is not desirable due to python GIL)
    coordinator = mp.Process(target=central_agent,
                             args=(net_params_queues, exp_queues))
    coordinator.start()

    network_trace_dir = './dataset/network_trace/' + NETWORK_TRACE + '/'
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        network_trace_dir)
    #all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(TRAIN_TRACES)
    agents = []
    for i in xrange(NUM_AGENTS):
        agents.append(
            mp.Process(target=agent,
                       args=(i, all_cooked_time, all_cooked_bw, all_file_names,
                             net_params_queues[i], exp_queues[i])))
    for i in xrange(NUM_AGENTS):
        agents[i].start()

    # wait until training is done
    coordinator.join()
Example #14
def main():
    all_cooked_time, all_cooked_bw = load_trace.load_trace()

    video_size = {}  # in bytes
    for bitrate in xrange(BITRATE_LEVELS):
        video_size[bitrate] = []
        with open(VIDEO_SIZE_FILE + str(bitrate)) as f:
            for line in f:
                video_size[bitrate].append(int(line.split()[0]))

    # assert len(all_cooked_time) == len(all_cooked_bw)
    # for cooked_data_idx in xrange(len(all_cooked_time))

    cooked_time = all_cooked_time[0]
    cooked_bw = all_cooked_bw[0]

    # -----------------------------------------
    # step 1: quantize the time and bandwidth
    # -----------------------------------------
    total_time_pt = int(np.ceil(cooked_time[-1] / DT))

    quan_time = np.linspace(np.floor(cooked_time[0]), np.ceil(cooked_time[-1]),
                            total_time_pt + 1)
    quan_bw = np.zeros(len(quan_time))

    curr_time_idx = 0
    for i in xrange(len(quan_bw)):
        while curr_time_idx < len(cooked_time) - 1 and \
              cooked_time[curr_time_idx] < quan_time[i]:
            curr_time_idx += 1
        quan_bw[i] = cooked_bw[curr_time_idx]

    # ----------------------------------------
    # step 2: cap the max time and max buffer
    # ----------------------------------------
    max_video_contents = np.sum(video_size[BITRATE_LEVELS - 1])  # in bytes
    total_bw = np.sum(quan_bw) * DT  # in MBit

    t_portion = max_video_contents / (total_bw * B_IN_MB *
                                      PACKET_PAYLOAD_PORTION / BITS_IN_BYTE)

    t_max = int(np.ceil(np.ceil(cooked_time[-1]) * t_portion))

    t_max_idx = int(np.ceil(t_max / DT))
    b_max_idx = t_max_idx

    full_quan_time = quan_time
    full_quan_bw = quan_bw

    for i in xrange(int(np.ceil(t_portion))):
        full_quan_time = np.append(full_quan_time,
                                   (quan_time[1:] + full_quan_time[-1]))
        full_quan_bw = np.append(full_quan_bw, quan_bw[1:])

    quan_time = full_quan_time
    quan_bw = full_quan_bw

    assert quan_time[-1] >= t_max

    # -----------------------------------------------------------
    # (optional) step 3: pre-compute the download time of chunks
    # download_time(chunk_idx, quan_time, bit_rate)
    # -----------------------------------------------------------
    all_download_time = {}

    # print "Pre-compute the download time table"
    # all_download_time = get_download_time(total_video_chunks=TOTAL_VIDEO_CHUNCK,
    #                                   quan_time=quan_time,
    #                                   quan_bw=quan_bw,
    #                                   dt=DT,
    #                                   video_size=video_size,
    #                                   bitrate_levels=BITRATE_LEVELS)

    # -----------------------------
    # step 4: dynamic programming
    # -----------------------------
    total_reward = {}
    last_dp_pt = {}

    # initialization: take the default quality for the first chunk
    download_time = \
        restore_or_compute_download_time(
            all_download_time, 0, 0, DEFAULT_QUALITY,
            quan_time, quan_bw, DT, video_size)
    first_chunk_finish_time = download_time + LINK_RTT / M_IN_K
    first_chunk_finish_idx = int(np.floor(first_chunk_finish_time / DT))
    buffer_size = int(VIDEO_CHUNCK_LEN / M_IN_K / DT)

    total_reward[(0, first_chunk_finish_idx, buffer_size, DEFAULT_QUALITY)] = \
        VIDEO_BIT_RATE[DEFAULT_QUALITY] / M_IN_K \
        - REBUF_PENALTY * first_chunk_finish_time
    last_dp_pt[(0, first_chunk_finish_idx, buffer_size,
                DEFAULT_QUALITY)] = (0, 0, 0, 0)

    for n in xrange(1, TOTAL_VIDEO_CHUNCK):
        print n, TOTAL_VIDEO_CHUNCK
        for t in xrange(t_max_idx):
            for b in xrange(b_max_idx):
                for m in xrange(BITRATE_LEVELS):
                    if (n - 1, t, b, m) in total_reward:
                        for new_bit_rate in xrange(BITRATE_LEVELS):
                            download_time = \
                                restore_or_compute_download_time(
                                    all_download_time, n, t, new_bit_rate,
                                    quan_time, quan_bw, DT, video_size)

                            buffer_size = quan_time[b]
                            rebuf = np.maximum(download_time - buffer_size,
                                               0.0)

                            r = VIDEO_BIT_RATE[new_bit_rate] / M_IN_K \
                                - REBUF_PENALTY * rebuf \
                                - SMOOTH_PENALTY * np.abs(
                                    VIDEO_BIT_RATE[new_bit_rate] -
                                    VIDEO_BIT_RATE[m]) / M_IN_K

                            buffer_size = np.maximum(
                                buffer_size - download_time, 0.0)
                            buffer_size += VIDEO_CHUNCK_LEN / M_IN_K

                            buffer_idx = int(buffer_size / DT)

                            new_time_idx = int(
                                np.floor((quan_time[t] + download_time +
                                          LINK_RTT / M_IN_K) / DT))

                            new_total_reward = total_reward[(n - 1, t, b,
                                                             m)] + r
                            if (n, new_time_idx, buffer_idx,
                                    new_bit_rate) not in total_reward:
                                total_reward[(n, new_time_idx, buffer_idx, new_bit_rate)] = \
                                        new_total_reward
                                last_dp_pt[(n, new_time_idx, buffer_idx, new_bit_rate)] = \
                                    (n - 1, t, b, m)
                            else:
                                if new_total_reward > total_reward[(
                                        n, new_time_idx, buffer_idx,
                                        new_bit_rate)]:
                                    total_reward[(n, new_time_idx, buffer_idx, new_bit_rate)] = \
                                        new_total_reward
                                last_dp_pt[(n, new_time_idx, buffer_idx, new_bit_rate)] = \
                                    (n - 1, t, b, m)

    # ---------------------------------
    # step 5: get the max total reward
    # ---------------------------------
    optimal_total_reward = -np.inf
    end_dp_pt = None
    for k in total_reward:
        if k[0] == TOTAL_VIDEO_CHUNCK - 1:
            if total_reward[k] > optimal_total_reward:
                optimal_total_reward = total_reward[k]
                end_dp_pt = last_dp_pt[k]

    print optimal_total_reward
    if end_dp_pt is not None:
        while end_dp_pt != (0, 0, 0, 0):
            print end_dp_pt
            end_dp_pt = last_dp_pt[end_dp_pt]
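restore_or_compute_download_time() is called by the dynamic program above but is not defined in this excerpt. Below is a sketch of a memoized version that drains one chunk through the quantized bandwidth trace; the constants mirror the names used above and are assumptions here, and if the trace runs out the partial elapsed time is returned.

B_IN_MB = 1000000.0
BITS_IN_BYTE = 8.0
PACKET_PAYLOAD_PORTION = 0.95   # assumed payload fraction

def restore_or_compute_download_time(all_download_time, chunk_idx, time_idx,
                                     bit_rate, quan_time, quan_bw, dt,
                                     video_size):
    key = (chunk_idx, time_idx, bit_rate)
    if key in all_download_time:                  # memo hit
        return all_download_time[key]

    chunk_bytes = video_size[bit_rate][chunk_idx]
    downloaded = 0.0                              # bytes received so far
    elapsed = 0.0                                 # seconds spent downloading
    i = time_idx
    while downloaded < chunk_bytes and i < len(quan_bw):
        # quan_bw is in Mbit/sec over a dt-second slot
        downloaded += quan_bw[i] * B_IN_MB * PACKET_PAYLOAD_PORTION / BITS_IN_BYTE * dt
        elapsed += dt
        i += 1

    all_download_time[key] = elapsed              # memoize for later DP lookups
    return elapsed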
Example #15
    def main(self, args, net_env=None, policy=None):
        viper_flag = True
        assert len(VIDEO_BIT_RATE) == A_DIM
        log_f = LOG_FILE

        if net_env is None:
            viper_flag = False
            all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(args.traces)
            net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)
        
        if not viper_flag and args.log:
            log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
            log_file = open(log_path, 'wb')

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        rollout = []
        video_count = 0
        reward_sum = 0
        in_compute = []

        # load dt policy
        if policy is None:
            with open(DTModel, 'rb') as f:
                policy = pk.load(f)

        while True:  # serve video forever
            delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric)
            r_batch.append(reward)
            reward_sum += reward
            last_bit_rate = bit_rate

            if args.log:
                log_file.write(bytes(str(time_stamp / M_IN_K) + '\t' +
                               str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                               str(buffer_size) + '\t' +
                               str(rebuf) + '\t' +
                               str(video_chunk_size) + '\t' +
                               str(delay) + '\t' +
                               str(reward) + '\n', encoding='utf-8'))
                log_file.flush()


            # select bit_rate according to decision tree
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            serialized_state = serial(state)
            bit_rate = int(policy.predict(np.array(serialized_state).reshape(1, -1))[0])
            rollout.append((state, bit_rate, serialized_state))
            s_batch.append(state)

            if end_of_video:
                if args.log:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    print("video count", video_count)

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here
                r_batch = []
                in_compute = []

                if viper_flag:
                    return rollout
                else:
                    video_count += 1
                    if video_count >= len(net_env.all_file_names):
                        break
                    if args.log:
                        log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
                        log_file = open(log_path, 'wb')

        return reward_sum
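Examples #15 and #17 pass the (S_INFO, S_LEN) state through serial() before calling the decision tree's predict(); that helper is not shown here. Assuming it simply flattens the state matrix into one feature row, a sketch is:

import numpy as np

def serial(state):
    # flatten the (S_INFO, S_LEN) state matrix into a flat feature list
    # for the scikit-learn decision tree (row-major order is an assumption)
    return np.asarray(state, dtype=float).flatten().tolist()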
Example #16
def main():
    torch.set_num_threads(1)

    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    # all models have same actor network
    # so model_type can be anything
    net = ActorNetwork([S_INFO, S_LEN], A_DIM)

    # restore neural net parameters
    net.load_state_dict(torch.load(ACTOR_MODEL))
    print("Testing model restored.")

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    video_count = 0
    state = torch.zeros((S_INFO, S_LEN))

    weights = np.array([0.2, 0.3, 0.5])

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        w1 = weights[0]
        w2 = weights[1]
        w3 = weights[2]

        reward = w1 * VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - w2 * REBUF_PENALTY * rebuf \
                 - w3 * SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                                VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
            '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
            str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) +
            '\n')
        log_file.flush()

        # retrieve previous state
        state = torch.roll(state, -1, dims=-1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
            np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(
            delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = torch.tensor(
            next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = min(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        with torch.no_grad():
            probability = net.forward(state.unsqueeze(0))
            m = Categorical(probability)
            bit_rate = m.sample().item()
        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        if end_of_video:
            weights = np.random.randn(3)
            weights = np.abs(weights) / np.linalg.norm(weights, ord=1)  # L1-normalize the random weights
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            state = torch.zeros((S_INFO, S_LEN))

            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'w')
Example #17
    def main(self, args, net_env=None, policy=None):
        viper_flag = True
        assert len(VIDEO_BIT_RATE) == A_DIM
        log_f = LOG_FILE

        if net_env is None:
            viper_flag = False
            all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(args.traces)
            net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)
        if args.update:
            log_f = log_f.replace('dt', 'du')

        if not viper_flag and args.log:
            log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
            log_file = open(log_path, 'wb')

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        rollout = []
        video_count = 0
        reward_sum = 0
        in_compute = []

        # load dt policy
        if policy is None:
            with open(args.dt, 'rb') as f:
                policy = pk.load(f)
        policy = fsm.FSM(policy)

        while True:  # serve video forever
            delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric)
            r_batch.append(reward)
            reward_sum += reward
            last_bit_rate = bit_rate

            if args.log:
                log_file.write(bytes(str(time_stamp / M_IN_K) + '\t' +
                               str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                               str(buffer_size) + '\t' +
                               str(rebuf) + '\t' +
                               str(video_chunk_size) + '\t' +
                               str(delay) + '\t' +
                               str(reward) + '\n', encoding='utf-8'))
                log_file.flush()


            # select bit_rate according to decision tree
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            serialized_state = serial(state)
            bit_rate = int(policy.predict(np.array(serialized_state).reshape(1, -1))[0])
            rollout.append((state, bit_rate, serialized_state))
            s_batch.append(state)

            if args.update:
                chunk_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain - 1)
                policy.chunk_leaf[chunk_index] = policy.tree.apply(np.array(serialized_state).reshape(1, -1))
                if chunk_index < CHUNK_TIL_VIDEO_END_CAP - HORIZON:
                    in_compute.append(fsm.Trajectory(chunk_index, max(0, bit_rate - 1), buffer_size - CHUNK_LEN,
                                                     last_bit_rate, state, args))
                    in_compute.append(fsm.Trajectory(chunk_index, bit_rate, buffer_size - CHUNK_LEN,
                                                     last_bit_rate, state, args))
                    in_compute.append(fsm.Trajectory(chunk_index, min(5, bit_rate + 1), buffer_size - CHUNK_LEN,
                                                     last_bit_rate, state, args))

                for traj in in_compute:
                    this_chunk_size = video_chunk_size
                    this_delay = delay
                    while True:
                        if traj.apply(this_chunk_size, this_delay) == CHUNK_SWITCH:
                            new_bitrate = int(policy.predict(np.array(serial(traj.states)).reshape(1, -1))[0])
                            traj.next_chunk(new_bitrate)
                            this_chunk_size, this_delay = traj.trans_msg
                        else:
                            break

                    while len(in_compute) > 1 and in_compute[0].end and in_compute[1].end and in_compute[2].end:
                        r_below = sum([get_reward(in_compute[0].quality[i], in_compute[0].rebuf[i],
                                                  in_compute[0].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
                        r_normal = sum([get_reward(in_compute[1].quality[i], in_compute[1].rebuf[i],
                                                  in_compute[1].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
                        r_above = sum([get_reward(in_compute[2].quality[i], in_compute[2].rebuf[i],
                                                  in_compute[2].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
                        if r_above == max(r_below, r_normal, r_above):
                            policy.update(in_compute[0].chunk_index, 1)
                        elif r_normal == max(r_below, r_normal, r_above):
                            policy.update(in_compute[0].chunk_index, -1)
                        else:
                            policy.update(in_compute[0].chunk_index, 0)

                        in_compute.pop(0)
                        in_compute.pop(0)
                        in_compute.pop(0)

            if end_of_video:
                if args.log:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    print("video count", video_count)

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here
                r_batch = []
                in_compute = []

                if viper_flag:
                    return rollout
                else:
                    video_count += 1
                    if video_count >= len(net_env.all_file_names):
                        break
                    if args.log:
                        log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
                        log_file = open(log_path, 'wb')

        return reward_sum
Example #18
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with torch.no_grad():
        model = a3c.ActorCritic(state_dim=[S_INFO, S_LEN],
                                action_dim=A_DIM,
                                learning_rate=[ACTOR_LR_RATE, CRITIC_LR_RATE],islstm = islstm)

        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            model.load_state_dict(torch.load(nn_model, map_location=torch.device('cpu')))
            print("Model restored.")

        state = torch.zeros(S_INFO, S_LEN)
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = torch.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [torch.zeros(S_INFO, S_LEN)]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0


        cx = torch.zeros(1, 128)
        hx = torch.zeros(1, 128)

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write((str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n').encode("utf-8"))
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = torch.zeros((S_INFO, S_LEN))
            else:
                state = s_batch[-1].clone()

            # dequeue the oldest history column
            state = torch.roll(state, -1, dims=-1)

            # Fill in the state vector with normalization
            state[0, -1] = torch.Tensor([VIDEO_BIT_RATE[last_bit_rate] / float(max(VIDEO_BIT_RATE))])  # last quality
            state[1, -1] = torch.Tensor([buffer_size / BUFFER_NORM_FACTOR])  # buffer size
            state[2, -1] = torch.Tensor([float(video_chunk_size) / float(delay) / M_IN_K])  # kilo byte / ms
            state[3, -1] = torch.Tensor([float(delay) / M_IN_K / BUFFER_NORM_FACTOR])  # /10 sec
            state[4, :A_DIM] = torch.Tensor(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            # remaining chunk number
            state[5, -1] = torch.Tensor([min(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)])

            if islstm == 0:
                logits, value = model(state.unsqueeze(dim=0))
            else:
                logits, value, hx, cx = model((state.unsqueeze(dim=0),hx,cx))
            # print(f"index {index}, state {state}, logits {logits}, value {value}",sep="\n")
            # print(state,logits)
            try:
                cate         = Categorical(logits)
                bit_rate     = cate.sample().item()
            except Exception as e:
                print(e)
                print(f"walking into an error of all null distribution")
                print(logits, state)
                exit()

            policy       = logits
            log_policy   = torch.log(logits)
            entropy      = (policy * log_policy).sum(1, keepdim=True)

            s_batch.append(state)
            entropy_record.append(entropy)

            if end_of_video:
                log_file.write('\n'.encode("utf-8"))
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                cx = cx.detach()
                hx = hx.detach()

                action_vec = torch.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(torch.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                print ("video count", video_count)
                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')
Example #19
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    with tf.Session() as sess:
        actor = libcomyco.libcomyco(sess,
                S_INFO, S_LEN, A_DIM, LR_RATE = 1e-4)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        bit_rate = DEFAULT_QUALITY
        last_bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_remain = \
                    net_env.get_video_chunk(int(bit_rate))

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            if QOE_METRIC == 'lin':
                # -- lin scale reward --
                REBUF_PENALTY = REBUFF_PENALTY_LIN
                reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                        - REBUF_PENALTY * rebuf \
                        - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                                VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
                # reward_max = 4.3
            else:
                # -- log scale reward --
                REBUF_PENALTY = REBUFF_PENALTY_LOG
                log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0]))
                log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))

                reward = log_bit_rate \
                        - REBUF_PENALTY * rebuf \
                        - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)
            r_batch.append(reward)
            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob, _ = actor.predict(
                np.reshape(state, (-1, S_INFO, S_LEN)))
            bit_rate = np.argmax(action_prob[0])

            s_batch.append(state)

            entropy_record.append(actor.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                bit_rate = DEFAULT_QUALITY  # use the default action here
                last_bit_rate = DEFAULT_QUALITY

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'w')
Example #20
def main():
    # check that the constant definitions are valid
    assert len(bitRatesOptions) == bitRatesTypes

    # load the traces
    allCookedTime, allCookedBW, allFileNames = load_trace.load_trace()

    # set the environment
    netEnvironment = env.Environment(all_cooked_time=allCookedTime,
                                     all_cooked_bw=allCookedBW)

    # open the output log file to write
    outputFileName = outputFilePrefix + "_" + allFileNames[
        netEnvironment.trace_idx]
    outputFilePointer = open(outputFileName, "wb")

    # initialize the local variables
    timeStamp = 0
    lastBitRateOption = defaultBitRateOption
    currentBitRateOption = defaultBitRateOption
    videoCount = 0
    historyState = np.zeros((stateInfoLength, pastFramesLength))

    # enumerate all possible bitrate combinations for the future chunks
    for solution in itertools.product([i for i in range(bitRatesTypes)],
                                      repeat=defaultFutureChunkCount):
        chunkOptionsSet.append(solution)

    # computing kernel:
    while True:
        # get the video chunk according to the current bitrate option
        assert currentBitRateOption >= 0
        delay, sleepTime, currentBufferSize, rebuffer, currentVideoChunkSize, \
            nextVideoChunkSize, endFlag, chunkRemainCount = netEnvironment.get_video_chunk(currentBitRateOption)

        # update the time stamp because of the delay and sleeping time
        timeStamp += delay + sleepTime  # ms

        # calculate the reward value according to the formula
        qualityValue = bitRatesOptions[
            currentBitRateOption] / bitsFactor  # kb to Mb
        smoothValue = np.abs(bitRatesOptions[currentBitRateOption] \
                    - bitRatesOptions[lastBitRateOption]) / bitsFactor
        rewardValue =  qualityValue \
                    - rebufferFactor * rebuffer \
                    - smoothFactor * smoothValue

        # write the output file
        outputItemStr = str(timeStamp / millsecondsPerSecond) + '\t' \
                    + str(bitRatesOptions[currentBitRateOption]) + '\t' \
                    + str(currentBufferSize) + '\t' \
                    + str(rebuffer) + '\t' \
                    + str(currentVideoChunkSize) + '\t' \
                    + str(delay) + '\t' \
                    + str(rewardValue) + '\n'
        outputFilePointer.write(outputItemStr.encode('utf-8'))
        outputFilePointer.flush()

        # update the bit rate option
        lastBitRateOption = currentBitRateOption

        # update the history state information like a sliding window
        historyState = np.roll(historyState, -1, axis=1)
        historyState[
            0, -1] = bitRatesOptions[currentBitRateOption] / float(maxBitRate)
        historyState[1, -1] = currentBufferSize / bufferNormFactor
        historyState[2, -1] = rebuffer
        historyState[
            3, -1] = float(currentVideoChunkSize) / float(delay) / bitsFactor
        historyState[4, -1] = np.minimum(
            chunkRemainCount,
            defaultChunkCountToEnd) / float(defaultChunkCountToEnd)

        # MPC kernel begin
        # calculate the normalized estimation error of the bandwidth
        currentError = 0.
        if (len(pastBWEsts) > 0):
            currentError = abs(pastBWEsts[-1] - historyState[3, -1]) / float(
                historyState[3, -1])
        pastErrors.append(currentError)

        # calculate the harmonic mean of the last 5 history bandwidths
        # Step 1: collect the last 5 history bandwidths
        pastRealBWArray = historyState[3, -5:]
        while pastRealBWArray[0] == 0.0:
            pastRealBWArray = pastRealBWArray[1:]

        # Step 2: calculate the harmonic mean
        pastRealBWSum = 0.0
        for pastRealBWItems in pastRealBWArray:
            pastRealBWSum += (1 / float(pastRealBWItems))
        harmonicBW = 1.0 / (pastRealBWSum / len(pastRealBWArray))

        # calculate the predicted future bandwidth according to the est. error and harmonic mean
        errorIndex = min(5, len(pastErrors))
        maxError = float(max(pastErrors[-errorIndex:]))
        currentPredBW = harmonicBW / (1 + maxError)
        pastBWEsts.append(currentPredBW)  # fixed this bug, reward increases

        # get the video chunks information of this round prediction
        currentLastIndex = totalChunksCount - chunkRemainCount
        currentFutureChunkCount = min(chunkRemainCount,
                                      defaultFutureChunkCount)

        # enumerate all the possible solutions and pick the best one
        bestReward = -INF
        bestSolution = ()
        finalOption = -1
        startBufferSize = currentBufferSize

        for solution in chunkOptionsSet:
            localSolution = solution[0:currentFutureChunkCount]
            localRebufferTime = 0.0
            localCurrentBufferSize = startBufferSize
            localBitRateSum = 0.
            localSmoothDiffs = 0.
            localLastChunkOption = currentBitRateOption
            # the 5 future chunks loop
            for pos in range(0, currentFutureChunkCount):
                thisChunkOption = localSolution[pos]
                thisIndex = currentLastIndex + pos + 1
                thisChunkSize = getChunkSize(thisChunkOption, thisIndex)
                downloadTime = (float(thisChunkSize) /
                                (bitsFactor * bitsFactor)
                                ) / currentPredBW  # Bytes to MBytes
                if localCurrentBufferSize < downloadTime:
                    localRebufferTime += downloadTime - localCurrentBufferSize
                    localCurrentBufferSize = 0
                else:
                    localCurrentBufferSize -= downloadTime
                # each downloaded chunk adds 4 seconds of playable content to the buffer
                localCurrentBufferSize += 4
                localBitRateSum += bitRatesOptions[thisChunkOption]
                localSmoothDiffs += abs(bitRatesOptions[thisChunkOption] -
                                        bitRatesOptions[localLastChunkOption])
                localLastChunkOption = thisChunkOption

            localReward = float(localBitRateSum) / bitsFactor \
                             - rebufferFactor * localRebufferTime \
                             - float(localSmoothDiffs) / bitsFactor
            if localReward >= bestReward:
                bestSolution = localSolution
                bestReward = localReward
                if bestSolution != ():
                    finalOption = bestSolution[0]
        currentBitRateOption = finalOption

        if endFlag:
            outputFilePointer.write("\n".encode('utf-8'))
            outputFilePointer.close()

            lastBitRateOption = defaultBitRateOption
            currentBitRateOption = defaultBitRateOption
            historyState = np.zeros((stateInfoLength, pastFramesLength))

            print("video count", videoCount)
            videoCount += 1

            if videoCount >= len(allFileNames):
                break

            outputFileName = outputFilePrefix + "_naive_" + allFileNames[
                netEnvironment.trace_idx]
            outputFilePointer = open(outputFileName, "wb")
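getChunkSize(quality, index) is used in the MPC loop above but not defined in this excerpt. Assuming per-bitrate chunk-size tables loaded from files in the same one-size-per-line format as VIDEO_SIZE_FILE in example #14, a sketch could be:

videoSizeFilePrefix = './video_size_'   # assumed path prefix, not from the project
chunkSizeTable = {}                     # bitrate option -> list of chunk sizes in bytes

def loadChunkSizes(bitRatesTypes):
    for option in range(bitRatesTypes):
        chunkSizeTable[option] = []
        with open(videoSizeFilePrefix + str(option)) as f:
            for line in f:
                chunkSizeTable[option].append(int(line.split()[0]))

def getChunkSize(quality, index):
    # size in bytes of video chunk `index` encoded at bitrate option `quality`
    return chunkSizeTable[quality][index]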
Example #21
            # randomize the start point of the video
            # note: trace file starts with time 0
            self.mahimahi_ptr = np.random.randint(1, len(self.cooked_bw))
            self.last_mahimahi_time = self.cooked_time[self.mahimahi_ptr - 1]

        next_video_chunk_sizes = []
        for i in range(BITRATE_LEVELS):
            next_video_chunk_sizes.append(
                self.video_size[i][self.video_chunk_counter])

        # delay - time from click until start to play
        # sleep_time - sleep when buffer size > buffer_thresh
        # buffer size
        # rebuffer time
        return delay, \
            sleep_time, \
            return_buffer_size / MILLISECONDS_IN_SECOND, \
            rebuf / MILLISECONDS_IN_SECOND, \
            video_chunk_size, \
            next_video_chunk_sizes, \
            end_of_video, \
            video_chunk_remain


if __name__ == "__main__":
    all_cooked_time, all_cooked_bw, _ = load_trace()
    env = Environment(all_cooked_time, all_cooked_bw)
    env.get_video_chunk(0)
    env.get_video_chunk(1)
Example #22
# train path
NN_MODEL = None
# NN_MODEL = './a2c_results_test/nn_model_ep_91.ckpt' #  can load trained model
NETWORK_TRACE = 'fixed'
VIDEO_TRACE = 'AsianCup_China_Uzbekistan'
VIDEO_TRACE_list = [
    'AsianCup_China_Uzbekistan', 'Fengtimo_2018_11_3', 'game', 'room', 'sports'
]
network_trace_dir = './dataset/network_trace/' + NETWORK_TRACE + '/'
video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_'
LOG_FILE_PATH = './log/'
SUMMARY_DIR = './L2AC_results'  # trained model path

# load the network trace
all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
    network_trace_dir)

# default setting
epoch_reward = 0
last_bit_rate = 0
bit_rate = 0
target_buffer = 0
state = np.zeros((S_DIM, S_LEN))
thr_record = np.zeros(8)

# plot info
idx = 0
id_list = []
bit_rate_record = []
buffer_record = []
throughput_record = []
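A hedged sketch of how these plot-info lists might be consumed once they have been filled with one entry per chunk; matplotlib is an assumption here, since the snippet does not show its plotting code.

import matplotlib.pyplot as plt

def plot_session(id_list, bit_rate_record, buffer_record, throughput_record):
    fig, axes = plt.subplots(3, 1, sharex=True)
    axes[0].plot(id_list, bit_rate_record)
    axes[0].set_ylabel('bitrate (kbps)')
    axes[1].plot(id_list, buffer_record)
    axes[1].set_ylabel('buffer (s)')
    axes[2].plot(id_list, throughput_record)
    axes[2].set_ylabel('throughput')
    axes[2].set_xlabel('chunk index')
    plt.show()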
示例#23
0
    parser.add_argument('-q', '--qoe-metric', choices=['lin', 'log', 'hd'])
    parser.add_argument('-l', '--log', action='store_true')
    parser.add_argument('-i', '--lin', action='store_true')
    parser.add_argument('-m', '--iters', type=int)
    parser.add_argument('-t', '--traces', choices=['norway', 'fcc', 'oboe'])

    args = parser.parse_args()
    n_batch_rollouts = 10
    max_iters = args.iters
    max_pts = 200000
    train_frac = 0.8
    np.random.seed(RANDOM_SEED)
    states, actions, serials = [], [], []
    precision = []
    #trees = []
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        args.traces)
    if args.abr == 'hotdash':
        net_env = env_hotdash.Environment(all_cooked_time=all_cooked_time,
                                          all_cooked_bw=all_cooked_bw,
                                          all_file_names=all_file_names)
    else:
        net_env = env.Environment(all_cooked_time=all_cooked_time,
                                  all_cooked_bw=all_cooked_bw,
                                  all_file_names=all_file_names)

    if args.abr == 'pensieve':
        teacher = pensieve.Pensieve()
        student = pensilin.Pensilin()
        #test = pensieve.Pensieve()
    elif args.abr == 'robustmpc':
        teacher = robustmpc.RobustMPC()
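The snippet wires up a teacher ABR policy and a decision-tree style student, but the aggregation loop itself is not shown here. Below is a minimal sketch of the DAgger/VIPER-style distillation loop such a setup typically runs; collect_rollout and teacher_action are hypothetical callables standing in for the objects above, and scikit-learn's DecisionTreeClassifier is an assumption.

import numpy as np
from sklearn.tree import DecisionTreeClassifier

def distill(collect_rollout, teacher_action, n_iters=10, max_pts=200000,
            train_frac=0.8, seed=42):
    # collect_rollout(student_or_None) -> iterable of (feature_vector, student_action)
    # teacher_action(feature_vector)   -> expert action
    # both callables are hypothetical placeholders for the teacher/student objects above
    rng = np.random.RandomState(seed)
    states, actions = [], []
    student = None
    for _ in range(n_iters):
        for features, _student_action in collect_rollout(student):
            states.append(features)
            actions.append(teacher_action(features))  # relabel visited states with the teacher
        idx = rng.permutation(len(states))[:max_pts]   # cap and reshuffle the aggregated set
        X, y = np.asarray(states, dtype=float)[idx], np.asarray(actions)[idx]
        n_train = max(1, int(train_frac * len(X)))
        student = DecisionTreeClassifier(max_depth=8).fit(X[:n_train], y[:n_train])
        if n_train < len(X):
            print('held-out accuracy:', student.score(X[n_train:], y[n_train:]))
    return student

The key point is that states are gathered under the student's own decisions but labeled by the teacher, which is what distinguishes this from plain behavioral cloning.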
示例#24
0
def loopmain():
    pool_ = pool.pool()

    video_size = {}  # in bytes
    vmaf_size = {}
    for bitrate in range(BITRATE_LEVELS):
        video_size[bitrate] = []
        vmaf_size[bitrate] = []
        with open(VIDEO_SIZE_FILE + str(bitrate)) as f:
            for line in f:
                video_size[bitrate].append(int(line.split()[0]))
        with open(VMAF + str(BITRATE_LEVELS - bitrate)) as f:
            for line in f:
                vmaf_size[bitrate].append(float(line))

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TRAIN_TRACES)
    net_env = env.Environment(TRAIN_TRACES)
    with open(LOG_FILE + 'agent', 'w') as log_file:
        actor = a3c.ActorNetwork(state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 max_depth=6)
        bit_rate = DEFAULT_QUALITY
        last_chunk_vmaf = None

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        a_real_batch = [action_vec]
        r_batch = []

        time_stamp = 0

        throu_array, delay_array = [], []
        epoch = 0

        while True:
            net_env.get_video_chunk(int(bit_rate))

            #next_video_chunk_sizes, next_video_chunk_vmaf, \
            delay, sleep_time, buffer_size, rebuf, video_chunk_size, \
                end_of_video, video_chunk_remain, video_chunk_vmaf = \
                net_env.delay0, net_env.sleep_time0, net_env.return_buffer_size0, net_env.rebuf0, \
                net_env.video_chunk_size0, net_env.end_of_video0, net_env.video_chunk_remain0, net_env.video_chunk_vmaf0

            next_video_chunk_sizes = []
            for i in range(A_DIM):
                next_video_chunk_sizes.append(
                    video_size[i][net_env.video_chunk_counter])

            next_video_chunk_vmaf = []
            for i in range(A_DIM):
                next_video_chunk_vmaf.append(
                    vmaf_size[i][net_env.video_chunk_counter])

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            if last_chunk_vmaf is None:
                last_chunk_vmaf = video_chunk_vmaf

            reward = 0.8469011 * video_chunk_vmaf - 28.79591348 * rebuf + 0.29797156 * \
                np.abs(np.maximum(video_chunk_vmaf - last_chunk_vmaf, 0.)) - 1.06099887 * \
                np.abs(np.minimum(video_chunk_vmaf - last_chunk_vmaf, 0.)) - \
                2.661618558192494

            r_batch.append(reward)

            last_bit_rate = bit_rate
            last_chunk_vmaf = video_chunk_vmaf

            state = np.zeros([S_INFO, S_LEN])

            throughput = video_chunk_size / delay / M_IN_K
            throu_array.append(throughput)
            delay_array.append(delay)
            if len(throu_array) >= FUTURE_P:
                throu_array.pop(0)
                delay_array.pop(0)
            mean, var = mean_var(throu_array, delay_array)
            # this should be S_INFO number of terms
            # state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[0, -1] = video_chunk_vmaf
            state[1, -1] = buffer_size  # 10 sec
            state[2, -1] = mean
            state[3, -1] = var  # 10 sec

            action_prob = actor.predict(np.reshape(state, (-1, S_INFO, S_LEN)))

            net_env.get_optimal(float(last_chunk_vmaf))
            action_real = int(net_env.optimal)
            # force robust
            if actor.compute_entropy(action_prob) > ENTROPY_THRES:
                action_cumsum = np.cumsum(action_prob)
                bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                            float(RAND_RANGE)).argmax()
            else:
                bit_rate = np.random.randint(A_DIM)

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            action_real_vec = np.zeros(A_DIM)
            action_real_vec[action_real] = 1

            pool_.submit(state, action_real_vec)

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(VIDEO_BIT_RATE[action_real]) + '\t' + str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del a_real_batch[:]
                throu_array, delay_array = [], []
                # so that in the log we know where video ends
                log_file.write('\n')

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here
                last_chunk_vmaf = None
                #chunk_index = 0

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                action_real_vec = np.zeros(A_DIM)
                action_real_vec[action_real] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                a_real_batch.append(action_real_vec)

                epoch += 1
                if epoch % 10 == 0:
                    print(time.time())
                    training_s_batch, training_a_batch = pool_.get()
                    if training_s_batch.shape[0] > 0:
                        actor.train(np.array(training_s_batch),
                                    np.array(training_a_batch))
                    actor.save('pitree/pitree' + str(epoch) + '.model')
                    os.system('python dt_test.py ' + 'pitree/pitree' +
                              str(epoch) + '.model')
                    os.system('python plot_results.py >> tab.log')
                #d_batch.append(np.zeros((3, 5)))

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                a_batch.append(action_vec)
                a_real_batch.append(action_vec)
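Two pieces of this loop are easy to restate in isolation: the linear-VMAF reward (coefficients copied verbatim from above) and the mean_var() helper, which is imported but not shown, so the version below is only a plausible guess (mean and variance of the recent throughput samples).

import numpy as np

def vmaf_reward(chunk_vmaf, last_chunk_vmaf, rebuf_s):
    # same linear-VMAF QoE model as the loop above, coefficients copied verbatim
    diff = chunk_vmaf - last_chunk_vmaf
    return (0.8469011 * chunk_vmaf
            - 28.79591348 * rebuf_s
            + 0.29797156 * np.abs(np.maximum(diff, 0.))
            - 1.06099887 * np.abs(np.minimum(diff, 0.))
            - 2.661618558192494)

def mean_var(throughputs, delays):
    # hedged guess at the helper used above; delays is accepted only for signature
    # compatibility and is unused in this sketch
    t = np.asarray(throughputs, dtype=float)
    return float(t.mean()), float(t.var())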
示例#25
0
def test(user_id, ABR_NAME_, QoE_, NETWORK_TRACE_, VIDEO_TRACE_):
    #1  Algorithm Setting:  RBA, BBA, DYNAMIC, PDDQN, Pensieve
    ABR_NAME = ABR_NAME_
    #2  QoE Setting:  ar, al, hd, b, max
    QoE = QoE_
    #3  Network Dataset: high,  medium, low, fixed
    NETWORK_TRACE = NETWORK_TRACE_
    #4  Video Dataset: AsianCup_China_Uzbekistan, Fengtimo_2018_11_3, YYF_2018_08_12
    VIDEO_TRACE = VIDEO_TRACE_

    model_name = ""

    if ABR_NAME == 'BBA':
        import BBA as ABR
    if ABR_NAME == 'RBA':
        import RBA as ABR
    if ABR_NAME == 'DYNAMIC':
        import DYNAMIC as ABR
    if ABR_NAME == 'PDDQN':
        model_name = "./PDDQN_models/PDDQN_b/"
        import PDDQN_ as ABR
    if ABR_NAME == 'PDDQN-R':
        model_name = "./PDDQN_models/"+QoE+'/'
        import PDDQN_R as ABR
    if ABR_NAME == 'Pensieve':
        model_name = "./Pensieve_models/"+QoE+'/'
        import Pensieve as ABR

    SMOOTH_PENALTY = 0
    REBUF_PENALTY = 0.0
    LANTENCY_PENALTY = 0.0
    SKIP_PENALTY = 0.0
    BITRATE_REWARD = 0.0

    if QoE == 'al':
        SMOOTH_PENALTY = 0.01
        REBUF_PENALTY = 1.5
        LANTENCY_PENALTY = 0.01
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 1
    if QoE == 'ar':
        SMOOTH_PENALTY = 0.0
        REBUF_PENALTY = 3
        LANTENCY_PENALTY = 0.0
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 0.0
    if QoE == 'b':
        SMOOTH_PENALTY = 0.02
        REBUF_PENALTY = 1.5
        LANTENCY_PENALTY = 0.005
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 0.5
    if QoE == 'hd':
        SMOOTH_PENALTY = 0.0
        REBUF_PENALTY = 0.5
        LANTENCY_PENALTY = 0.0
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 0.0

    if QoE == 'max':
        SMOOTH_PENALTY = 0
        REBUF_PENALTY = 0.0
        LANTENCY_PENALTY = 0.0
        SKIP_PENALTY = 0.0
        BITRATE_REWARD = 0.001
        FILE_NAME = './'+'result/'+QoE+'_'+NETWORK_TRACE+'_'+VIDEO_TRACE+'.csv'
    else:
        FILE_NAME = './'+'result/'+ABR_NAME+'_'+QoE+'_'+NETWORK_TRACE+'_'+VIDEO_TRACE+'.csv'

    # note: this overrides the FILE_NAME chosen above; all results go to the per-startup QoE file
    FILE_NAME = './' + 'result/Startup/' + NETWORK_TRACE +'/'+ABR_NAME+ '/QoE.csv'
    out = open(FILE_NAME, 'w', newline='')
    w = csv.writer(out)

    DEBUG = False

    LOG_FILE_PATH = './log/'

    # create result directory
    if not os.path.exists(LOG_FILE_PATH):
        os.makedirs(LOG_FILE_PATH)

    # -- End Configuration --

    network_trace_dir = './dataset/new_network_trace/' + NETWORK_TRACE + '/'
    video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_'

    # load the trace
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(network_trace_dir)
    start_avgbw = (sum(all_cooked_bw[0][0:10])/10) *1000

    # random_seed
    random_seed = 2
    count = 0
    trace_count = 1
    FPS = 25
    frame_time_len = 0.04
    reward_all_sum = 0
    run_time = 0

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                                    all_cooked_bw=all_cooked_bw,
                                    random_seed=random_seed,
                                    logfile_path=LOG_FILE_PATH,
                                    VIDEO_SIZE_FILE=video_trace_prefix,
                                    Debug=DEBUG)

    abr = ABR.Algorithm()
    abr_init = abr.Initial(model_name)

    BIT_RATE = [500.0, 850.0, 1200.0, 1850.0]  # kbps
    TARGET_BUFFER = [0.5,0.75,1,1.25]  # seconds
    # ABR setting
    RESEVOIR = 0.5
    CUSHION = 2

    cnt = 0
    # default setting
    last_bit_rate = 0
    bit_rate = 0
    target_buffer = 0
    latency_limit = 4

    # reward setting
    reward_frame = 0
    reward_all = 0

    # past_info setting
    past_frame_num = 200
    S_time_interval = [0] * past_frame_num
    S_send_data_size = [0] * past_frame_num
    S_chunk_len = [0] * past_frame_num
    S_rebuf = [0] * past_frame_num
    S_buffer_size = [0] * past_frame_num
    S_end_delay = [0] * past_frame_num
    S_chunk_size = [0] * past_frame_num
    S_play_time_len = [0] * past_frame_num
    S_decision_flag = [0] * past_frame_num
    S_buffer_flag = [0] * past_frame_num
    S_cdn_flag = [0] * past_frame_num
    S_skip_time = [0] * past_frame_num
    # params setting
    call_time_sum = 0
    reward_chunk = 0
    while True:

        reward_frame = 0

        time, time_interval, send_data_size, chunk_len, \
        rebuf, buffer_size, play_time_len, end_delay, \
        cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \
        buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer, latency_limit)
        # S_info is sequential order
        S_time_interval.pop(0)
        S_send_data_size.pop(0)
        S_chunk_len.pop(0)
        S_buffer_size.pop(0)
        S_rebuf.pop(0)
        S_end_delay.pop(0)
        S_play_time_len.pop(0)
        S_decision_flag.pop(0)
        S_buffer_flag.pop(0)
        S_cdn_flag.pop(0)
        S_skip_time.pop(0)

        S_time_interval.append(time_interval)
        S_send_data_size.append(send_data_size)
        S_chunk_len.append(chunk_len)
        S_buffer_size.append(buffer_size)
        S_rebuf.append(rebuf)
        S_end_delay.append(end_delay)
        S_play_time_len.append(play_time_len)
        S_decision_flag.append(decision_flag)
        S_buffer_flag.append(buffer_flag)
        S_cdn_flag.append(cdn_flag)
        S_skip_time.append(skip_frame_time_len)

        # QOE setting
        # if end_delay <= 1.0:
        #     LANTENCY_PENALTY = 0.005
        # else:
        #     LANTENCY_PENALTY = 0.01

        if not cdn_flag:
            reward_frame = frame_time_len * float(BIT_RATE[bit_rate]) * BITRATE_REWARD \
                           - REBUF_PENALTY * rebuf \
                           - LANTENCY_PENALTY * end_delay \
                           - SKIP_PENALTY * skip_frame_time_len
        else:
            reward_frame = -(REBUF_PENALTY * rebuf)

        if decision_flag or end_of_video:
            reward_frame += -1 * SMOOTH_PENALTY * (abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000)
            reward_chunk += reward_frame
            w.writerow([ABR_NAME,reward_chunk])
            reward_chunk = 0
            last_bit_rate = bit_rate

            # ----------------- Your Algorithm ---------------------

            cnt += 1
            timestamp_start = tm.time()
            bit_rate, target_buffer, latency_limit = abr.run(time,
                                                             S_time_interval,
                                                             S_send_data_size,
                                                             S_chunk_len,
                                                             S_rebuf,
                                                             S_buffer_size,
                                                             S_play_time_len,
                                                             S_end_delay,
                                                             S_decision_flag,
                                                             S_buffer_flag,
                                                             S_cdn_flag,
                                                             S_skip_time,
                                                             end_of_video,
                                                             cdn_newest_id,
                                                             download_id,
                                                             cdn_has_frame,
                                                             abr_init,
                                                             start_avgbw)
            start_avgbw = -1
            timestamp_end = tm.time()
            call_time_sum += timestamp_end - timestamp_start
            # -------------------- End --------------------------------
        else:
            reward_chunk += reward_frame
        if end_of_video:
            # print("network traceID, network_reward, avg_running_time", trace_count, reward_all, call_time_sum / cnt)

            reward_all = reward_all / cnt
            reward_all_sum += reward_all
            run_time += call_time_sum / cnt
            if trace_count >= len(all_file_names):
                break
            trace_count += 1
            cnt = 0

            call_time_sum = 0
            last_bit_rate = 0
            reward_all = 0
            bit_rate = 0
            target_buffer = 0

            S_time_interval = [0] * past_frame_num
            S_send_data_size = [0] * past_frame_num
            S_chunk_len = [0] * past_frame_num
            S_rebuf = [0] * past_frame_num
            S_buffer_size = [0] * past_frame_num
            S_end_delay = [0] * past_frame_num
            S_chunk_size = [0] * past_frame_num
            S_play_time_len = [0] * past_frame_num
            S_decision_flag = [0] * past_frame_num
            S_buffer_flag = [0] * past_frame_num
            S_cdn_flag = [0] * past_frame_num

        reward_all += reward_frame

    return [reward_all_sum / trace_count, run_time / trace_count]
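For reference, the per-frame QoE computed inside the loop above can be restated as a single function. The default weights correspond to the 'b' QoE setting; folding the smoothness charge into the same call is a simplification (the loop also applies it on end_of_video).

def frame_reward(bit_rate_kbps, last_bit_rate_kbps, rebuf_s, end_delay_s, skip_s,
                 cdn_flag, decision_flag, frame_time_len=0.04,
                 bitrate_reward=0.001, rebuf_penalty=1.5, latency_penalty=0.005,
                 skip_penalty=0.5, smooth_penalty=0.02):
    # quality only accrues while the CDN actually has the frame; otherwise the frame
    # contributes nothing but its rebuffering penalty
    if not cdn_flag:
        reward = (frame_time_len * bit_rate_kbps * bitrate_reward
                  - rebuf_penalty * rebuf_s
                  - latency_penalty * end_delay_s
                  - skip_penalty * skip_s)
    else:
        reward = -rebuf_penalty * rebuf_s
    if decision_flag:
        # smoothness is charged once per chunk boundary
        reward -= smooth_penalty * abs(bit_rate_kbps - last_bit_rate_kbps) / 1000.0
    return reward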
示例#26
0
    def main(self, args, net_env=None, policy=None):
        np.random.seed(RANDOM_SEED)
        viper_flag = True
        assert len(VIDEO_BIT_RATE) == A_DIM
        log_f = LOG_FILE

        if net_env is None:
            viper_flag = False
            all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
                args.traces)
            net_env = env.Environment(all_cooked_time=all_cooked_time,
                                      all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)

        # if args.update:
        #     log_f = log_f.replace('dt', 'du')

        if not viper_flag and args.log:
            log_path = LOG_FILE + '_' + net_env.all_file_names[
                net_env.trace_idx] + '_' + args.qoe_metric
            log_file = open(log_path, 'wb')

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        s_batch = [np.zeros((S_INFO, S_LEN))]
        # a_batch = np.zeros((TOTAL_VIDEO_CHUNKS, 3))
        r_batch = []
        rollout = []
        video_count = 0
        reward_sum = 0
        in_compute = []

        # load dt policy
        if policy is None:
            with open(args.dt, 'rb') as f:
                policy = pk.load(f)
        policy = fsm.FSM(policy)

        # ========= @ zili: debug ========
        # with open('decision_tree_ready/robustmpc_norway_500.pk3', 'rb') as f:
        #     baseline = pk.load(f)

        while True:  # serve video forever

            delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = get_reward(bit_rate, rebuf, last_bit_rate,
                                args.qoe_metric)
            r_batch.append(reward)
            reward_sum += reward
            last_bit_rate = bit_rate

            if args.log:
                # log time_stamp, bit_rate, buffer_size, reward
                log_file.write(
                    bytes(str(time_stamp / M_IN_K) + '\t' +
                          str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                          str(buffer_size) + '\t' + str(rebuf) + '\t' +
                          str(video_chunk_size) + '\t' + str(delay) + '\t' +
                          str(reward) + '\n',
                          encoding='utf-8'))
                log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = rebuf
            state[3, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[4, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            # state[5: 10, :] = future_chunk_sizes / M_IN_K / M_IN_K

            serialized_state = serial(state)
            bit_rate = int(policy.predict([serialized_state])[0])
            rollout.append((state, bit_rate, serialized_state))
            s_batch.append(state)

            # ======== @ zili: debug ========
            # if video_chunk_remain > 0:
            #     a_batch[TOTAL_VIDEO_CHUNKS - video_chunk_remain][0] = bit_rate
            #     a_batch[TOTAL_VIDEO_CHUNKS - video_chunk_remain][2] = int(baseline.predict([serialized_state])[0])

            # if args.update:
            #     chunk_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain)
            #     policy.chunk_leaf[chunk_index] = policy.tree.apply(np.array(serialized_state).reshape(1, -1))
            #     if chunk_index < CHUNK_TIL_VIDEO_END_CAP - HORIZON:
            #         in_compute.append(fsm.Trajectory(chunk_index, max(0, bit_rate - 1), buffer_size - CHUNK_LEN,
            #                                          last_bit_rate, state, args))
            #         in_compute.append(fsm.Trajectory(chunk_index, bit_rate, buffer_size - CHUNK_LEN,
            #                                          last_bit_rate, state, args))
            #         in_compute.append(fsm.Trajectory(chunk_index, min(5, bit_rate + 1), buffer_size - CHUNK_LEN,
            #                                          last_bit_rate, state, args))
            #
            #     for traj in in_compute:
            #         this_chunk_size = video_chunk_size
            #         this_delay = delay
            #         while True:
            #             if traj.apply(this_chunk_size, this_delay) == CHUNK_SWITCH:
            #                 new_bitrate = int(policy.predict(np.array(serial(traj.states)).reshape(1, -1))[0])
            #                 traj.next_chunk(new_bitrate)
            #                 this_chunk_size, this_delay = traj.trans_msg
            #             else:
            #                 break
            #
            #         while len(in_compute) > 1 and in_compute[0].end and in_compute[1].end and in_compute[2].end:
            #             r_below = sum([get_reward(in_compute[0].quality[i], in_compute[0].rebuf[i],
            #                                       in_compute[0].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
            #             r_normal = sum([get_reward(in_compute[1].quality[i], in_compute[1].rebuf[i],
            #                                       in_compute[1].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
            #             r_above = sum([get_reward(in_compute[2].quality[i], in_compute[2].rebuf[i],
            #                                       in_compute[2].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
            #             if r_above == max(r_below, r_normal, r_above):
            #                 policy.update(in_compute[0].chunk_index, 1)
            #                 # a_batch[in_compute[0].chunk_index][1] = in_compute[0].chunk_init_bitrate
            #             elif r_normal == max(r_below, r_normal, r_above):
            #                 policy.update(in_compute[0].chunk_index, -1)
            #                 # a_batch[in_compute[1].chunk_index][1] = in_compute[1].chunk_init_bitrate
            #             else:
            #                 policy.update(in_compute[0].chunk_index, 0)
            #                 # a_batch[in_compute[2].chunk_index][1] = in_compute[2].chunk_init_bitrate
            #
            #             in_compute.pop(0)
            #             in_compute.pop(0)
            #             in_compute.pop(0)

            if end_of_video:
                # print(a_batch)
                if args.log:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    print("video count", video_count)

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here
                r_batch = []
                in_compute = []

                if viper_flag:
                    return rollout
                else:
                    video_count += 1
                    if video_count >= len(net_env.all_file_names):
                        break
                    if args.log:
                        log_path = log_f + '_' + net_env.all_file_names[
                            net_env.trace_idx] + '_' + args.qoe_metric
                        log_file = open(log_path, 'wb')

        return reward_sum
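serial() is imported rather than defined in this snippet; the sketch below is a hedged guess at what it does, flattening the (S_INFO, S_LEN) history into the fixed-length feature vector a decision tree can consume. The exact choice of throughput window is an assumption.

import numpy as np

def serial(state):
    features = [state[0, -1],        # last normalized quality
                state[1, -1],        # current buffer level
                state[2, -1]]        # last rebuffering time
    features.extend(state[3, -5:])   # five most recent throughput samples (assumed window)
    features.append(state[4, -1])    # normalized chunks remaining
    return np.asarray(features, dtype=float)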
示例#27
0
def main():
    # utility_offset = -math.log(VIDEO_BIT_RATE[0])  # so utilities[0] = 0
    # utilities = [math.log(b) + utility_offset for b in VIDEO_BIT_RATE]

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
    load_trace.plot_bandwidth(all_cooked_time, all_cooked_bw, _)
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    with tf.Session() as sess, open(LOG_FILE, 'w') as log_file:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        summary_ops, summary_vars = a3c.build_summaries()

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(SUMMARY_DIR,
                                       sess.graph)  # training monitor
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        epoch = 0
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        actor_gradient_batch = []
        critic_gradient_batch = []

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_counter,throughput,video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)
            #print(net_env.get_video_chunk(bit_rate))
            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smooth penalty
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)
            # print(state)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            # print('state',state)
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            rand = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
            print(action_cumsum, action_cumsum > rand,
                  (action_cumsum > rand).argmax())
            # print(action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE))
            # print(action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()

            #compute Vp and map bitrate
            # bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()

            Vp_index = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            Vp = BUFFER_PARAMETER[Vp_index]
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            config = {
                'buffer_size': env.BUFFER_THRESH,
                'gp': GP,
                'Vp': Vp,
                'abr_osc': False,
                'abr_basic': False,
                'no_ibr': False
            }
            bola = get_bitrate.Bola(config=config)
            bit_rate = bola.get_quality(
                Vp, buffer_size * env.MILLISECONDS_IN_SECOND, last_bit_rate,
                throughput)

            # information available before making the decision
            print(
                '[%d]:download time %.2fms,thrput=%.2f,chunk size %d,buffer=%.2fs,bitrate=%d'
                % (video_chunk_counter, delay, throughput, video_chunk_size,
                   buffer_size, last_bit_rate))

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:  # do training once

                actor_gradient, critic_gradient, td_batch = \
                    a3c.compute_gradients(s_batch=np.stack(s_batch[1:], axis=0),  # ignore the first chuck
                                          a_batch=np.vstack(a_batch[1:]),  # since we don't have the
                                          r_batch=np.vstack(r_batch[1:]),  # control over it
                                          terminal=end_of_video, actor=actor, critic=critic)
                td_loss = np.mean(td_batch)

                actor_gradient_batch.append(actor_gradient)
                critic_gradient_batch.append(critic_gradient)

                print("====")
                print("Epoch", epoch)
                print("TD_loss", td_loss, "Avg_reward", np.mean(r_batch),
                      "Avg_entropy", np.mean(entropy_record))
                print("====")

                summary_str = sess.run(summary_ops,
                                       feed_dict={
                                           summary_vars[0]: td_loss,
                                           summary_vars[1]: np.mean(r_batch),
                                           summary_vars[2]:
                                           np.mean(entropy_record)
                                       })

                writer.add_summary(summary_str, epoch)
                writer.flush()

                entropy_record = []

                if len(actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

                    assert len(actor_gradient_batch) == len(
                        critic_gradient_batch)
                    # assembled_actor_gradient = actor_gradient_batch[0]
                    # assembled_critic_gradient = critic_gradient_batch[0]
                    # assert len(actor_gradient_batch) == len(critic_gradient_batch)
                    # for i in xrange(len(actor_gradient_batch) - 1):
                    #     for j in xrange(len(actor_gradient)):
                    #         assembled_actor_gradient[j] += actor_gradient_batch[i][j]
                    #         assembled_critic_gradient[j] += critic_gradient_batch[i][j]
                    # actor.apply_gradients(assembled_actor_gradient)
                    # critic.apply_gradients(assembled_critic_gradient)

                    for i in range(len(actor_gradient_batch)):
                        actor.apply_gradients(actor_gradient_batch[i])
                        critic.apply_gradients(critic_gradient_batch[i])

                    actor_gradient_batch = []
                    critic_gradient_batch = []

                    epoch += 1
                    if epoch % MODEL_SAVE_INTERVAL == 0:
                        # Save the neural net parameters to disk.
                        save_path = saver.save(
                            sess, SUMMARY_DIR + "/nn_model_ep_" + str(epoch) +
                            ".ckpt")
                        print("Model saved in file: %s" % save_path)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                # print(bit_rate)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
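get_bitrate.Bola is not shown in this snippet, so the decision rule below is only a sketch of the textbook BOLA objective that such a class typically implements, using the Vp and gp values the network above selects; it is not necessarily the exact logic of this repository's Bola class.

import math

def bola_quality(buffer_level_s, Vp, gp, bitrates_kbps):
    # textbook BOLA: choose the level maximizing (Vp * (utility + gp) - buffer) / size,
    # with log utilities relative to the lowest bitrate
    utilities = [math.log(b / float(bitrates_kbps[0])) for b in bitrates_kbps]
    scores = [(Vp * (u + gp) - buffer_level_s) / b
              for b, u in zip(bitrates_kbps, utilities)]
    return int(max(range(len(scores)), key=scores.__getitem__))

A larger Vp shifts the maximizer toward higher qualities at the same buffer level, which is why the policy above outputs a Vp index rather than a bitrate directly.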
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []
    entropy_record = []

    video_count = 0

    # make chunk combination options
    for combo in itertools.product([0, 1, 2, 3, 4, 5], repeat=5):
        CHUNK_COMBO_OPTIONS.append(combo)

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real system
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty - smooth penalty
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        # log scale reward
        # log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0]))
        # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))

        # reward = log_bit_rate \
        #          - REBUF_PENALTY * rebuf \
        #          - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

        # reward = BITRATE_REWARD[bit_rate] \
        #          - 8 * rebuf - np.abs(BITRATE_REWARD[bit_rate] - BITRATE_REWARD[last_bit_rate])

        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
            '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
            str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) +
            '\n')
        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
            np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
        state[2, -1] = rebuf
        state[3, -1] = float(video_chunk_size) / float(
            delay) / M_IN_K  # kilo byte / ms
        state[4, -1] = np.minimum(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
        # state[5: 10, :] = future_chunk_sizes / M_IN_K / M_IN_K

        # ================== MPC =========================
        curr_error = 0  # default: assume this is the first request, so the error is 0 since we have never predicted bandwidth
        if (len(past_bandwidth_ests) > 0):
            curr_error = abs(past_bandwidth_ests[-1] - state[3, -1]) / float(
                state[3, -1])
        past_errors.append(curr_error)

        # pick bitrate according to MPC
        # first get harmonic mean of last 5 bandwidths
        past_bandwidths = state[3, -5:]
        while past_bandwidths[0] == 0.0:
            past_bandwidths = past_bandwidths[1:]
        #if ( len(state) < 5 ):
        #    past_bandwidths = state[3,-len(state):]
        #else:
        #    past_bandwidths = state[3,-5:]
        bandwidth_sum = 0
        for past_val in past_bandwidths:
            bandwidth_sum += (1 / float(past_val))
        harmonic_bandwidth = 1.0 / (bandwidth_sum / len(past_bandwidths))

        # future bandwidth prediction
        # divide by 1 + max of last 5 (or up to 5) errors
        max_error = 0
        error_pos = -5
        if (len(past_errors) < 5):
            error_pos = -len(past_errors)
        max_error = float(max(past_errors[error_pos:]))
        future_bandwidth = harmonic_bandwidth / (1 + max_error)  # robustMPC here
        past_bandwidth_ests.append(harmonic_bandwidth)

        # future chunks length (try 4 if that many remaining)
        last_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain)
        future_chunk_length = MPC_FUTURE_CHUNK_COUNT
        if (TOTAL_VIDEO_CHUNKS - last_index < 5):
            future_chunk_length = TOTAL_VIDEO_CHUNKS - last_index

        # all possible combinations of 5 chunk bitrates (9^5 options)
        # iterate over list and for each, compute reward and store max reward combination
        max_reward = -100000000
        best_combo = ()
        start_buffer = buffer_size
        #start = time.time()
        for full_combo in CHUNK_COMBO_OPTIONS:
            combo = full_combo[0:future_chunk_length]
            # calculate total rebuffer time for this combination (start with start_buffer and subtract
            # each download time and add 4 seconds per chunk in that order)
            curr_rebuffer_time = 0
            curr_buffer = start_buffer
            bitrate_sum = 0
            smoothness_diffs = 0
            last_quality = int(bit_rate)
            for position in range(0, len(combo)):
                chunk_quality = combo[position]
                index = last_index + position + 1  # e.g., if last chunk is 3, then first iter is 3+0+1=4
                download_time = (
                    get_chunk_size(chunk_quality, index) /
                    1000000.) / future_bandwidth  # this is MB/MB/s --> seconds
                if (curr_buffer < download_time):
                    curr_rebuffer_time += (download_time - curr_buffer)
                    curr_buffer = 0
                else:
                    curr_buffer -= download_time
                curr_buffer += 4
                bitrate_sum += VIDEO_BIT_RATE[chunk_quality]
                smoothness_diffs += abs(VIDEO_BIT_RATE[chunk_quality] -
                                        VIDEO_BIT_RATE[last_quality])
                # bitrate_sum += BITRATE_REWARD[chunk_quality]
                # smoothness_diffs += abs(BITRATE_REWARD[chunk_quality] - BITRATE_REWARD[last_quality])
                last_quality = chunk_quality
            # compute reward for this combination (one reward per 5-chunk combo)
            # bitrates are in Mbits/s, rebuffer in seconds, and smoothness_diffs in Mbits/s

            reward = (bitrate_sum / 1000.) - (
                REBUF_PENALTY * curr_rebuffer_time) - (smoothness_diffs /
                                                       1000.)
            # reward = bitrate_sum - (8*curr_rebuffer_time) - (smoothness_diffs)

            if reward >= max_reward:
                # ties go to the most recently evaluated combination
                best_combo = combo
                max_reward = reward
                # send data to html side (first chunk of best combo)
                send_data = 0  # fallback in case no combination was usable
                if best_combo != ():  # some combo was good
                    send_data = best_combo[0]

        bit_rate = send_data
        # hack
        # if bit_rate == 1 or bit_rate == 2:
        #    bit_rate = 0

        # ================================================

        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        s_batch.append(state)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []

            print "video count", video_count
            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'wb')
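The bandwidth predictor buried in the loop above is worth restating on its own: a harmonic mean of the recent non-zero throughput samples, discounted by the largest recent relative prediction error (the robustMPC estimator). The function returns both values so the caller can append the raw harmonic mean to past_bandwidth_ests, as the loop does.

def robust_bandwidth_estimate(past_bandwidths, past_errors, horizon=5):
    samples = [b for b in past_bandwidths[-horizon:] if b > 0]
    if not samples:
        return 0.0, 0.0
    harmonic = len(samples) / sum(1.0 / b for b in samples)
    max_error = max(past_errors[-horizon:]) if past_errors else 0.0
    return harmonic / (1.0 + max_error), harmonic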
示例#29
0
    def main(self, args, net_env=None):
        self.args = args
        np.random.seed(RANDOM_SEED)
        viper_flag = True
        assert len(VIDEO_BIT_RATE) == A_DIM

        if net_env is None:
            viper_flag = False
            all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
                args.traces)
            net_env = env.Environment(all_cooked_time=all_cooked_time,
                                      all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)

        if not viper_flag and args.log:
            log_path = LOG_FILE + '_' + net_env.all_file_names[
                net_env.trace_idx] + '_' + args.qoe_metric
            log_file = open(log_path, 'wb')

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        rollout = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real system

            delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = get_reward(bit_rate, rebuf, last_bit_rate,
                                args.qoe_metric)
            r_batch.append(reward)
            last_bit_rate = bit_rate

            if args.log:
                # log time_stamp, bit_rate, buffer_size, reward
                log_file.write(
                    bytes(str(time_stamp / M_IN_K) + '\t' +
                          str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                          str(buffer_size) + '\t' + str(rebuf) + '\t' +
                          str(video_chunk_size) + '\t' + str(delay) + '\t' +
                          str(reward) + '\n',
                          encoding='utf-8'))
                log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = rebuf
            state[3, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[4, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            bit_rate = self.predict(state)
            serialized_state = []
            # Log input of neural network
            serialized_state.append(state[0, -1])
            serialized_state.append(state[1, -1])
            serialized_state.append(state[2, -1])
            for i in range(5):
                serialized_state.append(state[3, i])
            serialized_state.append(state[4, -1])
            #print(serialized_state)
            #print(state)
            rollout.append((state, bit_rate, serialized_state))

            if end_of_video:
                if args.log:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    print("video count", video_count)

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                if viper_flag:
                    break
                else:
                    video_count += 1
                    if video_count >= len(net_env.all_file_names):
                        break
                    if args.log:
                        log_path = LOG_FILE + '_' + net_env.all_file_names[
                            net_env.trace_idx] + '_' + args.qoe_metric
                        log_file = open(log_path, 'wb')

        return rollout
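get_reward(bit_rate, rebuf, last_bit_rate, qoe_metric) is imported rather than defined here. The sketch below covers the three QoE variants ('lin', 'log', 'hd') that Pensieve-style experiments conventionally use; the penalty constants and the HD reward table are assumptions, not values confirmed by this snippet.

import numpy as np

VIDEO_BIT_RATE = [300, 750, 1200, 1850, 2850, 4300]  # kbps (the usual Pensieve ladder)
HD_REWARD = [1, 2, 3, 12, 15, 20]                     # assumed table for the 'hd' metric
M_IN_K = 1000.0

def get_reward(bit_rate, rebuf, last_bit_rate, qoe_metric):
    if qoe_metric == 'lin':
        quality = VIDEO_BIT_RATE[bit_rate] / M_IN_K
        last_quality = VIDEO_BIT_RATE[last_bit_rate] / M_IN_K
        rebuf_penalty = 4.3
    elif qoe_metric == 'log':
        quality = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0]))
        last_quality = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))
        rebuf_penalty = 2.66
    else:  # 'hd'
        quality = HD_REWARD[bit_rate]
        last_quality = HD_REWARD[last_bit_rate]
        rebuf_penalty = 8.0
    return quality - rebuf_penalty * rebuf - abs(quality - last_quality)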
示例#30
0
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN], action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            DECISIONS.append(bit_rate)

            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')

    print "Decisions: {}".format(Counter(DECISIONS))