Example #1
def agent(agent_id, net_params_queue, exp_queue):

    net_env = env.Environment(random_seed=agent_id,
                              fixed_env=False,
                              trace_folder=TRAIN_TRACES)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        mask = net_env.video_masks[net_env.video_idx]

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action = bitrate_to_action(bit_rate, mask)
        last_action = action

        action_vec = np.zeros(np.sum(mask))
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        while True:  # experience video streaming forever

            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, buffer_size, \
                rebuf, video_chunk_size, end_of_video, \
                video_chunk_remain, video_num_chunks, \
                next_video_chunk_size, mask = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = VIDEO_BIT_RATE[action] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[action] -
                                               VIDEO_BIT_RATE[last_action]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate
            last_action = action

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[action] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K
            state[4, -1] = video_chunk_remain / float(video_num_chunks)
            state[5, :] = -1
            nxt_chnk_cnt = 0
            for i in xrange(A_DIM):
                if mask[i] == 1:
                    state[5, i] = next_video_chunk_size[nxt_chnk_cnt] / M_IN_B
                    nxt_chnk_cnt += 1
            assert (nxt_chnk_cnt) == np.sum(mask)
            state[6, -A_DIM:] = mask

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))

            # the action probability should correspond to number of bit rates
            assert len(action_prob[0]) == np.sum(mask)

            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            action = bitrate_to_action(bit_rate, mask)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[action]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action = bitrate_to_action(bit_rate, mask)
                last_action = action
                action_vec = np.zeros(np.sum(mask))
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(np.sum(mask))
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
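
Example #1 calls a bitrate_to_action() helper that is not shown in the snippet. A minimal sketch of what it must do, assuming mask is a 0/1 vector over the A_DIM absolute bitrate levels and bit_rate indexes only the available levels:

import numpy as np

def bitrate_to_action(bit_rate, mask):
    # Hypothetical helper (the real one lives elsewhere in the project):
    # map an index into the available levels (where mask == 1) back to the
    # absolute index used by VIDEO_BIT_RATE.
    available = np.nonzero(np.asarray(mask))[0]
    return int(available[bit_rate])
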
Example #2
def main():

    summary_dir = SUMMARY_DIR
    if not os.path.exists(summary_dir):
        os.makedirs(summary_dir)
    log_file_dir = TEST_LOG_FOLDER
    if not os.path.exists(log_file_dir):
        os.makedirs(log_file_dir)

    TOTAL_REWARD_BITRATE = 0.0
    TOTAL_REWARD_HD_BITRATE = 0.0
    TOTAL_REWARD_REBUF = 0.0
    TOTAL_REWARD_SMOOTHNESS = 0.0
    TOTAL_REWARD = 0.0
    TOTAL_HOTSPOT_CHUNKS = 0.0

    np.random.seed(RANDOM_SEED)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print "Testing model restored."

        time_stamp = 0

        prefetch_decision = DEFAULT_PREFETCH
        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[prefetch_decision] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real system
            state_data_for_action = net_env.execute_action(prefetch_decision)

            # normal chunk state information
            delay = state_data_for_action['delay']
            sleep_time = state_data_for_action['sleep_time']
            last_bit_rate = state_data_for_action['last_bit_rate']
            play_buffer_size = state_data_for_action['play_buffer_size']
            rebuf = state_data_for_action['rebuf']
            video_chunk_size = state_data_for_action['video_chunk_size']
            next_video_chunk_sizes = state_data_for_action[
                'next_video_chunk_sizes']
            end_of_video = state_data_for_action['end_of_video']
            video_chunk_remain = state_data_for_action['video_chunk_remain']
            current_seq_no = state_data_for_action['current_seq_no']
            log_prefetch_decision = state_data_for_action[
                'log_prefetch_decision']

            # hotspot chunk state information
            was_hotspot_chunk = 1.0 * state_data_for_action['was_hotspot_chunk']
            TOTAL_HOTSPOT_CHUNKS += was_hotspot_chunk
            hotspot_chunks_remain = state_data_for_action[
                'hotspot_chunks_remain']
            chunks_till_played = state_data_for_action['chunks_till_played']
            total_buffer_size = state_data_for_action['total_buffer_size']
            last_hotspot_bit_rate = state_data_for_action[
                'last_hotspot_bit_rate']
            next_hotspot_chunk_sizes = state_data_for_action[
                'next_hotspot_chunk_sizes']
            dist_from_hotspot_chunks = state_data_for_action[
                'dist_from_hotspot_chunks']
            smoothness_eval_bitrates = state_data_for_action[
                'smoothness_eval_bitrates']

            # abr decision state information
            normal_bitrate_pensieve = state_data_for_action[
                'normal_bitrate_pensieve']
            hotspot_bitrate_pensieve = state_data_for_action[
                'hotspot_bitrate_pensieve']

            # print len(next_video_chunk_sizes)
            # print len(next_hotspot_chunk_sizes)

            last_overall_bitrate = last_bit_rate
            if prefetch_decision == 1:
                last_overall_bitrate = last_hotspot_bit_rate

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward_normal_br = (1.0 - was_hotspot_chunk) * (
                VIDEO_BIT_RATE[last_bit_rate] / M_IN_K) * 1.0
            reward_hotspot_br = was_hotspot_chunk * HD_REWARD[
                last_hotspot_bit_rate] * 1.0
            reward_rebuffering = REBUF_PENALTY * rebuf * 1.0
            reward_smoothness = 0.0
            if len(smoothness_eval_bitrates) > 1:
                for i in xrange(len(smoothness_eval_bitrates) - 1):
                    reward_smoothness += 1.0 * SMOOTH_PENALTY * (1.0 * np.abs(
                        VIDEO_BIT_RATE[smoothness_eval_bitrates[i + 1]] -
                        VIDEO_BIT_RATE[smoothness_eval_bitrates[i]]) / M_IN_K)

            reward = (1.0 * reward_normal_br) + (1.0 * reward_hotspot_br) - (
                1.0 * reward_rebuffering) - (1.0 * reward_smoothness)

            TOTAL_REWARD_BITRATE += reward_normal_br
            TOTAL_REWARD_HD_BITRATE += reward_hotspot_br
            TOTAL_REWARD_REBUF += reward_rebuffering
            TOTAL_REWARD_SMOOTHNESS += reward_smoothness
            TOTAL_REWARD += reward

            # print "reward before: {}".format(reward)

            r_batch.append(reward)

            # print "reward after: {}".format(reward)

            # log time_stamp, bit_rate, buffer_size, reward
            if not end_of_video:
                log_file.write(
                    str(time_stamp) + '\t' +
                    str(VIDEO_BIT_RATE[last_overall_bitrate]) + '\t' +
                    str(play_buffer_size) + '\t' + str(rebuf) + '\t' +
                    str(video_chunk_size) + '\t' + str(delay) + '\t' +
                    str(reward) + '\t' + str(log_prefetch_decision) + '\t' +
                    str(int(was_hotspot_chunk)) + '\t' + str(current_seq_no) +
                    '\n')
                log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            ## Normal state S_ABR_INFO
            state[0, -1] = VIDEO_BIT_RATE[last_overall_bitrate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = play_buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :BITRATE_LEVELS] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            ## Hotspot state S_HOT_INFO
            state[6, -1] = np.minimum(
                hotspot_chunks_remain,
                NUM_HOTSPOT_CHUNKS) / float(NUM_HOTSPOT_CHUNKS)
            state[7, -1] = np.minimum(
                chunks_till_played,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            state[8, -1] = total_buffer_size / BUFFER_NORM_FACTOR
            state[9, -1] = last_hotspot_bit_rate / float(
                np.max(VIDEO_BIT_RATE))
            state[10, :BITRATE_LEVELS] = np.array(
                next_hotspot_chunk_sizes) / M_IN_K / M_IN_K
            state[11, :NUM_HOTSPOT_CHUNKS] = (
                np.array(dist_from_hotspot_chunks) +
                CHUNK_TIL_VIDEO_END_CAP) / float(2 * CHUNK_TIL_VIDEO_END_CAP)

            ## Bitrate actions state S_BRT_INFO
            state[12, -1] = normal_bitrate_pensieve / float(
                np.max(VIDEO_BIT_RATE))
            state[13, -1] = hotspot_bitrate_pensieve / float(
                np.max(VIDEO_BIT_RATE))

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            prefetch_decision = (
                action_cumsum >
                np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()
                # break

                prefetch_decision = DEFAULT_PREFETCH

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[prefetch_decision] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                video_count += 1

                if video_count >= len(all_file_names):
                    break

                # print "log file: {}".format(log_file)
                # print "Hot chunks: {}".format(TOTAL_HOTSPOT_CHUNKS)

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')

        print "Normal bitrate reward: {}".format(TOTAL_REWARD_BITRATE)
        print "Hotspot bitrate reward: {}".format(TOTAL_REWARD_HD_BITRATE)
        print "Rebuffering reward: {}".format(TOTAL_REWARD_REBUF)
        print "Smoothness reward: {}".format(TOTAL_REWARD_SMOOTHNESS)
        print "Total reward: {}".format(TOTAL_REWARD)
        print "Total hotspot chunks: {}".format(int(TOTAL_HOTSPOT_CHUNKS))
Example #3
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue, epoch_queue):
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # 1. Sync the latest model parameters from the coordinator (initial synchronization of the network parameters)
        actor_net_params, critic_net_params = net_params_queue.get()
        epoch_num = epoch_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)  # initialize the one-hot action vector over the A_DIM actions
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        while True:  # experience video streaming forever

            # interact with the environment; the action is from the last decision
            # this is to make the framework similar to the real system
            # delay, sleep_time, buffer_size, rebuf, \
            # video_chunk_size, next_video_chunk_sizes, \
            # end_of_video, video_chunk_remain = \
            #     net_env.get_video_chunk(bit_rate)

            assert bit_rate >= 0
            assert bit_rate < A_DIM
            bitrate_send_last, lossrate_recv_last, bitrate_real_recovery, \
                bitrate_send_last_probe, lossrate_recv_last_probe, \
                bitrate_real_recovery_probe, end_of_video = \
                net_env.action_dispatch_and_report_svr(VIDEO_BIT_RATE[bit_rate])

            time_stamp += 2
            # -- linear reward --
            # reward is video quality - rebuffer penalty - smoothness
            #print '1', net_env.netbw
            #print '2', bitrate_send_last_probe * (1 - lossrate_recv_last_probe)
            x_function_top = (bitrate_send_last_probe *
                              (1 - lossrate_recv_last_probe) -
                              VIDEO_BIT_RATE[bit_rate]) / M_IN_K
            reward = -x_function_top * x_function_top  # 0.1 0.2 ... 1.1 1.2

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            #state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            #state[0, -1] = bitrate_send_last / 1000.0  # last quality
            #state[1, -1] = lossrate_recv_last  # packet loss rate, e.g. 0.1 0.2 0.3 0.4
            #state[2, -1] = bitrate_real_recovery / 1000.0  # kilo byte / ms

            state = np.roll(state, -1, axis=1)
            state[0, -1] = bitrate_send_last_probe / 1000.0  # last quality
            state[1, -1] = lossrate_recv_last_probe  # packet loss rate, e.g. 0.1 0.2 0.3 0.4
            state[2, -1] = bitrate_real_recovery_probe / 1000.0  # kilo byte / ms

            state[3, :A_DIM] = np.array(
                VIDEO_BIT_RATE[:]) / 1000.0  # kilo byte / ms
            state[4, -1] = bitrate_send_last / 1000.0  # kilo byte / ms
            # print state[3, :A_DIM]

            # ================== Predict BandWidth =========================

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)

            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()

            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(bitrate_send_last) + '\t' + str(lossrate_recv_last) +
                '\t' + str(bitrate_real_recovery) + '\t' + str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)
                epoch_num = epoch_queue.get()

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
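
The agent above rewards how closely the chosen sending bitrate tracks the goodput measured on the last probe. A hedged restatement of that reward as a standalone helper (names are illustrative, not from the original file):

def bandwidth_tracking_reward(bitrate_send_probe, lossrate_recv_probe,
                              chosen_bitrate, m_in_k=1000.0):
    # Negative squared gap, in Mbps, between what the probe actually delivered
    # and the bitrate the agent chose; closer tracking scores higher.
    delivered = bitrate_send_probe * (1 - lossrate_recv_probe)
    gap = (delivered - chosen_bitrate) / m_in_k
    return -gap * gap
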
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    net_env = env.Environment(fixed_env=True, 
                              trace_folder=TEST_TRACES, 
                              video_folder=TEST_VIDEO_FOLDER)

    log_path = LOG_FILE + '_' + net_env.all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN], action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action = bitrate_to_action(bit_rate, net_env.video_masks[net_env.video_idx])
        last_action = action

        s_batch = [np.zeros((S_INFO, S_LEN))]

        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, buffer_size, \
                rebuf, video_chunk_size, end_of_video, \
                video_chunk_remain, video_num_chunks, \
                next_video_chunk_size, mask = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = VIDEO_BIT_RATE[action] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[action] -
                                               VIDEO_BIT_RATE[last_action]) / M_IN_K

            last_bit_rate = bit_rate
            last_action = action

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[action]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[action] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K
            state[4, -1] = video_chunk_remain / float(video_num_chunks)
            state[5, :] = -1
            nxt_chnk_cnt = 0
            for i in xrange(A_DIM):
                if mask[i] == 1:
                    state[5, i] = next_video_chunk_size[nxt_chnk_cnt] / M_IN_B
                    nxt_chnk_cnt += 1
            assert(nxt_chnk_cnt) == np.sum(mask)
            state[6, -A_DIM:] = mask

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))

            # the action probability should correspond to number of bit rates
            assert len(action_prob[0]) == np.sum(mask)

            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states
            action = bitrate_to_action(bit_rate, mask)

            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                del s_batch[:]

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action = bitrate_to_action(bit_rate, mask)
                last_action = action

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                entropy_record = []

                video_count += 1

                if video_count >= len(net_env.all_cooked_bw):
                    break

                log_path = LOG_FILE + '_' + net_env.all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue):  # agent index, trace data, and the two queues assigned to this agent

    # Summary: build the environment, then open a Session() {
    #     create the neural networks
    #     (fetch parameters from the central agent to initialize them)
    #     pick the default action, initialize batch[] and entropy[]
    #     loop: {
    #         pull the new state from the environment, append it to batch[],
    #         pick a new action, and log the data to a file
    #         once the batch is large enough, put it on the multiprocessing Queue
    #         (for the central agent to take)
    #         fetch fresh parameters from the central agent, clear the old batch[] data
    #     }
    # }
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id)  # per-agent random seed for the environment

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        # create the actor network: TF Session, [number of state inputs, history length], number of actions (bitrate levels), learning rate
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        # create the critic network: TF Session, [number of state inputs, history length], learning rate
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)  # [0,0,0,0,0,0]
        action_vec[bit_rate] = 1  # mark the chosen bitrate (one-hot)

        s_batch = [np.zeros((S_INFO, S_LEN))]  # [one 6x8 zero matrix]: history of states
        a_batch = [action_vec]  # [[0,0,0,0,0,0],]
        r_batch = []  # rewards
        entropy_record = []

        time_stamp = 0
        while True:  # experience video streaming forever

            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)  # step the environment with the chosen bit-rate

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # -- linear reward --
            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            # -- log scale reward --
            # log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[-1]))
            # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[-1]))

            # reward = log_bit_rate \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

            # -- HD reward --
            # reward = HD_REWARD[bit_rate] \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(HD_REWARD[bit_rate] - HD_REWARD[last_bit_rate])

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)  # shift the history window left by one column

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality (bitrate)
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec, current buffer size
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms, throughput measurement
            state[3, -1] = float(
                delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec, download delay
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte, next chunk sizes per level (first A_DIM columns)
            state[5, -1] = np.minimum(video_chunk_remain,
                                      CHUNK_TIL_VIDEO_END_CAP) / float(
                                          CHUNK_TIL_VIDEO_END_CAP)  # remaining chunks

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()  # RAND_RANGE = 1000, defined above
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator (update the network weights)
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
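
The training agents above block on net_params_queue.get() and push their batches to exp_queue. A minimal sketch of the coordinator-side loop they assume, in the usual Pensieve-style layout (get_network_params() and the queue lists are assumptions mirroring the worker code, not taken from these snippets):

def central_agent_step(actor, critic, net_params_queues, exp_queues):
    # Sketch only: broadcast the current parameters to every worker, then
    # collect one experience batch from each of them.
    actor_net_params = actor.get_network_params()    # assumed counterpart of set_network_params()
    critic_net_params = critic.get_network_params()  # assumed counterpart of set_network_params()
    for q in net_params_queues:
        q.put([actor_net_params, critic_net_params])
    for q in exp_queues:
        s_batch, a_batch, r_batch, terminal, info = q.get()
        # ... compute A3C gradients from (s_batch, a_batch, r_batch) and apply them ...
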
Example #6
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN], action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            DECISIONS.append(bit_rate)

            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')

    print "Decisions: {}".format(Counter(DECISIONS))
Example #7
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue):

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        prefetch_decision = DEFAULT_PREFETCH

        action_vec = np.zeros(A_DIM)
        action_vec[prefetch_decision] = 1  # Normal chunk action: [1,0]; Hotspot chunk action: [0,1]

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        while True:  # experience video streaming forever

            # ---------------------------------

            # the action is from the last decision
            # this is to make the framework similar to the real system
            # TO-DO: Add additional state info
            state_data_for_action = net_env.execute_action(prefetch_decision)

            # normal chunk state information
            delay = state_data_for_action['delay']
            sleep_time = state_data_for_action['sleep_time']
            last_bit_rate = state_data_for_action['last_bit_rate']
            play_buffer_size = state_data_for_action['play_buffer_size']
            rebuf = state_data_for_action['rebuf']
            video_chunk_size = state_data_for_action['video_chunk_size']
            next_video_chunk_sizes = state_data_for_action[
                'next_video_chunk_sizes']
            end_of_video = state_data_for_action['end_of_video']
            video_chunk_remain = state_data_for_action['video_chunk_remain']

            # hotspot chunk state information
            was_hotspot_chunk = state_data_for_action['was_hotspot_chunk']
            hotspot_chunks_remain = state_data_for_action[
                'hotspot_chunks_remain']
            chunks_till_played = state_data_for_action['chunks_till_played']
            total_buffer_size = state_data_for_action['total_buffer_size']
            last_hotspot_bit_rate = state_data_for_action[
                'last_hotspot_bit_rate']
            next_hotspot_chunk_sizes = state_data_for_action[
                'next_hotspot_chunk_sizes']
            dist_from_hotspot_chunks = state_data_for_action[
                'dist_from_hotspot_chunks']
            smoothness_eval_bitrates = state_data_for_action[
                'smoothness_eval_bitrates']

            # abr decision state information
            normal_bitrate_pensieve = state_data_for_action[
                'normal_bitrate_pensieve']
            hotspot_bitrate_pensieve = state_data_for_action[
                'hotspot_bitrate_pensieve']

            # print len(next_video_chunk_sizes)
            # print len(next_hotspot_chunk_sizes)

            last_overall_bitrate = last_bit_rate
            if prefetch_decision == 1:
                last_overall_bitrate = last_hotspot_bit_rate
            # ---------------------------------

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # -- linear reward -- (in hotspot aware scenario)
            # reward is video quality - rebuffer penalty - smoothness + hd reward for hotspot
            reward_normal_br = (1 - was_hotspot_chunk) * (
                VIDEO_BIT_RATE[last_bit_rate] / M_IN_K) * 1.0
            reward_hotspot_br = was_hotspot_chunk * HD_REWARD[
                last_hotspot_bit_rate] * 1.0
            reward_rebuffering = REBUF_PENALTY * rebuf * 1.0
            reward_smoothness = 0.0
            if len(smoothness_eval_bitrates) > 1:
                for i in xrange(len(smoothness_eval_bitrates) - 1):
                    reward_smoothness += 1.0 * SMOOTH_PENALTY * (1.0 * np.abs(
                        VIDEO_BIT_RATE[smoothness_eval_bitrates[i + 1]] -
                        VIDEO_BIT_RATE[smoothness_eval_bitrates[i]]) / M_IN_K)

            reward = (1.0 * reward_normal_br) + (1.0 * reward_hotspot_br) - (
                1.0 * reward_rebuffering) - (1.0 * reward_smoothness)

            # -- log scale reward --
            # log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[-1]))
            # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[-1]))

            # reward = log_bit_rate \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

            # -- HD reward --
            # reward = HD_REWARD[bit_rate] \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(HD_REWARD[bit_rate] - HD_REWARD[last_bit_rate])

            r_batch.append(reward)

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            ## Normal state S_ABR_INFO
            state[0, -1] = VIDEO_BIT_RATE[last_overall_bitrate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = play_buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :BITRATE_LEVELS] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            ## Hotspot state S_HOT_INFO
            state[6, -1] = np.minimum(
                hotspot_chunks_remain,
                NUM_HOTSPOT_CHUNKS) / float(NUM_HOTSPOT_CHUNKS)
            state[7, -1] = np.minimum(
                chunks_till_played,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            state[8, -1] = total_buffer_size / BUFFER_NORM_FACTOR
            state[9, -1] = last_hotspot_bit_rate / float(
                np.max(VIDEO_BIT_RATE))
            state[10, :BITRATE_LEVELS] = np.array(
                next_hotspot_chunk_sizes) / M_IN_K / M_IN_K
            state[11, :NUM_HOTSPOT_CHUNKS] = (
                np.array(dist_from_hotspot_chunks) +
                CHUNK_TIL_VIDEO_END_CAP) / float(2 * CHUNK_TIL_VIDEO_END_CAP)

            ## Bitrate actions state S_BRT_INFO
            state[12, -1] = normal_bitrate_pensieve / float(
                np.max(VIDEO_BIT_RATE))
            state[13, -1] = hotspot_bitrate_pensieve / float(
                np.max(VIDEO_BIT_RATE))

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            prefetch_decision = (
                action_cumsum >
                np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward, prefetch_decision
            log_file.write(
                str(time_stamp) + '\t' +
                str(VIDEO_BIT_RATE[last_overall_bitrate]) + '\t' +
                str(play_buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(prefetch_decision) + '\t' + str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                prefetch_decision = DEFAULT_PREFETCH

                action_vec = np.zeros(A_DIM)
                action_vec[prefetch_decision] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[prefetch_decision] = 1
                a_batch.append(action_vec)
Example #8
File: base2.py  Project: yuting-li/AoI_RL
def main():

    np.random.seed(RANDOM_SEED)

    assert len(PACKET_SIZE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_base2_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        time_stamp = 0

        sensor_selection = DEFAULT_SELECTION

        action_vec = np.zeros(A_DIM)
        prob_violation = np.zeros(A_DIM)
        violation_n = np.zeros(A_DIM)
        action_vec[sensor_selection] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []
        video_count = 0
        objective = 0
        k = 0
        sum_age = 0
        sum_violation = 0
        while k < 30000:  # run for 30000 decision steps

            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, video_chunk_size = net_env.get_video_chunk(
                sensor_selection)

            #time_stamp += delay  # in ms
            #time_stamp += sleep_time  # in ms

            #for n in range(0,A_DIM):
            #    violation[n] = 0
            #	if n == sensor_selection:
            #	    age[n,k] = delay
            #	else:
            #	    age[n,k] = age[n,k-1] + delay
            #	if age[n,k] > tau[n]:
            #	    violation[n] += 1
            #   sum_age = np.sum(age[:,:])
            #    sum_violation = np.sum(violation)
            #    expected_age=sum_age/(k*A_DIM)

            sum_age_before = np.sum(age[:, k])
            current_violation = 0
            for n in range(0, A_DIM):

                #for k in range (1,TRAIN_SEQ_LEN):
                if n == sensor_selection:
                    #print (j)
                    #time.sleep(2)
                    dummy = int(j[n])
                    j[n] += 1
                    age[n, k] = delay
                    anis[n, dummy] = age[n, k]

                    #violation[n] = 0

                else:
                    age[n, k] = age[n, k - 1] + delay
                    dummy = int(j[n])
                    anis[n, dummy] = age[n, k]
                if age[n, k] > tau[n]:
                    violation[n] += 1
                    current_violation = current_violation + (10 - n / 10)
                    violation_n_k[n, k] += 1

            prob_violation = violation / (k + 1)
            #print violation_n
            #time.sleep(2)
            for n in range(0, A_DIM):
                #expected_age[n] = gamma[n]*np.sum((anis[n,:int(j[n])+1])/(int(j[n])+1))
                expected_age_n[n] = np.sum(age[n, :]) / ((k + 1))
                if violation_n[n] > epsilon[n]:
                    hamza[n] = 1
                else:
                    hamza[n] = 0

            expected_age = np.sum(expected_age_n[:]) / A_DIM
            #prob_violation = violation/k
            #reward = (-np.sum(age[:,k]) - lamba*np.sum(violation_n_k[:,k]) - mu*np.sum(hamza[:]))/100
            reward = (-np.sum(age[:, k]) - lamba * current_violation -
                      mu * np.sum(hamza[:])) / 100
            sum_age += np.sum(age)
            if k == 29999:
                for n in range(0, A_DIM):
                    violation_n[n] = 1000 * (10 - n / 10) * violation[n] / (k + 1)
                sum_age = sum_age / ((k + 1) * A_DIM)
                sum_violation = np.sum(violation_n)
                print(sum_age + sum_violation)
                print(100 * violation[:] / (k + 1))
                print(expected_age_n[:])

            r_batch.append(reward)
            log_file.write(
                str(time_stamp) + '\t' + str(PACKET_SIZE[sensor_selection]) +
                '\t' + str(delay) + '\t' + str(reward) + '\t' +
                str(age[0, k]) + '\t' + str(age[1, k]) + '\t' +
                str(age[2, k]) + '\t' + str(age[3, k]) + '\t' +
                str(age[4, k]) + '\t' + str(age[5, k]) + '\t' +
                str(age[6, k]) + '\t' + str(age[7, k]) + '\t' +
                str(age[8, k]) + '\t' + str(age[9, k]) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            state[0, -1] = float(age[0, k]) / M_IN_K
            state[1, -1] = float(age[1, k]) / M_IN_K
            state[2, -1] = float(age[2, k]) / M_IN_K
            state[3, -1] = float(age[3, k]) / M_IN_K
            state[4, -1] = float(age[4, k]) / M_IN_K
            state[5, -1] = float(age[5, k]) / M_IN_K
            state[6, -1] = float(age[6, k]) / M_IN_K
            state[7, -1] = float(age[7, k]) / M_IN_K
            state[8, -1] = float(age[8, k]) / M_IN_K
            state[9, -1] = float(age[9, k]) / M_IN_K
            #state[10, -1] = float(PACKET_SIZE[0])/float(PACKET_SIZE[9])
            #state[11, -1] = float(PACKET_SIZE[1])/float(PACKET_SIZE[9])
            #state[12, -1] = float(PACKET_SIZE[2])/float(PACKET_SIZE[9])
            #state[13, -1] = float(PACKET_SIZE[3])/float(PACKET_SIZE[9])
            #state[14, -1] = float(PACKET_SIZE[4])/float(PACKET_SIZE[9])
            #state[15, -1] = float(PACKET_SIZE[5])/float(PACKET_SIZE[9])
            #state[16, -1] = float(PACKET_SIZE[6])/float(PACKET_SIZE[9])
            #state[17, -1] = float(PACKET_SIZE[7])/float(PACKET_SIZE[9])
            #state[18, -1] = float(PACKET_SIZE[8])/float(PACKET_SIZE[9])
            #state[19, -1] = float(PACKET_SIZE[9])/float(PACKET_SIZE[9])
            state[10, -1] = float(delay) / 100
            state[11, -1] = float(PACKET_SIZE[sensor_selection]) / (
                100 * float(delay) * float(PACKET_SIZE[9]))

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            #action_cumsum = np.cumsum(action_prob)
            sensor_selection = (age[:, k]).argmax()
            # sensor_selection = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()

            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))
            time_stamp += 1
            # log time_stamp, bit_rate, buffer_size, reward

            #if end_of_video:

            #    del s_batch[:]
            #    del a_batch[:]
            #    del r_batch[:]
            #    del entropy_record[:]
            #k = 0
            #for n in range(0,A_DIM):
            #    violation[n] = 0
            #    age[n,:] = 0
            #sensor_selection = DEFAULT_SELECTION

            #log_file.write('\n')  # so that in the log we know where video ends
            s_batch.append(state)

            action_vec = np.zeros(A_DIM)
            action_vec[sensor_selection] = 1
            a_batch.append(action_vec)
            #log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            #log_file = open(log_path, 'wb')
            k += 1
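
The reward in the loop above combines the summed age of information with a deadline-violation term and a long-term violation flag. A minimal, self-contained sketch of that shape, assuming ten sensors, a uniform deadline of 50 and unit lamba/mu weights (the loop above additionally weights each violation by a sensor-dependent factor):

import numpy as np

A_DIM = 10
tau = np.full(A_DIM, 50.0)   # assumed per-sensor age deadlines
lamba = 1.0                  # assumed weight on instantaneous violations
mu = 1.0                     # assumed weight on long-term violation flags

def step_reward(age_k, hamza):
    # age_k: current age of information per sensor; hamza: 0/1 long-term violation flags
    current_violation = np.sum(age_k > tau)
    return (-np.sum(age_k) - lamba * current_violation - mu * np.sum(hamza)) / 100.0

print(step_reward(np.array([10.0] * A_DIM), np.zeros(A_DIM)))   # -> -1.0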
Example #9
def agent(agent_id, all_cooked_time, all_cooked_bw, all_file_names,
          video_size_file, net_params_queue, exp_queue):
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id,
                              VIDEO_SIZE_FILE=video_size_file,
                              Debug=False)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        bit_rate = DEFAULT_QUALITY
        target_buffer = DEFAULT_QUALITY
        latency_limit = 4
        index = 1
        action_vec = np.zeros(A_DIM)
        action_vec[index] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0
        reward_all_sum = 0
        reward_all = 0
        reward = 0
        switch_num = 0
        SMOOTH_PENALTY = 0.01
        REBUF_PENALTY = 1.5
        LANTENCY_PENALTY = 0.01
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 1
        epoch = 0
        n = 0
        state = np.array(s_batch[-1], copy=True)
        frame_time_len = 0.04
        last_bit_rate = DEFAULT_QUALITY
        while True:  # experience video streaming forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            time, time_interval, send_data_size, chunk_len, \
            rebuf, buffer_size, play_time_len, end_delay, \
            cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \
            buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer,
                                                                                     latency_limit)
            # # QOE setting
            # if end_delay <= 1.0:
            #     LANTENCY_PENALTY = 0.005
            # else:
            #     LANTENCY_PENALTY = 0.01

            reward_frame = 0
            epoch += 1
            if not cdn_flag:
                reward_frame = frame_time_len * float(
                    BIT_RATE[bit_rate]
                ) * BITRATE_REWARD - REBUF_PENALTY * rebuf - LANTENCY_PENALTY * end_delay - SKIP_PENALTY * skip_frame_time_len
            else:
                reward_frame = -(REBUF_PENALTY * rebuf)
            reward += reward_frame

            # dequeue history record
            state = np.roll(state, -1, axis=1)
            # this should be S_INFO number of terms
            state[0, -1] = buffer_size * 0.1
            state[1, -1] = send_data_size * 0.00001
            state[2, -1] = time_interval * 10  # scaled inter-frame time
            state[3, -1] = end_delay * 0.1  # 10 sec
            state[4, -1] = rebuf  # rebuffering time of this step

            if decision_flag and not end_of_video:

                reward_frame = -1 * SMOOTH_PENALTY * (
                    abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000)
                reward += reward_frame
                last_bit_rate = bit_rate
                r_batch.append(reward)

                reward = 0

                # compute action probability vector
                action_prob = actor.predict(
                    np.reshape(state, (1, S_INFO, S_LEN)))
                action_cumsum = np.cumsum(action_prob)
                temp = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
                index = (action_cumsum > temp).argmax()

                bit_rate = ACTION_SAPCE[index][0]
                target_buffer = ACTION_SAPCE[index][1]
                latency_limit = ACTION_SAPCE[index][2]
                # Note: we need to discretize the probability into 1/RAND_RANGE steps,
                # because there is an intrinsic discrepancy in passing single state and batch states

                entropy_record.append(a3c.compute_entropy(action_prob[0]))

                # report experience to the coordinator
                if len(r_batch) >= TRAIN_SEQ_LEN:
                    exp_queue.put([
                        s_batch[1:],  # ignore the first chunk
                        a_batch[1:],  # since we don't have the
                        r_batch[1:],  # control over it
                        end_of_video,
                        {
                            'entropy': entropy_record
                        }
                    ])

                    # synchronize the network parameters from the coordinator
                    actor_net_params, critic_net_params = net_params_queue.get()
                    actor.set_network_params(actor_net_params)
                    critic.set_network_params(critic_net_params)

                    del s_batch[:]
                    del a_batch[:]
                    del r_batch[:]
                    del entropy_record[:]

                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[index] = 1
                a_batch.append(action_vec)

            reward_all += reward_frame

            # store the state and action into batches
            if end_of_video:
                r_batch.append(reward)

                reward_all_sum += reward_all / 20
                video_count += 1
                if video_count >= len(all_file_names):
                    n += 1
                    video_count = 0
                    print(n, "agent_id ", agent_id, "reward_all_sum:",
                          reward_all_sum)
                    w.writerow([n, reward_all_sum])
                    out.flush()
                    reward_all_sum = 0
                    net_env = env.Environment(all_cooked_time=all_cooked_time,
                                              all_cooked_bw=all_cooked_bw,
                                              random_seed=epoch,
                                              VIDEO_SIZE_FILE=video_size_file,
                                              Debug=False)
                    if n == NUM_EPOCH:
                        break

                reward_all = 0
                reward = 0
                switch_num = 0

                bit_rate = DEFAULT_QUALITY  # use the default action here
                target_buffer = DEFAULT_QUALITY

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
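
The sampled index above is translated into a concrete (bit_rate, target_buffer, latency_limit) triple through the ACTION_SAPCE table defined elsewhere in this project. A sketch of that lookup with an assumed, purely illustrative table (the real entries and dimensions may differ):

import itertools

BIT_RATE_LEVELS = [0, 1, 2, 3]      # assumed bitrate indices
TARGET_BUFFERS = [0.5, 1.0]         # assumed target buffer levels (s)
LATENCY_LIMITS = [2, 4]             # assumed latency limits (s)

# flat action index -> control triple, as in ACTION_SAPCE[index][0..2] above
ACTION_SPACE = list(itertools.product(BIT_RATE_LEVELS, TARGET_BUFFERS, LATENCY_LIMITS))

index = 5
bit_rate, target_buffer, latency_limit = ACTION_SPACE[index]
print(bit_rate, target_buffer, latency_limit)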
Example #10
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    # Originally defined in env.py
    mask = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    with tf.Session(config=session_conf) as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action = bitrate_to_action(bit_rate, mask)
        last_action = action

        s_batch = [np.zeros((S_INFO, S_LEN))]

        entropy_record = []

        video_chunks_sent = 0
        video_num_chunks = 43200
        # 24 hours of video. Is this an acceptable proxy for never ending video?

        puffer_sock = start_ipc_client()

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real

            video_chunk_remain = video_num_chunks - video_chunks_sent

            delay, buffer_size, \
                rebuf, video_chunk_size, \
                next_video_chunk_size = \
                get_puffer_info(puffer_sock)

            reward = VIDEO_BIT_RATE[action] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[action] -
                                               VIDEO_BIT_RATE[last_action]) / M_IN_K

            last_bit_rate = bit_rate
            last_action = action

            # Add average audio size to each video chunk to improve throughput estimates
            # This is necessary because original Pensieve code does not consider audio, and
            # no simple solution exists given that our audio and video chunks are different
            # time scales.
            video_chunk_size += AVG_AUDIO_SIZE_BYTES
            for idx in xrange(len(next_video_chunk_size)):
                next_video_chunk_size[
                    idx] = next_video_chunk_size[idx] + AVG_AUDIO_SIZE_BYTES

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            if delay == 0:  # avoid division by zero
                delay = 1

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[action] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = float(video_chunk_size) / float(
                delay
            ) / M_IN_K  # kilo byte / ms # This is really just throughput
            state[3, -1] = float(delay) / M_IN_K
            state[4, -1] = video_chunk_remain / float(video_num_chunks)
            state[5, :] = -1
            nxt_chnk_cnt = 0
            for i in xrange(A_DIM):
                if mask[i] == 1:
                    state[5, i] = next_video_chunk_size[nxt_chnk_cnt] / M_IN_B
                    nxt_chnk_cnt += 1
            assert (nxt_chnk_cnt) == np.sum(mask)
            state[6, -A_DIM:] = mask

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))

            # the action probability should correspond to number of bit rates
            assert len(action_prob[0]) == np.sum(mask)

            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states
            action = bitrate_to_action(bit_rate, mask)

            # Now I have my action! Send this action back to the Puffer server over IPC
            send_puffer_next_action(puffer_sock, bit_rate)

            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))
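
Like the training agents, this inference loop picks the next bitrate by inverse-CDF sampling over the actor's output, discretized into 1/RAND_RANGE steps. A self-contained sketch of that step with a placeholder probability vector:

import numpy as np

RAND_RANGE = 1000   # same constant as in the Pensieve-style scripts

def sample_action(action_prob):
    # first index whose cumulative probability exceeds a random threshold
    action_cumsum = np.cumsum(action_prob)
    threshold = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
    return int((action_cumsum > threshold).argmax())

print(sample_action(np.array([0.1, 0.2, 0.3, 0.4])))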
Example #11
        def do_POST(self):

            content_length = int(self.headers['Content-Length'])
            env_post_data = json.loads(self.rfile.read(content_length))

            # mlog(fnc="do_POST()", msg="POST req data: Last request - {}, Last quality - {}, Rebuffer Time - {}".format(
            #     post_data['lastRequest'], post_data['lastquality'], float(post_data['RebufferTime'] - self.input_dict['last_total_rebuf'])))
            send_data = ""

            if ('pastThroughput' in env_post_data):
                # @Hongzi: this is just the summary of throughput/quality at the end of the load
                # so we don't want to use this information to send back a new quality
                mlog(fnc="do_POST()",
                     msg="Past throughput is present in post_data, \
                        not using this information to send back quality")
            else:

                # Get params according to rl_test.py in original Pensieve code
                delay = env_post_data["delay"]
                sleep_time = env_post_data["sleep_time"]
                buffer_size = env_post_data["buffer_size"]
                rebuf = env_post_data["rebuf"]
                video_chunk_size = env_post_data["video_chunk_size"]
                next_video_chunk_sizes = env_post_data[
                    "next_video_chunk_sizes"]
                end_of_video = env_post_data["end_of_video"]
                video_chunk_remain = env_post_data["video_chunk_remain"]

                # Get additional params to differentiate between hotspot y/n cases
                bit_rate = env_post_data["bit_rate"]
                last_bit_rate = env_post_data["last_bit_rate"]
                is_last_action_prefetch = env_post_data[
                    "is_last_action_prefetch"]
                is_prefetch_hotspot = env_post_data["is_prefetch_hotspot"]

                Request_Handler.time_stamp += delay  # in ms
                Request_Handler.time_stamp += sleep_time  # in ms

                # rebuffer_time = float(post_data['RebufferTime'] - self.input_dict['last_total_rebuf'])

                # # --linear reward--
                # reward = VIDEO_BIT_RATE[post_data['lastquality']] / M_IN_K \
                #         - REBUF_PENALTY * rebuffer_time / M_IN_K \
                #         - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[post_data['lastquality']] -
                #                                   self.input_dict['last_bit_rate']) / M_IN_K

                # --log reward--
                # log_bit_rate = np.log(VIDEO_BIT_RATE[post_data['lastquality']] / float(VIDEO_BIT_RATE[0]))
                # log_last_bit_rate = np.log(self.input_dict['last_bit_rate'] / float(VIDEO_BIT_RATE[0]))
                # reward = log_bit_rate \
                #          - 4.3 * rebuffer_time / M_IN_K \
                #          - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

                # --hd reward--
                # reward = BITRATE_REWARD[post_data['lastquality']] \
                #         - 8 * rebuffer_time / M_IN_K - np.abs(BITRATE_REWARD[post_data['lastquality']] - BITRATE_REWARD_MAP[self.input_dict['last_bit_rate']])

                # Linear reward
                reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

                # self.input_dict['last_bit_rate'] = VIDEO_BIT_RATE[post_data['lastquality']]
                # self.input_dict['last_total_rebuf'] = post_data['RebufferTime']

                self.r_batch.append(reward)

                # custom: append last state
                if Request_Handler.train_counter > 0:
                    if is_last_action_prefetch == 1:
                        self.s_batch.append(Request_Handler.last_hotspot_state)
                    else:
                        self.s_batch.append(Request_Handler.last_normal_state)

                # retrieve previous state
                if len(self.s_batch) == 0:
                    state = [np.zeros((S_INFO, S_LEN))]
                else:
                    state = np.array(self.s_batch[-1], copy=True)

                # compute bandwidth measurement
                # video_chunk_fetch_time = post_data['delay']
                # video_chunk_size = post_data['lastChunkSize']

                # compute number of video chunks left
                # video_chunk_remain = TOTAL_VIDEO_CHUNKS - post_data['videoChunkCount']

                # dequeue history record
                state = np.roll(state, -1, axis=1)
                # print "roll: {}, shape: {}".format(type(state), state.shape)

                # next_video_chunk_sizes = []
                # for i in xrange(A_DIM):
                #     next_video_chunk_sizes.append(get_chunk_size(i, post_data['nextVideoChunkIndex']))

                # this should be S_INFO number of terms
                # try:
                #     state[0, -1] = VIDEO_BIT_RATE[post_data['lastquality']] / float(np.max(VIDEO_BIT_RATE))
                #     state[1, -1] = post_data['buffer'] / BUFFER_NORM_FACTOR
                #     state[2, -1] = float(video_chunk_size) / float(video_chunk_fetch_time) / M_IN_K  # kilo byte / ms
                #     state[3, -1] = float(video_chunk_fetch_time) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
                #     state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
                #     state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

                #     print "Video bitrate: {}".format(state[0, -1])
                #     print "Buffer: {}".format(state[1, -1])
                #     print "Throughput: {}".format(state[2, -1])
                #     print "Download duration: {}".format(state[3, -1])
                #     print "Next video chunk sizes: {}".format(state[4, :A_DIM])
                #     print "Video chunks remaining: {}".format(state[5, -1])
                #     print "\n"

                # except ZeroDivisionError:
                #     # this should occur VERY rarely (1 out of 3000), should be a dash issue
                #     # in this case we ignore the observation and roll back to an eariler one
                #     if len(self.s_batch) == 0:
                #         state = [np.zeros((S_INFO, S_LEN))]
                #     else:
                #         state = np.array(self.s_batch[-1], copy=True)

                # this should be S_INFO number of terms
                try:
                    state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                        np.max(VIDEO_BIT_RATE))  # last quality
                    state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
                    state[2, -1] = float(video_chunk_size) / float(
                        delay) / M_IN_K  # kilo byte / ms
                    state[3, -1] = float(
                        delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
                    state[4, :A_DIM] = np.array(
                        next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
                    state[5, -1] = np.minimum(video_chunk_remain,
                                              CHUNK_TIL_VIDEO_END_CAP) / float(
                                                  CHUNK_TIL_VIDEO_END_CAP)

                except ZeroDivisionError:
                    # this should occur VERY rarely (1 out of 3000); likely a DASH issue
                    # in this case we ignore the observation and roll back to an earlier one
                    if len(self.s_batch) == 0:
                        state = [np.zeros((S_INFO, S_LEN))]
                    else:
                        state = np.array(self.s_batch[-1], copy=True)

                # log wall_time, bit_rate, buffer_size, rebuffer_time, video_chunk_size, download_time, reward
                # self.log_file.write(str(time.time()) + '\t' +
                #                     str(VIDEO_BIT_RATE[post_data['lastquality']]) + '\t' +
                #                     str(post_data['buffer']) + '\t' +
                #                     str(rebuffer_time / M_IN_K) + '\t' +
                #                     str(video_chunk_size) + '\t' +
                #                     str(video_chunk_fetch_time) + '\t' +
                #                     str(reward) + '\n')
                # self.log_file.flush()
                # print "state construct: {}, shape: {}".format(type(state), state.shape)

                action_prob = self.actor.predict(
                    np.reshape(state, (1, S_INFO, S_LEN)))
                action_cumsum = np.cumsum(action_prob)
                bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                            float(RAND_RANGE)).argmax()
                # Note: we need to discretize the probability into 1/RAND_RANGE steps,
                # because there is an intrinsic discrepancy in passing single state and batch states

                self.entropy_record.append(a3c.compute_entropy(action_prob[0]))

                # send data to html side
                # send_data = str(bit_rate)
                send_data = json.dumps({"bitrate": bit_rate})
                mlog(fnc="do_POST()",
                     msg="Bitrate decision: {}".format(bit_rate))

                self.send_response(200)
                self.send_header('Content-Type', 'text/plain')
                self.send_header('Content-Length', len(send_data))
                self.send_header('Access-Control-Allow-Origin', "*")
                self.end_headers()
                self.wfile.write(send_data)

                # record [state, action, reward]
                # put it here after training, notice there is a shift in reward storage

                if is_prefetch_hotspot == 1:
                    Request_Handler.last_hotspot_state = state
                    Request_Handler.prefetch_decisions.append(0)
                else:
                    Request_Handler.last_normal_state = state
                    Request_Handler.prefetch_decisions.append(1)

                # self.s_batch.append(state)
                # print "batch append: {}, shape: {}".format(type(state), state.shape)

                Request_Handler.train_counter += 1
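
The linear reward used in do_POST() is quality in Mbps minus rebuffering and smoothness penalties. A standalone sketch, assuming a typical Pensieve bitrate ladder and the usual 4.3/1.0 penalty weights (both are assumptions here, not values taken from this handler):

VIDEO_BIT_RATE = [300, 750, 1200, 1850, 2850, 4300]   # Kbps, assumed ladder
M_IN_K = 1000.0
REBUF_PENALTY = 4.3
SMOOTH_PENALTY = 1.0

def linear_qoe(bit_rate, last_bit_rate, rebuf):
    # bitrate utility - rebuffer penalty - bitrate switching penalty
    return (VIDEO_BIT_RATE[bit_rate] / M_IN_K
            - REBUF_PENALTY * rebuf
            - SMOOTH_PENALTY * abs(VIDEO_BIT_RATE[bit_rate]
                                   - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K)

print(linear_qoe(bit_rate=3, last_bit_rate=1, rebuf=0.5))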
Example #12
File: main.py  Project: yogeshVU/QARC
def agent(agent_id, net_params_queue, exp_queue):
    net_env = innovation_env.Environment(random_seed=agent_id)
    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        last_vmaf = -1
        bit_rate = DEFAULT_QUALITY
        last_rtt = -1
        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        index = 1
        while True:  # experience video streaming forever
            _norm_bitrate = VIDEO_BIT_RATE[bit_rate]
            delay, loss, recv_bitrate, rtt, throughput, limbo_bytes_len = \
                net_env.get_video_chunk(bit_rate)

            rtt = float(rtt) / float(1000)
            if last_rtt < 0:
                last_rtt = rtt
            _norm_send_bitrate = bit_rate / A_DIM
            _queuing_delay = abs(rtt - last_rtt)
            _norm_recv_bitrate = min(
                float(recv_bitrate) / delay / BUFFER_NORM_FACTOR, 1.0)

            time_stamp += delay  # in ms
            vmaf = net_env.get_vmaf(bit_rate)
            if last_vmaf < 0:
                last_vmaf = vmaf

            #_normalized_bitrate = (_norm_bitrate - BITRATE_MIN) / (BITRATE_MAX - BITRATE_MIN)
            _vmaf_reward = (vmaf / _norm_bitrate) * BITRATE_MIN
            reward = vmaf - 0.2 * _norm_send_bitrate - 1.0 / DELAY_GRADIENT_MAX * \
                min(_queuing_delay, DELAY_GRADIENT_MAX) - \
                1.0 * abs(last_vmaf - vmaf)
            r_batch.append(reward)

            last_vmaf = vmaf
            last_rtt = rtt
            log_file.write(
                str(time_stamp) + '\t' + str(_norm_bitrate) + '\t' +
                str(recv_bitrate) + '\t' + str(limbo_bytes_len) + '\t' +
                str(rtt) + '\t' + str(vmaf) + '\t' + str(reward) + '\n')
            log_file.flush()

            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)
            state[0, -1] = _norm_send_bitrate  # last quality
            state[1, -1] = _norm_recv_bitrate  # kilo byte / ms
            state[2, -1] = _queuing_delay  # max:500ms
            state[3, -1] = float(loss)  # changed loss
            # test:add fft feature
            _fft = np.fft.fft(state[1])
            state[4] = _fft.real
            state[5] = _fft.imag
            state[6, -1] = net_env.get_single_image()
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            #print 'state',state[6]
            #print 'action',action_prob[0]
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN:
                exp_queue.put([
                    s_batch[:],  # unlike the other agents, the first
                    a_batch[:],  # chunk is not dropped here
                    r_batch[:],
                    # end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                # if index % MODEL_TEST_INTERVAL == 0 and agent_id == 0:
                #    print 'start test'
                # test(actor,index)
                index += 1
                # so that in the log we know where video ends
                log_file.write('\n')

            s_batch.append(state)

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1
            a_batch.append(action_vec)
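
This example augments the state with a frequency-domain view of the recent receive-rate history: rows 4 and 5 hold the real and imaginary parts of its FFT. A minimal sketch, assuming a history length of 8 and random placeholder data:

import numpy as np

S_LEN = 8
recv_rate_history = np.random.rand(S_LEN)   # stands in for state[1, :]

_fft = np.fft.fft(recv_rate_history)
fft_real, fft_imag = _fft.real, _fft.imag   # would fill state[4] and state[5]
print(fft_real.shape, fft_imag.shape)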
Example #13
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    log_path = LOG_FILE + '_sim_0'
    log_file = open(log_path, 'wb')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        delay_file = open(DATA_PATH + '/lastdownloadtime0')
        #sleep_file = open(DATA_PATH + '/rebufftime0')
        buffer_size_file = open(DATA_PATH + '/buffer0')
        rebuf_file = open(DATA_PATH + '/rebufftime0')
        video_chunk_size_file = open(DATA_PATH + '/chunk_size0')
        video_chunk_remain_file = open(DATA_PATH + '/m_segmentleft0')
        time_file = open(DATA_PATH + '/time0')

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            with open(DATA_PATH + '/permission0') as enable:
                key = enable.read()
                if key == '1':
                    output_file = open(DATA_PATH + '/predict0', 'a')
                    file_permission = open(DATA_PATH + '/permission0', 'a')

                    delay = delay_file.readline().split('\n')[0]
                    delay = float(delay) * 1000  #in ms

                    sleep_time = 0.0  #float(sleep_file.readline().split('\n')[0])

                    buffer_size = float(
                        buffer_size_file.readline().split('\n')[0])
                    buffer_size = max(buffer_size, 0)

                    rebuf = float(rebuf_file.readline().split('\n')[0])

                    video_chunk_size = float(
                        video_chunk_size_file.readline().split('\n')[0])

                    next_video_chunk_sizes = np.multiply(VIDEO_BIT_RATE, 500)

                    video_chunk_remain = float(
                        video_chunk_remain_file.readline().split('\n')[0])
                    currTime = time_file.readline().split('\n')[0]

                    if video_chunk_remain == 0:
                        end_of_video = 1
                    else:
                        end_of_video = 0

                    time_stamp += delay  # in ms
                    time_stamp += sleep_time  # in ms

                    # reward is video quality - rebuffer penalty - smoothness
                    reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                             - REBUF_PENALTY * rebuf \
                             - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                                       VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

                    r_batch.append(reward)

                    last_bit_rate = bit_rate

                    # log time_stamp, bit_rate, buffer_size, reward
                    #log_file.write(str(time_stamp / M_IN_K) + '\t' +
                    #               str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                    #               str(buffer_size) + '\t' +
                    #               str(rebuf) + '\t' +
                    #               str(video_chunk_size) + '\t' +
                    #               str(delay) + '\t' +
                    #               str(reward) + '\n')
                    #log_file.flush()

                    # log time_stamp, bit_rate, buffer_size, reward
                    log_file.write(
                        str(currTime) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
                        '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
                        str(video_chunk_size) + '\t' + str(delay) + '\t' +
                        str(reward) + '\n')
                    log_file.flush()
                    # retrieve previous state
                    if len(s_batch) == 0:
                        state = [np.zeros((S_INFO, S_LEN))]
                    else:
                        state = np.array(s_batch[-1], copy=True)

                    # dequeue history record
                    state = np.roll(state, -1, axis=1)

                    # this should be S_INFO number of terms
                    state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                        np.max(VIDEO_BIT_RATE))  # last quality
                    state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
                    state[2, -1] = float(video_chunk_size) / float(
                        delay) / M_IN_K  # kilo byte / ms
                    state[3, -1] = float(
                        delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
                    state[4, :A_DIM] = np.array(
                        next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
                    state[5, -1] = np.minimum(video_chunk_remain,
                                              CHUNK_TIL_VIDEO_END_CAP) / float(
                                                  CHUNK_TIL_VIDEO_END_CAP)

                    action_prob = actor.predict(
                        np.reshape(state, (1, S_INFO, S_LEN)))
                    action_cumsum = np.cumsum(action_prob)
                    bit_rate = (action_cumsum >
                                np.random.randint(1, RAND_RANGE) /
                                float(RAND_RANGE)).argmax()

                    output_file.write(
                        str(VIDEO_BIT_RATE[int(bit_rate)] * 1000) + '\n')
                    file_permission.write('0\n')
                    output_file.close()
                    file_permission.close()
                    # Note: we need to discretize the probability into 1/RAND_RANGE steps,
                    # because there is an intrinsic discrepancy in passing single state and batch states

                    s_batch.append(state)

                    entropy_record.append(a3c.compute_entropy(action_prob[0]))

                    if end_of_video:
                        log_file.write('\n')
                        log_file.close()

                        last_bit_rate = DEFAULT_QUALITY
                        bit_rate = DEFAULT_QUALITY  # use the default action here

                        del s_batch[:]
                        del a_batch[:]
                        del r_batch[:]

                        action_vec = np.zeros(A_DIM)
                        action_vec[bit_rate] = 1

                        s_batch.append(np.zeros((S_INFO, S_LEN)))
                        a_batch.append(action_vec)
                        entropy_record = []

                        print "video count", video_count
                        video_count += 1

                        log_path = LOG_FILE + '_sim' + '_' + str(video_count)
                        log_file = open(log_path, 'wb')
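
All of these agents maintain the observation history the same way: np.roll shifts every feature row one column to the left, and the newest measurements overwrite the last column. A small sketch with assumed S_INFO/S_LEN:

import numpy as np

S_INFO, S_LEN = 6, 8
state = np.zeros((S_INFO, S_LEN))

def push_observation(state, obs):
    # obs: length-S_INFO vector of the newest normalized measurements
    state = np.roll(state, -1, axis=1)   # drop the oldest column
    state[:, -1] = obs                   # append the newest one
    return state

state = push_observation(state, np.arange(S_INFO, dtype=float))
print(state[:, -1])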
Example #14
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue):

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        sensor_selection = DEFAULT_SELECTION

        action_vec = np.zeros(A_DIM)
        action_vec[sensor_selection] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        k = 0
        sum_age = 0
        sum_violation = 0

        while True:  # experience video streaming forever

            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, video_chunk_size = net_env.get_video_chunk(
                sensor_selection)

            max_age = (age[:, k]).argmax()

            sum_age_before = np.sum(age[:, k])
            current_violation = 0
            for n in range(0, A_DIM):

                #for k in range (1,TRAIN_SEQ_LEN):
                if n == sensor_selection:
                    age[n, k] = delay

                else:
                    age[n, k] = age[n, k - 1] + delay

                if age[n, k] > tau[n]:
                    current_violation += 1

            for n in range(0, A_DIM):
                expected_age_n[n] = np.sum(age[n, :]) / ((k + 1))

            expected_age = np.sum(expected_age_n[:]) / A_DIM

            reward = (-np.sum(age[:, k]) - lamba * current_violation) / 100

            r_batch.append(reward)

            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms

            state[0, -1] = float(age[0, k]) / M_IN_K
            state[1, -1] = float(age[1, k]) / M_IN_K
            state[2, -1] = float(age[2, k]) / M_IN_K
            state[3, -1] = float(age[3, k]) / M_IN_K
            state[4, -1] = float(age[4, k]) / M_IN_K
            state[5, -1] = float(age[5, k]) / M_IN_K
            state[6, -1] = float(age[6, k]) / M_IN_K
            state[7, -1] = float(age[7, k]) / M_IN_K
            state[8, -1] = float(age[8, k]) / M_IN_K
            state[9, -1] = float(age[9, k]) / M_IN_K
            #state[10, -1] = float(PACKET_SIZE[0])/float(PACKET_SIZE[9])
            #state[11, -1] = float(PACKET_SIZE[1])/float(PACKET_SIZE[9])
            #state[12, -1] = float(PACKET_SIZE[2])/float(PACKET_SIZE[9])
            #state[13, -1] = float(PACKET_SIZE[3])/float(PACKET_SIZE[9])
            #state[14, -1] = float(PACKET_SIZE[4])/float(PACKET_SIZE[9])
            #state[15, -1] = float(PACKET_SIZE[5])/float(PACKET_SIZE[9])
            #state[16, -1] = float(PACKET_SIZE[6])/float(PACKET_SIZE[9])
            #state[17, -1] = float(PACKET_SIZE[7])/float(PACKET_SIZE[9])
            #state[18, -1] = float(PACKET_SIZE[8])/float(PACKET_SIZE[9])
            #state[19, -1] = float(PACKET_SIZE[9])/float(PACKET_SIZE[9])
            state[10, -1] = float(delay) / 100
            state[11, -1] = float(PACKET_SIZE[sensor_selection]) / (
                100 * float(delay) * float(PACKET_SIZE[9]))

            log_file.write(
                str(time_stamp) + '\t' + str(reward) + '\t' + str(age[:, k]) +
                '\t' + str(expected_age_n) + '\n')
            log_file.flush()

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            sensor_selection = (
                action_cumsum >
                np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()

            entropy_record.append(a3c.compute_entropy(action_prob[0]))
            time_stamp += 1

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN:  #or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    True,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

            s_batch.append(state)

            action_vec = np.zeros(A_DIM)
            action_vec[sensor_selection] = 1
            a_batch.append(action_vec)
            k += 1
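
The block above is the A3C worker/coordinator handoff: once TRAIN_SEQ_LEN rewards accumulate, the rollout is pushed to the coordinator and the worker blocks until fresh network parameters come back. A toy sketch with plain in-process queues standing in for the multiprocessing ones (the payload layout mirrors the exp_queue.put call above):

try:
    import queue                   # Python 3
except ImportError:
    import Queue as queue          # Python 2

TRAIN_SEQ_LEN = 3
exp_queue = queue.Queue()
net_params_queue = queue.Queue()
net_params_queue.put(('actor_params', 'critic_params'))   # pretend coordinator push

s_batch, a_batch = ['s0', 's1', 's2', 's3'], ['a0', 'a1', 'a2', 'a3']
r_batch, entropy_record = [0.1, 0.2, 0.3], [0.9, 0.8, 0.7]

if len(r_batch) >= TRAIN_SEQ_LEN:
    # hand over the rollout, minus the first (uncontrolled) step
    exp_queue.put([s_batch[1:], a_batch[1:], r_batch[1:], True,
                   {'entropy': list(entropy_record)}])
    # block for updated parameters, then reset the local buffers
    actor_net_params, critic_net_params = net_params_queue.get()
    del s_batch[:]
    del a_batch[:]
    del r_batch[:]
    del entropy_record[:]

print(exp_queue.get())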
Example #15
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue):

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        while True:  # experience video streaming forever

            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # -- linear reward --
            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            # -- log scale reward --
            # log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[-1]))
            # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[-1]))

            # reward = log_bit_rate \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

            # -- HD reward --
            # reward = HD_REWARD[bit_rate] \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(HD_REWARD[bit_rate] - HD_REWARD[last_bit_rate])

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
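
entropy_record tracks how exploratory the policy still is at each step. A plain re-implementation of the Shannon entropy that a3c.compute_entropy presumably returns (the project's helper may differ in details such as the epsilon or the log base):

import numpy as np

def compute_entropy(action_prob, eps=1e-6):
    # Shannon entropy of a discrete action distribution
    p = np.asarray(action_prob, dtype=float)
    return float(-np.sum(p * np.log(p + eps)))

print(compute_entropy([0.25, 0.25, 0.25, 0.25]))   # ~ log(4) = 1.386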
Example #16
def main():
    args = parser.parse_args()
    if args.lin:
        qoe_metric = 'results_lin'
    elif args.log:
        qoe_metric = 'results_log'
    else:
        print('Please select the QoE Metric!')
        return
    
    if args.FCC:
        dataset = 'fcc'
    elif args.HSDPA:
        dataset = 'HSDPA'
    elif args.Oboe:
        dataset = 'Oboe'
    else:
        print('Please select the dataset!')
        return
    
    dataset_path = './traces_' + dataset + '/'
    Log_file_path = './' + qoe_metric + '/' + dataset + '/log_sim_rl'

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    # if not os.path.exists(SUMMARY_DIR):
    #     os.makedirs(SUMMARY_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(dataset_path)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN], action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty
            if qoe_metric == 'results_lin':
                REBUF_PENALTY = 4.3
                reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                        - REBUF_PENALTY * rebuf \
                        - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                                VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            else:
                REBUF_PENALTY = 2.66
                log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0]))
                log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))

                reward = log_bit_rate \
                        - REBUF_PENALTY * rebuf \
                        - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                print "video count", video_count
                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')
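
When --log is selected above, bitrates are passed through log(R / R_min) before the smoothness term is applied, with REBUF_PENALTY = 2.66. A standalone sketch of that variant, using an assumed bitrate ladder:

import numpy as np

VIDEO_BIT_RATE = [300, 750, 1200, 1850, 2850, 4300]   # Kbps, assumed ladder
REBUF_PENALTY = 2.66
SMOOTH_PENALTY = 1.0

def log_qoe(bit_rate, last_bit_rate, rebuf):
    log_br = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0]))
    log_last = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))
    return log_br - REBUF_PENALTY * rebuf - SMOOTH_PENALTY * np.abs(log_br - log_last)

print(log_qoe(bit_rate=4, last_bit_rate=2, rebuf=0.3))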
Example #17
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
    #print(all_cooked_bw)

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    with tf.Session() as sess, open(LOG_FILE, 'w') as log_file:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        summary_ops, summary_vars = a3c.build_summaries()

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(SUMMARY_DIR,
                                       sess.graph)  # training monitor
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        epoch = 0
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        actor_gradient_batch = []
        critic_gradient_batch = []

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video,video_chunk_counter,throughput, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)
            #print(net_env.get_video_chunk(bit_rate))
            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smooth penalty
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
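            # worked example with hypothetical constants (VIDEO_BIT_RATE in kbps,
            # REBUF_PENALTY = 4.3, SMOOTH_PENALTY = 1): switching from 1200 to 2850 kbps
            # with 0.5 s of rebuffering gives 2.85 - 4.3 * 0.5 - |2.85 - 1.2| = -0.95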
            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)
            # print(state)

            # dequeue history record
            state = np.roll(state, -1, axis=1)
            print('state', state)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            #rand=np.random.randint(1,RAND_RANGE)/ float(RAND_RANGE)
            #print(action_cumsum,action_cumsum>rand,(action_cumsum>rand).argmax())
            #print(action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE))
            #print(action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            print(
                '[%d]:download time %.2fms,chunk size %d,buffer=%.2fs,bitrate=%d'
                % (video_chunk_counter, delay, video_chunk_size, buffer_size,
                   last_bit_rate))
            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:  # do training once

                actor_gradient, critic_gradient, td_batch = \
                    a3c.compute_gradients(s_batch=np.stack(s_batch[1:], axis=0),  # ignore the first chunk
                                          a_batch=np.vstack(a_batch[1:]),  # since we don't have the
                                          r_batch=np.vstack(r_batch[1:]),  # control over it
                                          terminal=end_of_video, actor=actor, critic=critic)
                td_loss = np.mean(td_batch)

                actor_gradient_batch.append(actor_gradient)
                critic_gradient_batch.append(critic_gradient)

                print("====")
                print("Epoch", epoch)
                print("TD_loss", td_loss, "Avg_reward", np.mean(r_batch),
                      "Avg_entropy", np.mean(entropy_record))
                print("====")

                summary_str = sess.run(summary_ops,
                                       feed_dict={
                                           summary_vars[0]: td_loss,
                                           summary_vars[1]: np.mean(r_batch),
                                           summary_vars[2]:
                                           np.mean(entropy_record)
                                       })

                writer.add_summary(summary_str, epoch)
                writer.flush()

                entropy_record = []

                if len(actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

                    assert len(actor_gradient_batch) == len(
                        critic_gradient_batch)
                    # assembled_actor_gradient = actor_gradient_batch[0]
                    # assembled_critic_gradient = critic_gradient_batch[0]
                    # assert len(actor_gradient_batch) == len(critic_gradient_batch)
                    # for i in xrange(len(actor_gradient_batch) - 1):
                    #     for j in xrange(len(actor_gradient)):
                    #         assembled_actor_gradient[j] += actor_gradient_batch[i][j]
                    #         assembled_critic_gradient[j] += critic_gradient_batch[i][j]
                    # actor.apply_gradients(assembled_actor_gradient)
                    # critic.apply_gradients(assembled_critic_gradient)
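                    # the commented-out block above would sum the accumulated gradients and
                    # apply them in a single step; the loop below instead applies each
                    # mini-batch's gradients separately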

                    for i in range(len(actor_gradient_batch)):
                        actor.apply_gradients(actor_gradient_batch[i])
                        critic.apply_gradients(critic_gradient_batch[i])

                    actor_gradient_batch = []
                    critic_gradient_batch = []

                    epoch += 1
                    if epoch % MODEL_SAVE_INTERVAL == 0:
                        # Save the neural net parameters to disk.
                        save_path = saver.save(
                            sess, SUMMARY_DIR + "/nn_model_ep_" + str(epoch) +
                            ".ckpt")
                        print("Model saved in file: %s" % save_path)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
Example #18
def run(port=8333, log_file_path=LOG_FILE):

    np.random.seed(RANDOM_SEED)
    with tf.Session() as sess, open(log_file_path, 'wb') as log_file:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=S_DIM,
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=S_DIM,
                                   learning_rate=CRITIC_LR_RATE)

        summary_ops, summary_vars = a3c.build_summaries()
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)
        saver = tf.train.Saver()  # save neural net parameters

        #restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        init_action = np.zeros(A_DIM)
        # by default we simply use the first lambda (one-hot, matching the action vectors below)
        init_action[DEFAULT_LAMBDA] = 1

        s_batch = [np.zeros(S_DIM)]
        a_batch = [init_action]
        r_batch = []
        entropy_record = []  #this is for training

        actor_gradient_batch = []  #this is for training
        critic_gradient_batch = []  #this is for training

        last_lambda = DEFAULT_LAMBDA
        epoch = 0
        end_of_training = False
        # Create a TCP/IP socket
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        # Bind the socket to the port
        server_address = ('localhost', port)
        print >> sys.stderr, 'starting up on %s port %s' % server_address
        sock.bind(server_address)

        # Listen for incoming connections
        sock.listen(5)
        count = 0

        while True:
            # Wait for a connection
            print >> sys.stderr, 'waiting for a connection'
            connection, addr = sock.accept()
            print 'Connected with ' + addr[0] + ':' + str(addr[1])

            # Receive the json file
            # json file format:
            # 'reward': float
            # 'state': array = '{"state": ["1", "3", "4", ...]}'
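            # framing: the peer first sends a 4-byte big-endian length ('!i'), then that many
            # bytes of JSON carrying 'reward' and 'state'; a hedged client-side sketch of this
            # exchange is shown after this example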
            #numBytes = sys.getsizeof(int)
            #print ("size to receive: " + str(numBytes))
            size = connection.recv(4)
            size = struct.unpack('!i', size)[0]
            print >> sys.stderr, 'received "%s"' % size
            data = connection.recv(size)
            jsonData = json.loads(data)
            print jsonData

            #to receive reward
            reward = float(jsonData['reward'])
            if (count > 0):
                r_batch.append(reward)
            else:
                r_batch.append(0.0)

            count = count + 1
            #to receive state
            stateArray = jsonData['state']
            state = np.array(stateArray)
            print(state)
            #to compute action
            action_prob = actor.predict(np.reshape(state, (1, S_DIM)))
            print("action_prob: ")
            print(action_prob)
            action_cumsum = np.cumsum(action_prob)
            print("action_cumsum: ")
            print(action_cumsum)
            print("comparison: ")
            print(action_cumsum >
                  np.random.randint(1, RAND_RANGE) / float(RAND_RANGE))
            selectedLambda = action_prob.argmax()
            #selectedLambda = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            print >> sys.stderr, 'selectedLambda "%s"' % selectedLambda
            #to update entropy
            entropy_record.append(a3c.compute_entropy(action_prob[0]))  #TODO

            #to update and apply gradient
            if len(r_batch) >= TRAIN_SEQ_LEN:
                actor_gradient, critic_gradient, td_batch = \
                    a3c.compute_gradients(s_batch=np.stack(s_batch[1:], axis=0),
                                          a_batch=np.vstack(a_batch[1:]),
                                          r_batch=np.vstack(r_batch[1:]),
                                          terminal=end_of_training, actor=actor, critic=critic)
                td_loss = np.mean(td_batch)

                print("td_loss: ")
                print(td_loss)
                print("actor_gradient: ")
                print(actor_gradient)
                print("critic_gradient: ")
                print(critic_gradient)

                actor_gradient_batch.append(actor_gradient)
                critic_gradient_batch.append(critic_gradient)

                entropy_record = []
                print("len(actor_gradient_batch) = ")
                print len(actor_gradient_batch)
                if len(actor_gradient_batch) >= GRADIENT_BATCH_SIZE:
                    print("GRADIENT_BATCH_SIZE reached")
                    assert len(actor_gradient_batch) == len(
                        critic_gradient_batch)
                    for i in xrange(len(actor_gradient_batch)):
                        print("###################" + str(i) +
                              "###################")
                        print(actor_gradient_batch[i])
                        print(critic_gradient_batch[i])
                        actor.apply_gradients(actor_gradient_batch[i])
                        critic.apply_gradients(critic_gradient_batch[i])

                    actor_gradient_batch = []
                    critic_gradient_batch = []

                    avg_reward = np.mean(r_batch)
                    summary_str = sess.run(summary_ops,
                                           feed_dict={
                                               summary_vars[0]: td_loss,
                                               summary_vars[1]: avg_reward
                                           })

                    writer.add_summary(summary_str, epoch)
                    writer.flush()
                    log_file.write(
                        str(datetime.datetime.now().strftime(
                            '%Y-%m-%d %H:%M:%S')) + '\t' + str(epoch) + '\t' +
                        str(avg_reward) + '\t' + str(td_loss) + '\n')
                    log_file.flush()

                    epoch += 1
                    if epoch % MODEL_SAVE_INTERVAL == 0:
                        # save the neural net parameters to disk.
                        save_path = saver.save(
                            sess, "./nn_model_ep_" + str(epoch) + ".ckpt")
                        print("Model saved in file: %s" % save_path)

                    if epoch == MAX_EPOCH:
                        end_of_training = True

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

            s_batch.append(state)
            action_vec = np.zeros(A_DIM)
            action_vec[selectedLambda] = 1
            a_batch.append(action_vec)

            #to send back action
            print >> sys.stderr, 'sending data back to the client'
            connection.sendall(struct.pack('!i', selectedLambda))
            last_lambda = selectedLambda
            connection.close()

        sock.close()
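
The exchange in run() above is a simple length-prefixed JSON protocol. The sketch below is a hypothetical companion client, not part of the original project: it packs the message length with struct '!i', sends the JSON body with 'reward' and 'state', and reads back the 4-byte selected lambda that run() returns via connection.sendall(struct.pack('!i', selectedLambda)).

import json
import socket
import struct


def send_state_and_get_lambda(reward, state, host='localhost', port=8333):
    # serialize the message exactly as run() expects: 'reward' (float) and 'state' (list)
    payload = json.dumps({'reward': reward, 'state': state}).encode('utf-8')
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((host, port))
    try:
        sock.sendall(struct.pack('!i', len(payload)))  # 4-byte big-endian length prefix
        sock.sendall(payload)                          # JSON body
        selected_lambda = struct.unpack('!i', sock.recv(4))[0]  # action index chosen by run()
    finally:
        sock.close()
    return selected_lambda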
Example #19
File: multi_agent.py Project: xgw/proj
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue):

    net_env = env.Environment(time=all_cooked_time,
                              bandwidth=all_cooked_bw,
                              random_seed=agent_id)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []
        # need to initialize and fetch these before the simulation step
        track_index = []
        hm = head_movement.move_prediction()

        time_stamp = 0
        while True:  # experience video streaming forever

            # the action is from the last decision
            # this is to make the framework similar to the real
            # xgw 20180918: need to modify here

            estimate_track_index = hm.get_head_movement_prediction()
            # actual_track_index = hm.get_head_movement_current()
            actual_track_index = [2, 3, 5, 6]


            delay, rebuf, buffer_size, sleep_time, video_chunk_size, end_of_video = \
                net_env.get_video_chunk(bit_rate, estimate_track_index)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # -- linear reward --
            # reward is video quality - rebuffer penalty - smoothness
            # xgw 20180918: need to modify the reward: add the quality consistency in the viewport
            #               and the buffer
            # the quality consistency in the viewport really reflects the head-movement prediction error,
            # so it is not clear whether a "quality consistency" term should be added here
            # also not clear how to model the qp as the first input
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            # bit_rate_log_reward = np.log((bit_rate + 1) / A_DIM) * BIT_RATE_REWARD_PARAMETER
            # smooth_p = np.exp(np.abs(last_bit_rate - bit_rate) / A_DIM) * SMOOTH_PENALTY
            # reward = bit_rate - REBUF_PENALTY * rebuf - smooth_p

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            # state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            # state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 6 sec
            # state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            # state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            # state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            # state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            state[0, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 6 sec
            state[2, :4] = np.array(actual_track_index)
            state[3, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last chunk's bitrate

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write('time_stamp: ' + str(time_stamp) + '\t' +
                           'VIDEO_BIT_RATE: ' + str(VIDEO_BIT_RATE[bit_rate]) +
                           '\t' + 'buffer_size: ' + str(buffer_size) + '\t' +
                           'rebuf: ' + str(rebuf) + '\t' +
                           'video_chunk_size: ' + str(video_chunk_size) +
                           '\t' + 'delay: ' + str(delay) + '\t' +
                           'avg throughput: ' +
                           str(video_chunk_size / delay) + '\t' + 'reward: ' +
                           str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
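
agent() above only generates experience; the parameter updates happen in a separate coordinator process that feeds net_params_queue and drains exp_queue. The sketch below is a hedged illustration of such a coordinator, not code from multi_agent.py: get_network_params() is assumed to exist as the counterpart of set_network_params(), and the constants S_INFO, S_LEN, A_DIM, ACTOR_LR_RATE and CRITIC_LR_RATE are assumed to come from the same module as agent().

import numpy as np
import tensorflow as tf

import a3c


def central_agent(net_params_queues, exp_queues):
    # one (net_params_queue, exp_queue) pair per agent() worker
    with tf.Session() as sess:
        actor = a3c.ActorNetwork(sess, state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM, learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess, state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)
        sess.run(tf.global_variables_initializer())

        while True:
            # push the current parameters to every agent
            actor_params = actor.get_network_params()    # assumed getter
            critic_params = critic.get_network_params()  # assumed getter
            for q in net_params_queues:
                q.put([actor_params, critic_params])

            # pull one experience batch per agent and apply its gradients
            for q in exp_queues:
                s_batch, a_batch, r_batch, terminal, info = q.get()
                actor_gradient, critic_gradient, td_batch = \
                    a3c.compute_gradients(s_batch=np.stack(s_batch, axis=0),
                                          a_batch=np.vstack(a_batch),
                                          r_batch=np.vstack(r_batch),
                                          terminal=terminal, actor=actor, critic=critic)
                actor.apply_gradients(actor_gradient)
                critic.apply_gradients(critic_gradient)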
Example #20
def main():
    # run_id = '0'
    rnd_ratio = 0.8
    if len(sys.argv) > 1:
        run_id = sys.argv[1]
    else:
        run_id = '0'
    seed = RANDOM_SEED + int(run_id)
    np.random.seed(seed)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)
    if not os.path.exists(TRANS_DIR):
        os.makedirs(TRANS_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(cooked_trace_folder=TRACE_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=seed)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
    log_file = open(log_path, 'wb')
    trans_path = TRANS_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
    trans_file = open(trans_path, 'wb')

    last_action = deque(maxlen=2)
    last_action.append(1)
    last_action.append(1)

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN], action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
                old_state = np.zeros((S_INFO, S_LEN), dtype=np.float64)
            else:
                state = np.array(s_batch[-1], copy=True)
                old_state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states
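            # below: with probability rnd_ratio (0.8 here) the policy's choice is overridden by
            # a uniformly random bit rate, so the logged transitions also cover off-policy actions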

            if np.random.random() < rnd_ratio:
                bit_rate = np.random.randint(0, A_DIM)
                print "random action", bit_rate
            send_data = str(bit_rate)
            trans_file.write('|'.join([str(list(old_state.reshape(-1))),
                                          str(list(action_prob.reshape(-1))),
                                          str(list(state.reshape(-1))),
                                          str(reward), str(send_data)]))
            trans_file.write('\n')
            trans_file.flush()
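            # each trans_file line is '|'-separated: old state, action probabilities,
            # new state, reward, and the action string that was sent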

            # print 'state', list(old_state.reshape(-1))
            # print 'action', last_action[0]
            # print 'reward', reward

            last_action.append(send_data)
            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                trans_file.write('\n')
                trans_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                print "video count", video_count, all_file_names[net_env.trace_idx]
                video_count += 1

                if video_count > len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
                log_file = open(log_path, 'wb')
                trans_path = TRANS_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
                trans_file = open(trans_path, 'wb')