Example #1
    def __init__(self, id, config):
        """
        Prioritized experience replay buffer initialization.
        """

        self.id = id
        self.config = config
        self.tree = SumTree(self.config.buffer_size)
        self.buffer_size = self.config.buffer_size
        self.batch_size = self.config.batch_size
        self.alpha = self.config.alpha

        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(self.config.seed)
        self.step_lock = Lock()
        self.sample_lock = Lock()
        self.sample_lock.acquire()  # allow save_step first

        self.eps = EPSILON
        self.beta = 0.6

        self.beta_increment_per_sampling = 2. / float(self.config.total_step)
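        # These hyperparameters follow the standard prioritized-replay scheme
        # (Schaul et al., 2016): priorities are p_i = (|td_error_i| + eps) ** alpha,
        # sampling is proportional to p_i via the SumTree, and importance-sampling
        # weights w_i = (N * P(i)) ** (-beta) are annealed as beta grows toward 1.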

        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "PER_REPLAY_MEMORY_CONFIG", self.config)
Example #2
    def step(self, action):
        total_reward = 0.0
        rewards = []
        total_rb = 0.0
        for u in range(self.config.n_ue):
            a_n_rb = self.tmp_state[u].n_rb
            GLOBAL_LOGGER.get_tb_logger().add_scalar('required_n_rb.' + str(u), a_n_rb, self.n_step)
            if action[u] == 1 and self.tmp_state[u].q_length:
                total_rb += a_n_rb
        if total_rb <= self.config.ue_config.channel.total_n_rb:
            # all requests fit: use total_n_rb as the denominator so each
            # scheduled UE receives exactly the number of RBs it requested
            total_rb = self.config.ue_config.channel.total_n_rb

        for u in range(self.config.n_ue):
            if action[u] == 1 and self.tmp_state[u].q_length > 0:
                n_rb = round(float(self.tmp_state[u].n_rb) / total_rb * self.config.ue_config.channel.total_n_rb)
                GLOBAL_LOGGER.get_tb_logger().add_scalar('A_NRB_' + str(u), n_rb, self.n_step)
                r = self.ue_list[u].step(UE_RB_ACTION(n_rb))
                total_reward += r
                GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD_' + str(u), r, self.n_step)
            else:
                self.ue_list[u].step(UE_RB_ACTION(0))
                r = 0
                GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD_' + str(u), 0, self.n_step)

            rewards.append(r)

        GLOBAL_LOGGER.get_tb_logger().add_scalar('ENV_REWARD', total_reward, self.n_step)
        return np.array(rewards, dtype=float)
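
A small worked example of the allocation rule above, with illustrative numbers: when the scheduled UEs request more RBs than are available, each gets a proportional share; when the requests fit, the clamp makes the scaling factor equal to one, so every UE gets exactly what it asked for.

total_n_rb = 30                               # illustrative channel capacity
requests = [10, 20, 30]                       # illustrative per-UE n_rb requests
total_rb = max(sum(requests), total_n_rb)     # same clamp as in the code above
alloc = [round(r / total_rb * total_n_rb) for r in requests]
print(alloc)                                  # [5, 10, 15]: proportional split of the 30 RBs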
Example #3
    def run(self):
        rewards_his = np.zeros(self.config.n_ue)
        total_reward_his = 0
        for e in range(self.config.n_episode):
            self.init_env()
            for t in range(self.config.n_step):
                state = self.get_state()
                action = self.agent.get_action(state)
                action_ = np.copy(action)
                if self.config.action_conversion_f is None:
                    for u in range(self.config.n_ue):
                        if (action_[u] > 1.):
                            action_[u] = 1.
                else:
                    action_ = self.config.action_conversion_f(action_)
                rewards = self.step(action_)

                rewards_his = 0.99 * rewards_his + 0.01 * rewards
                for i in range(self.config.n_ue):
                    GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD.moving_avg.' + str(i), rewards_his[i],
                                                             self.n_step)

                total_reward = np.sum(rewards)
                total_reward_his = 0.99 * total_reward_his + 0.01 * total_reward

                GLOBAL_LOGGER.get_tb_logger().add_scalar('ENV_REWARD.moving_avg', total_reward_his, self.n_step)

                next_state = self.get_state()
                done = 0
                if t == self.config.n_step - 1:
                    done = 1
                self.agent.save_step(state, action, rewards, next_state, done)
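
The 0.99/0.01 update above is an exponential moving average with smoothing factor 0.01, so the reward from k steps ago is weighted by 0.01 * 0.99**k; the same form is used for total_reward_his. A quick numerical check with illustrative values:

history = [1.0, 0.5, 2.0]
ema = 0.0
for r in history:
    ema = 0.99 * ema + 0.01 * r
closed_form = sum(0.01 * 0.99 ** k * r for k, r in enumerate(reversed(history)))
assert abs(ema - closed_form) < 1e-12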
Example #4
    def step(self, action):
        '''
        :param action: per-UE action; 0 or 1 for binary Tx, or a value in [0, 1]
            giving the fraction of total RBs requested
        :return: per-UE rewards as a numpy array
        '''
        total_reward = 0.0
        rewards = []
        total_rb_pct = 0.0
        for u in range(self.config.n_ue):
            a_n_rb = action[u] * self.config.ue_config.channel.total_n_rb
            GLOBAL_LOGGER.get_tb_logger().add_scalar('required_n_rb.' + str(u), a_n_rb, self.n_step)
            total_rb_pct += action[u]
        if total_rb_pct < 1.:
            # requested fractions sum to less than one: no down-scaling needed
            total_rb_pct = 1.
        for u in range(self.config.n_ue):
            if int(action[u] * self.config.ue_config.channel.total_n_rb) > 0 and self.tmp_state[u].q_length > 0:
                n_rb = round(float(action[u]) / total_rb_pct * self.config.ue_config.channel.total_n_rb)
                GLOBAL_LOGGER.get_tb_logger().add_scalar('A_NRB_' + str(u), n_rb, self.n_step)
                r = self.ue_list[u].step(UE_RB_ACTION(n_rb))
                total_reward += r
                GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD_' + str(u), r, self.n_step)
            else:
                self.ue_list[u].step(UE_RB_ACTION(0))
                r = 0
                GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD_' + str(u), 0, self.n_step)

            rewards.append(r)

        GLOBAL_LOGGER.get_tb_logger().add_scalar('ENV_REWARD', total_reward, self.n_step)
        return np.array(rewards, dtype=float)
Example #5
    def __init__(self, id, config):
        LearningModel.__init__(self)
        self.id = id
        self.config = config
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "DDPG_CONFIG", self.config)

        self.actor = None
        self.actor_optim = None

        self.critic = None
        self.critic_optim = None

        self.actor_target = None

        self.critic_target = None

        self.init_model()
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "actor_target_arch", self.actor_target)
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "critic_target_arch", self.critic_target)

        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "actor_arch", self.actor)
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "critic_arch", self.critic)

        self.step_counter = 0

        if USE_CUDA:
            self.move_nn_to_gpu()
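
DDPG keeps the actor_target/critic_target networks created above close to the online networks, conventionally via a Polyak soft update after each training step. The sketch below shows that conventional form with an illustrative tau; it is an assumption about, not a copy of, this project's update_nn() (called in Example #13).

# Conventional DDPG soft (Polyak) target update; tau is illustrative.
def soft_update(target_net, source_net, tau=0.001):
    for t_param, s_param in zip(target_net.parameters(), source_net.parameters()):
        t_param.data.copy_(tau * s_param.data + (1.0 - tau) * t_param.data)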
Example #6
    def get_action(self, state):
        """
        Get the action from the actor
        :param state: env state in np
        :return: action in np
        """

        # if a scheduler function (expert logic) is provided, it is used in place of the learned actor
        with self.actor_lock:
            if self.scheduler_function is None:
                state = to_tensor(state)
                ret = self.actor.forward(state)
                ret = to_numpy(ret)
            else:
                ret = self.scheduler_function.forward(state)

            noise_factor = math.exp(-self.n_step *
                                    self.config.noise_attenuation)
            if self.action_noise:
                GLOBAL_LOGGER.get_tb_logger().add_scalar(
                    "NOISE_FACTOR", noise_factor, self.n_step)
                ret = self.action_noise.add_noise(ret, noise_factor)
            for a in range(len(ret)):
                GLOBAL_LOGGER.get_tb_logger().add_scalar(
                    "ACTION_" + str(a), ret[a], self.n_step)
        return ret
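
The exploration noise above is scaled by exp(-n_step * noise_attenuation), so it decays smoothly toward zero over training; the attenuation value below is illustrative, not the project's configuration.

import math

# Illustrative decay of the noise factor computed in get_action() above.
noise_attenuation = 1e-4
for n_step in (0, 10000, 50000):
    print(n_step, math.exp(-n_step * noise_attenuation))
# 0 -> 1.0, 10000 -> ~0.37, 50000 -> ~0.0067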
Example #7
    def __init__(self, id, config, agent, learning_model, replay_memory):
        Thread.__init__(self)
        self.id = id
        self.config = config
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "CONTROLLER_CONFIG", config)

        self.agent = agent
        self.model = learning_model
        self.replay_memory = replay_memory

        self.agent.update_actor(self.model.get_actor())
        if isinstance(self.agent, Thread):
            print("agent is a Thread; starting it")
            self.agent.start()
        self.step = 0
Example #8
    def run(self):
        for x in range(self.config.total_step):
            batch = self.replay_memory.sample()
            if batch is not None:
                training_info = self.model.step(batch)
                if training_info is not None:
                    self.replay_memory.training_info(batch, training_info)
                self.agent.update_actor(self.model.get_actor())

            if x % 5000 == 0:
                output_file_path = GLOBAL_LOGGER.get_log_path()
                self.model.save(output_file_path, str(x))
                GLOBAL_LOGGER.reset_event_file()

        output_file_path = GLOBAL_LOGGER.get_log_path()

        self.model.save(output_file_path, 'final')
Example #9
    def __init__(self, id, config, replay_memory, scheduler_function=None):
        StatusObject.__init__(self)
        self.id = id
        self.env_id = None
        self.replay_memory = replay_memory
        self.actor = None
        self.actor_lock = Lock()
        self.config = config

        self.scheduler_function = scheduler_function

        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "AGENT_CONFIG", self.config)
        if isinstance(self.config.noise_config, OU_ACTION_NOISE_CONFIG):
            self.action_noise = OUActionNoise(self.id,
                                              self.config.noise_config)
        else:
            self.action_noise = None
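
OUActionNoise, selected above when an OU_ACTION_NOISE_CONFIG is given, conventionally implements an Ornstein-Uhlenbeck process; a minimal sketch follows, with theta, sigma, and mu as illustrative values rather than the project's configuration.

import numpy as np

# Minimal Ornstein-Uhlenbeck noise sketch (illustrative parameters).
def ou_step(x, mu=0.0, theta=0.15, sigma=0.2):
    return x + theta * (mu - x) + sigma * np.random.randn(*np.shape(x))

noise = np.zeros(3)
for _ in range(5):
    noise = ou_step(noise)   # temporally correlated exploration noise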
Example #10
    def __init__(self, id, config):
        """Initialize a ReplayBuffer object.
        Params
        ======
            buffer_size (int): maximum size of buffer
            batch_size (int): size of each training batch
        """
        self.id = id
        self.config = config

        GLOBAL_LOGGER.get_tb_logger().add_text_of_object(
            "REPLAY_MEMORY_CONFIG", self.config)

        self.memory = deque(
            maxlen=self.config.buffer_size)  # internal memory (deque)
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(self.config.seed)
        self.step_lock = Lock()
        self.sample_lock = Lock()
        self.sample_lock.acquire()  # allow save_step first
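
A minimal sketch of the companion methods such a deque-based buffer usually provides, given the Experience namedtuple and batch_size initialized above; the bodies are assumptions, not this project's code, and the step_lock/sample_lock handshake hinted at in __init__ is omitted for brevity.

import random

def save_step(self, state, action, reward, next_state, done):
    # append one transition as the Experience namedtuple defined above
    self.memory.append(self.experience(state, action, reward, next_state, done))

def sample(self):
    # return a uniformly sampled batch once enough transitions are stored
    if len(self.memory) < self.config.batch_size:
        return None
    return random.sample(self.memory, k=self.config.batch_size)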
Example #11
    def run(self):
        rewards_his = np.zeros(self.config.n_ue)
        total_reward_his = 0
        for e in range(self.config.n_episode):
            self.init_env()
            for t in range(self.config.n_step):
                state = self.get_state()
                action = self.agent.get_action(state)
                action_ = np.copy(action)
                phi = self.get_phi()

                if self.config.action_conversion_f is None:
                    action_[action_ > 0.] = 1
                    action_[action_ <= 0.] = 0
                else:
                    action_ = self.config.action_conversion_f(action_)
                rewards = self.step(action_)

                rewards_his = 0.99 * rewards_his + 0.01 * rewards
                for i in range(self.config.n_ue):
                    GLOBAL_LOGGER.get_tb_logger().add_scalar('UE_REWARD.moving_avg.' + str(i), rewards_his[i],
                                                             self.n_step)

                total_reward = np.sum(rewards)
                total_reward_his = 0.99 * total_reward_his + 0.01 * total_reward

                GLOBAL_LOGGER.get_tb_logger().add_scalar('ENV_REWARD.moving_avg', total_reward_his, self.n_step)

                next_state = self.get_state()
                phi_next = self.get_phi()
                shaper = - 1. * (phi - self.gamma * phi_next)
                for i in range(self.config.n_ue):
                    GLOBAL_LOGGER.get_tb_logger().add_scalar('shaper.' + str(i), shaper[i],
                                                             self.n_step)

                done = 0
                if t == self.config.n_step - 1:
                    done = 1
                self.agent.save_step(state, action, rewards + shaper, next_state, done)
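
The shaper computed above, -(phi - gamma * phi_next), is exactly the potential-based shaping term gamma * Phi(s') - Phi(s) of Ng et al. (1999), which alters the reward signal without changing the optimal policy. A one-line check with illustrative values:

gamma = 0.99                      # illustrative discount value
phi, phi_next = 2.0, 1.5          # illustrative potentials
shaper = -1. * (phi - gamma * phi_next)
assert abs(shaper - (gamma * phi_next - phi)) < 1e-12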
Example #12
    def run(self):
        for x in range(self.config.total_step):
            t1 = time.time()
            batch = self.replay_memory.sample(asynchronization=True)
            if batch is not None:
                training_info = self.model.step(batch)
                if training_info is not None:
                    self.replay_memory.training_info(batch, training_info)
                self.agent.update_actor(self.model.get_actor())
                GLOBAL_LOGGER.get_tb_logger().add_scalar(
                    "training_time",
                    time.time() - t1, x)
            else:
                time.sleep(0.01)

            if x % 5000 == 0:
                output_file_path = GLOBAL_LOGGER.get_log_path()
                self.model.save(output_file_path, str(x))
                GLOBAL_LOGGER.reset_event_file()

        output_file_path = GLOBAL_LOGGER.get_log_path()

        self.model.save(output_file_path, 'final')
Example #13
    def step(self, batch):
        self._print("learn")
        states = to_tensor(batch[0])
        actions = to_tensor(batch[1])
        rewards = self._reward(to_tensor(batch[2]), states)
        next_states = to_tensor(batch[3])
        done = to_tensor(batch[4])
        self._print("states", batch[0])
        self._print("actions", batch[1])

        a = self.actor_target.forward(next_states)
        a = self._action_match(a)
        s_a = torch.cat((next_states, a), dim=1)
        q = self.critic_target.forward(s_a)
        y = torch.mul(q, self.config.rl_config.gamma)
        self._print("gamma", self.config.rl_config.gamma)
        self._print("rewards", rewards)
        self._print("q", q)

        y = torch.add(rewards, y).detach()
        self._print("y", y)

        actions = self._action_match(actions)
        s_a = torch.cat((states, actions), dim=1)
        q = self.critic.forward(s_a)
        l_critic = F.smooth_l1_loss(q, y, reduction='none')
        self._print("loss", l_critic)

        l_critic_per_batch = torch.sum(l_critic, dim=1, keepdim=True)
        self._print('l_critic_per_batch', l_critic_per_batch)
        ret_per_e = to_numpy(l_critic)
        ret_per_e = ret_per_e * self._per_w_multiplier(batch)
        self._print('ret_per_e_full', ret_per_e)
        ret_per_e = np.sum(ret_per_e, axis=1, keepdims=True)
        self._print('ret_per_e', ret_per_e)

        if len(batch) > 5:
            weights = to_tensor(batch[5])
            self._print("weights", weights)
            l_critic = torch.mul(l_critic_per_batch, weights)
            self._print("w_l_critic", l_critic)

        l_critic = torch.mean(l_critic)

        self.critic_optim.zero_grad()
        l_critic.backward()
        self.critic_optim.step()

        a = self.actor.forward(states)
        s_a = torch.cat((states, a), dim=1)
        l_actor = self.critic.forward(s_a)

        l_actor_per_batch = torch.sum(l_actor, dim=1, keepdim=True)
        if len(batch) > 5:
            weights = to_tensor(batch[5])
            self._print("weights", weights)
            l_actor = torch.mul(l_actor_per_batch, weights)
            self._print("w_l_actor", l_actor)

        l_actor = torch.mean(torch.neg(l_actor))

        self.actor_optim.zero_grad()
        l_actor.backward()
        self.actor_optim.step()

        GLOBAL_LOGGER.get_tb_logger().add_scalar('DDPG.loss_actor',
                                                 to_numpy(l_actor),
                                                 self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('DDPG.loss_critic',
                                                 to_numpy(l_critic),
                                                 self.n_step)

        self.update_nn()
        self.step_counter += 1

        return ret_per_e
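
In summary, step() above computes the standard DDPG targets and losses, where w are the PER importance weights from batch[5] when present (otherwise 1); note that the done tensor loaded at the top is not folded into the target in this snippet.

y = r + \gamma\, Q'_{\theta'}(s',\ \mu'_{\phi'}(s'))
L_{critic} = \mathrm{mean}\big(w \cdot \mathrm{SmoothL1}(Q_{\theta}(s, a),\ y)\big)
L_{actor}  = -\,\mathrm{mean}\big(w \cdot Q_{\theta}(s,\ \mu_{\phi}(s))\big)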
Example #14
drl_c = exp_drl_config(2, int(1e9))
drl_c.actor_lr = 1e-4
drl_c.critic_lr = 1e-4
dir = os.path.dirname(os.path.abspath(__file__))
dir = os.path.join(dir, 'example_nn')
drl_c.actor_load_path = os.path.join(dir, 'actor_0.pt')
drl_c.critic_load_path = os.path.join(dir, 'critic_0.pt')

drl_c.reload_config()

assert isinstance(drl_c.actor_config.af_config[-1], nn.modules.Tanh)

log_path = os.path.dirname(os.path.realpath(__file__))
folder_name = "online/"
experiment_name = "online_example"
GLOBAL_LOGGER.set_log_path(log_path, folder_name, experiment_name)

rm = SimReplayMemory(0, drl_c.replay_memory_config)

nf = None
# def noise_f(p, t_now, t_start):
#     return p + 0.1 * p * np.random.randn() * math.exp(- (t_now - t_start) / 60)
#
# nf = noise_f

rt_agent = RTAgent(0, None, '0.0.0.0', 4000, drl_c.actor_config, GLOBAL_LOGGER,
                   nf)
rt_agent.set_replay_memory(rm)

ddpg = MultiHeadCriticDDPG(0, drl_c.ddpg_config)
Example #15
    def __init__(self, id, config, agent):
        Thread.__init__(self)
        SimEnv.__init__(self, id, config, agent)
        GLOBAL_LOGGER.get_tb_logger().add_text_of_object("ENV_CONFIG", self.config)
Example #16
from sim_src.sim_env.sim_agent import SimAgent
from sim_src.sim_env.sim_env import SimEnvTxBinary_RewardShaping
from sim_src.tb_logger import GLOBAL_LOGGER

env_c = env_config_helper()
drl_c = ddpg_config_helper(env_c.N_UE, env_c.N_STEP * env_c.N_EPISODE)

env_c.reload_config()
drl_c.reload_config()

import os

log_path = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
folder_name = "tb-data"
experiment_name = "ka"
GLOBAL_LOGGER.set_log_path(log_path, folder_name, experiment_name)
scalar_list = []
scalar = 'TX_DELAY_'
scalar_list.append(scalar)

scalar = 'N_RLCTX_'
scalar_list.append(scalar)

scalar = 'N_DISCARD_'
scalar_list.append(scalar)

scalar = 'RLC_REWARD_'
scalar_list.append(scalar)

scalar = 'N_CH_TX_OK_'
scalar_list.append(scalar)
Example #17

    def __init__(self, server_bind_ip, server_bind_port, logger):
        SctpServer.__init__(self, 'TBServer', server_bind_ip, server_bind_port,
                            100)
        self.logger = logger
        self.tb_client_listener_thread_list = []

    def connection_handler(self, conn):
        print('TBServer get conn from', conn)
        c = TBClientListener(conn, self.logger)
        c.start()
        self.tb_client_listener_thread_list.append(c)


if __name__ == '__main__':
    from sim_src.tb_logger import GLOBAL_LOGGER

    GLOBAL_LOGGER.set_log_path('/tmp/aaaaa/', 'test_tensor_board_server',
                               'test_tensor_board_server')

    t = scalar()
    t.tti = 1213
    t.name = 'hello'
    ts = Timestamp()
    t.timestamp.seconds = 10
    t.timestamp.nanos = 112310
    print(t.timestamp.ToMicroseconds())
    print(t.name)
    print(t.timestamp)
    print(t.tti)
    print(t.value)

    server = TBServer(server_bind_ip='127.0.1.100',
                      server_bind_port=TENSORBOARD_SERVER_PORT,
                      logger=GLOBAL_LOGGER.get_tb_logger())
Example #18
    def step(self, action):
        ret = 0.
        n_txed = 0
        if action.tx:
            ret = self.get_hol_reward()
            if self.queue:
                GLOBAL_LOGGER.get_tb_logger().add_scalar(
                    'TX_DELAY_' + str(self.id), self.get_hol(), self.n_step)
                n_txed = 1
            self.pop()
        GLOBAL_LOGGER.get_tb_logger().add_scalar('N_RLCTX_' + str(self.id),
                                                 n_txed, self.n_step)

        n_discard = self.push()
        GLOBAL_LOGGER.get_tb_logger().add_scalar('N_PACKET_' + str(self.id),
                                                 self.n_packet, self.n_step)

        # assuming the packet arrived at the end of the last TTI
        self.time_step += 1

        n_discard += self.discard()

        GLOBAL_LOGGER.get_tb_logger().add_scalar('N_DISCARD_' + str(self.id),
                                                 n_discard, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('RLC_REWARD_' + str(self.id),
                                                 ret, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('HOL_' + str(self.id),
                                                 self.get_hol(), self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('Qsize_' + str(self.id),
                                                 self.get_n_byte_total(),
                                                 self.n_step)
        return ret
Example #19
    def step(self, action):
        err = 0.
        if action.n_rb > 0:
            err = tx_error_rate_for_n_bytes(action.n_byte, action.n_rb,
                                            db_to_dec(self.get_snr_db()),
                                            self.config.T_f, self.config.rb_bw)

            if action.n_rb >= self.config.total_n_rb and err < 1e-5:
                err = 1e-5
            if err < 1e-5:
                ret = 5.
            else:
                ret = -math.log10(err)
        else:
            ret = 0.

        n_successful_tx = 1
        if p_true(err):
            n_successful_tx = 0

        GLOBAL_LOGGER.get_tb_logger().add_scalar('NRB_' + str(self.id),
                                                 action.n_rb, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('SNR_' + str(self.id),
                                                 self.get_snr_db(),
                                                 self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('E_' + str(self.id), err,
                                                 self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('DIS_' + str(self.id),
                                                 self.dis, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('CH_REWARD_' + str(self.id),
                                                 ret, self.n_step)
        GLOBAL_LOGGER.get_tb_logger().add_scalar('N_CH_TX_OK_' + str(self.id),
                                                 n_successful_tx, self.n_step)
        self.change_position()

        return float(n_successful_tx)
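
With illustrative error values, the reward mapping in step() above gives -log10(err) and saturates at 5 once the error rate drops to 1e-5:

import math

for err in (1e-1, 1e-3, 1e-5):
    print(err, 5. if err < 1e-5 else -math.log10(err))
# 0.1 -> 1.0, 0.001 -> 3.0, 1e-5 -> 5.0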
Example #20
#  DRL-5G-Scheduler; Author: Zhouyou Gu ([email protected]);
#  Supervisors: Wibowo Hardjawana; Branka Vucetic;
#  This project is developed at Centre for IoT and Telecommunications at The University of Sydney,
#  under a project directly funded by Telstra Corporation Ltd., titled
#  “Development of an Open Programmable Scheduler for LTE Networks”, from 2018 to 2019.
#  Reference: Z. Gu, C. She, W. Hardjawana, S. Lumb, D. McKechnie, T. Essery, and B. Vucetic,
#   “Knowledge-assisted deep reinforcement learning in 5G scheduler design:
#  From theoretical framework to implementation,” IEEE JSAC., to appear, 2021

from exp_src.tensorboard_server import *
from sim_src.tb_logger import GLOBAL_LOGGER

log_path = "./"
folder_name = "tb_server_log/"
experiment_name = "tb_server_log"
GLOBAL_LOGGER.set_log_path(log_path, folder_name, experiment_name)
server = TBServer(server_bind_ip='0.0.0.0',
                  server_bind_port=TENSORBOARD_SERVER_PORT,
                  logger=GLOBAL_LOGGER.get_tb_logger())
server.start()
server.join()