Example #1
File: main.py Project: DanzhouWu/TSRA
def main(N1, N2, D, parameter, iteration=int(1e5)):

    agent_list = [
        ALOHA_AGENT(D=D, arrival_rate=parameter[i], trans_prob=1 / N2)
        for i in range(N2)
    ]  # parameters pb2

    n1_list = [
        ALOHA_AGENT(D=D, arrival_rate=0.5, trans_prob=1 / (4 * N1))
        for _ in range(N1)
    ]

    agent_list.extend(n1_list)

    channels = list(parameter[N2:])  # parameters ps2

    n1_channels = [0.5 for _ in range(N1)]
    channels.extend(n1_channels)
    env = ENVIRONMENT(channels=channels, agent_list=agent_list)

    reward_list = []
    energy_list = []

    for t in tqdm(range(iteration)):
        reward, energy, observations = env.step(time=t)

        for i in range(N1 + N2):
            env.agent_list[i].update(observation=observations[i])

        reward_list.append(reward)
        energy_list.append(energy)

    # average over the last 1e4 slots
    throughput = np.mean(reward_list[-int(1e4):])
    power = np.mean(energy_list[-int(1e4):])
    print('Throughput = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power
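
A minimal invocation sketch for the function above, assuming `parameter` packs the N2 arrival rates first and the N2 channel success probabilities after them, which is how the function slices it; all values are illustrative.

N1, N2, D = 2, 2, 10                  # illustrative sizes and deadline
parameter = [0.4, 0.6,                # assumed: arrival rates (pb2) of the N2 agents
             0.8, 0.9]                # assumed: channel success probabilities (ps2)
throughput, power = main(N1, N2, D, parameter, iteration=int(1e4))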
Example #2
File: main.py Project: leideng/TSRA
def upper_bound(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):

    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    # get LP agent policy
    LP_policy = multichainLP(D=D,
                             D_=D_,
                             pb1=pb1,
                             pt1=pt1,
                             ps1=ps1,
                             pb2=pb2,
                             ps2=ps2)

    sp_agent = SPECIFY_AGENT(D=D_, arrival_rate=pb2, policy=LP_policy)
    sp_agent.initialize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=sp_agent)

    UP_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation, aloha_queue=env.aloha.queue)
        UP_reward.append(aloha_reward + agent_reward)

    Upper_bound_timely_throughput = np.mean(UP_reward)
    print('Upper_bound_timely_throughput:', Upper_bound_timely_throughput)
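
A hedged call sketch for `upper_bound`; the values below are illustrative only, with pb1/pb2 read as arrival rates, pt1 as the ALOHA transmission probability, ps1/ps2 as channel success probabilities, and D/D_ as the two agents' deadline parameters (assumed from the constructor calls above).

upper_bound(D=2, D_=2, pb1=0.5, pt1=0.25, ps1=0.8, pb2=0.5, ps2=0.8,
            iteration=int(1e4))  # shorter run for a quick check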
Example #3
File: main.py Project: leideng/TSRA
def HSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e6)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    HSRA_agent = HSRA_AGENT(D=D_,
                            arrival_rate=pb2,
                            learning_rate=0.01,
                            gamma=0.9,
                            length=1)
    HSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=HSRA_agent)
    HSRA_reward = []

    # begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        HSRA_reward.append(aloha_reward + agent_reward)

    HSRA_timely_throughput = np.mean(HSRA_reward[-int(1e5):])
    print('HSRA_timely_throughput:', HSRA_timely_throughput)
Example #4
def DLMA_FNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN_AGENT(D=D_,
                     arrival_rate=pb2,
                     state_size=160,
                     n_actions=2,
                     n_nodes=2,
                     memory_size=1000,
                     replace_target_iter=20,
                     batch_size=64,
                     learning_rate=0.01,
                     gamma=0.9,
                     epsilon=1,
                     epsilon_min=0.005,
                     epsilon_decay=0.995,
                     alpha=0)

    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=DLMA)
    state = [0] * DLMA.state_size

    DLMA_FNN_reward = []

    begin = time.time()
    for i in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)
        DLMA_FNN_reward.append(aloha_reward + agent_reward)

        next_state = (state[8:] + return_action(env.agent.action) +
                      return_observation(observation) +
                      [agent_reward, aloha_reward])

        env.agent.store_transition(state, env.agent.action, agent_reward,
                                   aloha_reward, next_state)
        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        state = next_state

    DLMA_FNN_timely_throughput = np.mean(DLMA_FNN_reward)
    print('DLMA_FNN_timely_throughput:', DLMA_FNN_timely_throughput)

    end = time.time()
    print('Running time of current process: ', (end - begin), 's')
    print('Memory usage of current process: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
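
The helpers `return_action` and `return_observation` are not shown in this snippet. Below is a hypothetical sketch of their shape, inferred from the 8-entry step window used above (`state[8:]` plus two rewards); the 2-way action encoding, the 4-way observation encoding, and the label names are assumptions, not the project's actual code.

def return_action(action):
    # assumed: one-hot over the agent's 2 possible actions (wait / transmit)
    return [1, 0] if action == 0 else [0, 1]

def return_observation(observation):
    # assumed: one-hot over 4 channel observations; the real label set may differ
    categories = ['IDLE', 'SUCCESS', 'COLLISION', 'BUSY']
    onehot = [0] * len(categories)
    onehot[categories.index(observation)] = 1
    return onehot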
Example #5
File: main.py Project: DanzhouWu/TSRA
def main(n, D, l, d, eta, pb1, pt1, ps1, pb2, ps2, T=int(1e5)):

    begin = time.time()
    agents_list = [
        ALOHA_AGENT(D, arrival_rate=pb1, trans_prob=pt1),
        Learn2MAC(D, l, d, eta, T, pb2, ps2),
    ]

    env = Environment(agents_list=agents_list, channels=[ps1, ps2])

    reward_list = []
    energy_list = []
    for t in tqdm(range(T)):
        for agent in agents_list:
            agent.select_action(t)

        reward, energy, patterns = env.step(t)
        reward_list.append(reward)
        energy_list.append(energy)

        for agent in agents_list:
            if isinstance(agent, ALOHA_AGENT):
                agent.update(reward)                  # ALOHA agent
            else:
                agent.update(t, reward, patterns)     # Learn2MAC agent

    end = time.time()
    print('Throughput = {}'.format(np.mean(reward_list[-int(1e4):])))
    print('Energy = {}'.format(np.mean(energy_list[-int(1e4):])))
    print('Time = {}s'.format(end - begin))
    print('Memory = {}MB'.format(
        psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
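
A hedged call sketch for this Learn2MAC setup; the values are illustrative, and the exact meaning of l, d and eta is defined by the Learn2MAC constructor, which is not part of this snippet.

main(n=2, D=10, l=100, d=10, eta=0.1,
     pb1=0.5, pt1=0.25, ps1=0.8, pb2=0.5, ps2=0.8, T=int(1e4))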
Example #6
File: main.py Project: DanzhouWu/TSRA
def FSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e7)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    FSRA_agent = FSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01)
    FSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=FSRA_agent)

    FSRA_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        FSRA_reward.append(aloha_reward + agent_reward)

    FSRA_timely_throughput = np.mean(FSRA_reward[-int(1e5):])
    print('FSRA_timely_throughput:', FSRA_timely_throughput)
Example #7
File: main.py Project: leideng/TSRA
def TSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    TSRA_agent = TSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01, gamma=0.9, length=1)
    TSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=TSRA_agent)
    TSRA_reward = []

    begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        TSRA_reward.append(aloha_reward + agent_reward)

    TSRA_timely_throughput = np.mean(TSRA_reward)
    print('TSRA_timely_throughput:', TSRA_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
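
A minimal driver sketch (illustrative values only) that runs the LP upper bound and the FSRA, HSRA and TSRA agents from the examples above on the same setting, for a quick side-by-side comparison.

if __name__ == '__main__':
    cfg = dict(D=2, D_=2, pb1=0.5, pt1=0.25, ps1=0.8, pb2=0.5, ps2=0.8)
    upper_bound(**cfg, iteration=int(1e4))
    FSRA(**cfg, iteration=int(1e4))
    HSRA(**cfg, iteration=int(1e4))
    TSRA(**cfg, iteration=int(1e4))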
Example #8
def DLMA_RNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN(D=D_,
               arrival_rate=pb2,
               features=8,
               n_actions=2,
               n_nodes=2,
               state_length=4,
               memory_size=1000,
               replace_target_iter=20,
               batch_size=64,
               learning_rate=0.01,
               gamma=0.9,
               epsilon=1,
               epsilon_min=0.005,
               epsilon_decay=0.995,
               alpha=0)

    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=DLMA)

    channel_state = [0] * DLMA.features
    state = np.zeros((4, len(channel_state)))

    DLMA_RNN_reward = []
    begin = time.time()
    for i in tqdm(range(iteration)):
        state = np.vstack([state[1:], channel_state])
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)

        DLMA_RNN_reward.append(aloha_reward + agent_reward)
        next_channel_state = (return_action(env.agent.action) +
                              return_observation(observation) +
                              [agent_reward, aloha_reward])
        # store (action, agent_reward, aloha_reward), mirroring the FNN variant
        experience = np.concatenate([
            channel_state,
            [env.agent.action, agent_reward, aloha_reward],
            next_channel_state
        ])

        env.agent.add_experience(experience)

        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        channel_state = next_channel_state

    DLMA_RNN_timely_throughput = np.mean(DLMA_RNN_reward)
    print('DLMA_RNN_timely_throughput:', DLMA_RNN_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
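
A standalone numpy sketch of the rolling RNN state used above: a (state_length, features) window where each step drops the oldest row and appends the newest channel_state.

import numpy as np

features, state_length = 8, 4
state = np.zeros((state_length, features))
channel_state = [0] * features
for step in range(3):
    state = np.vstack([state[1:], channel_state])   # shift the window by one row
    channel_state = [step + 1] * features           # stand-in for new per-slot features
print(state.shape)  # (4, 8)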