Example #1
File: main.py Project: DanzhouWu/TSRA
def DLMA_FNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN_AGENT(D=D_,
                     arrival_rate=pb2,
                     state_size=160,
                     n_actions=2,
                     n_nodes=2,
                     memory_size=1000,
                     replace_target_iter=20,
                     batch_size=64,
                     learning_rate=0.01,
                     gamma=0.9,
                     epsilon=1,
                     epsilon_min=0.005,
                     epsilon_decay=0.995,
                     alpha=0)

    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=DLMA)
    state = [0] * DLMA.state_size

    DLMA_FNN_reward = []

    begin = time.time()
    for i in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)
        DLMA_FNN_reward.append(aloha_reward + agent_reward)

        next_state = state[8:] + return_action(
            env.agent.action) + return_observation(observation) + [
                agent_reward, aloha_reward
            ]

        env.agent.store_transition(state, env.agent.action, agent_reward,
                                   aloha_reward, next_state)
        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        state = next_state

    DLMA_FNN_timely_throughput = np.mean(DLMA_FNN_reward)
    print('DLMA_FNN_timely_throughput:', DLMA_FNN_timely_throughput)

    end = time.time()
    print('Run time of current process: ', (end - begin), 's')
    print('Memory usage of current process: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
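Note: the 8-value slice appended to the state above most likely decomposes as a 2-value one-hot action, a 4-value one-hot observation and the two rewards (state_size 160 = 20 slots x 8 features). The helpers return_action and return_observation are not shown in this excerpt; a minimal sketch under that assumption (names kept, internals hypothetical):

def return_action(action):
    # hypothetical helper: one-hot encode the binary transmit decision
    return [1, 0] if action == 0 else [0, 1]

def return_observation(observation, n_categories=4):
    # hypothetical helper: one-hot encode the channel observation
    # (an integer index is assumed; the real project may use named outcomes)
    vec = [0] * n_categories
    vec[int(observation)] = 1
    return vec

Dropping state[8:] and appending the new 8-value slice then keeps a sliding window over the 20 most recent slots.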
Example #2
def main():

    parser = argparse.ArgumentParser(
        description='PyTorch actor-critic example')

    parser.add_argument('--hidden_layer_size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='Hidden Layer Size (default: 128)')

    args = parser.parse_args()
    for i in range(1):
        naf_environment = ENVIRONMENT(args, i)
        naf_environment.run()
Example #3
    def __init__(self):

        self.envs = {}

        for e in range(0, c.numEnvs):
            #self.ID = e
            self.envs[e] = ENVIRONMENT(e)
Example #4
    def __init__(self):

        self.environments = {}

        for e in range(0, c.numEnvironments):

            self.environments[e] = ENVIRONMENT()
Example #5
    def __init__(self):

        self.envs = {}

        for i in range(0, c.numEnvs):

            self.envs[i] = ENVIRONMENT(i)
Example #6
    def __init__(self, numEnvs=4, eval_time=400):
        self.envs = {}
        self.numEnvs = numEnvs
        self.eval_time = eval_time

        for i in range(self.numEnvs):
            self.envs[i] = ENVIRONMENT(i, eval_time=self.eval_time)
Example #7
def main():

    parser = argparse.ArgumentParser(description='PyTorch actor-critic example')
    
    parser.add_argument('--hidden_layer_size', type=int, default=128, metavar='N',
                        help='Hidden Layer Size (default: 128)')
    parser.add_argument('--a_param', type=float, default=0.95, metavar='G',
                        help='dynamics a_parameter')
    parser.add_argument('--b_param', type=float, default=5.5, metavar='G',
                        help='dynamics b_parameter')
    
    args = parser.parse_args()
    
    for i in range(1):
        naf_environment = ENVIRONMENT(args, i)
        naf_environment.run()
    
    print("Learning Process Finished")
Example #8
File: main.py Project: DanzhouWu/TSRA
def FSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e7)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    FSRA_agent = FSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01)
    FSRA_agent.initailize()

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=FSRA_agent)

    FSRA_reward = []
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        FSRA_reward.append(aloha_reward + agent_reward)

    FSRA_timely_throughput = np.mean(FSRA_reward[-int(1e5):])
    print('FSRA_timely_throughput:', FSRA_timely_throughput)
Example #9
def main(n2, D, parameter, iteration=int(1e5)):
    agent_list = [DQN_AGENT(D=D, 
                            arrival_rate=parameter[i], 
                            state_size=int(8*M),
                            n_actions=2, 
                            n_nodes=2,
                            memory_size=E,
                            replace_target_iter=F,
                            batch_size=B,
                            ) for i in range(n2)]

    env = ENVIRONMENT(channels=parameter[n2:], agent_list=agent_list)

    reward_list = []
    energy_list = []

    state = [[0] * int(8*M) for _ in range(n2)]
    next_state = [[0] * int(8*M) for _ in range(n2)]
    for t in tqdm(range(iteration)):
        for i in range(n2):
            env.agent_list[i].choose_action(np.array(state[i]))

        reward, energy, observations = env.step(time=t) 

        reward_list.append(reward)
        energy_list.append(energy)

        for i in range(n2): 
            env.agent_list[i].update_queue(observation=observations[i])
            next_state[i], agent_reward, others_reward = return_next_state(i, state[i], env.agent_list, observations, reward)
            env.agent_list[i].store_transition(state[i], env.agent_list[i].action, agent_reward, others_reward, next_state[i])

        if t > 100 and t % 5 == 0:
            for i in range(n2):
                env.agent_list[i].learn() 

        state = copy.deepcopy(next_state)

    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(energy_list[-int(1e4):]) 
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power
Example #10
def TSRA(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)

    TSRA_agent = TSRA_AGENT(D=D_, arrival_rate=pb2, learning_rate=0.01, gamma=0.9, length=1)

    env = ENVIRONMENT(aloha_channel=ps1, agent_channel=ps2, aloha=aloha, agent=TSRA_agent)
    TSRA_reward = []

    begin = time.time()
    for _ in tqdm(range(iteration)):
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation=observation)
        env.agent.update(observation=observation)
        TSRA_reward.append(aloha_reward + agent_reward)

    TSRA_timely_throughput = np.mean(TSRA_reward)
    print('TSRA_timely_throughput:', TSRA_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' % (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
Example #11
File: main.py Project: DanzhouWu/TSRA
def main(N1, N2, D, parameter, iteration=int(1e5)):

    agent_list = [TSRA_AGENT(D=D, arrival_rate=parameter[i], learning_rate=0.01, length=1)
                  for i in range(N2)]  # parameters pb2
    n1_list = [
        ALOHA_AGENT(D=D, arrival_rate=1, trans_prob=1 / (4 * N1))
        for _ in range(N1)
    ]
    agent_list.extend(n1_list)

    channels = list(parameter[N2:])  # parameters ps2
    n1_channels = [0.5 for _ in range(N1)]
    channels.extend(n1_channels)

    env = ENVIRONMENT(channels=channels, agent_list=agent_list)

    reward_list = []
    energy_list = []

    for time in tqdm(range(iteration)):
        reward, energy, observations = env.step(time=time)

        for i in range(N2):
            env.agent_list[i].update(observation=observations[i],
                                     time=time,
                                     N=N2)

        for i in range(N1):
            env.agent_list[N2 + i].update(observation=observations[N2 + i])

        reward_list.append(reward)
        energy_list.append(energy)

    throughput, power = np.mean(reward_list[-int(1e4):]), np.mean(
        energy_list[-int(1e4):])
    print('Throu = {}'.format(throughput))
    print('Energy = {}'.format(power))
    return throughput, power
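A hypothetical invocation of this entry point, assuming parameter holds the N2 agent arrival rates (pb2) followed by their channel success probabilities (ps2), with placeholder values throughout:

# hypothetical call: 2 TSRA agents plus 2 ALOHA nodes; D and all rates are placeholders
throughput, power = main(N1=2, N2=2, D=5, parameter=[0.5, 0.5, 0.9, 0.9])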
Example #12
def main():
    #torch.utils.backcompat.broadcast_warning.enabled = True
    #torch.utils.backcompat.keepdim_warning.enabled = True

    #torch.set_default_tensor_type('torch.DoubleTensor')

    parser = argparse.ArgumentParser(
        description='PyTorch NAF-pendulum example')

    parser.add_argument('--gamma',
                        type=float,
                        default=0.99,
                        metavar='G',
                        help='discount factor (default: 0.99)')
    parser.add_argument('--tau',
                        type=float,
                        default=0.005,
                        metavar='G',
                        help='soft update parameter (default: 1e-3)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='Batch size (default: 128)')
    parser.add_argument('--replay_buffer_size',
                        type=int,
                        default=1e6,
                        metavar='N',
                        help='Replay Buffer Size (default: 1e6)')
    parser.add_argument('--hidden_layer_size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='Hidden Layer Size (default: 64)')
    parser.add_argument('--lr',
                        type=float,
                        default=5e-5,
                        metavar='G',
                        help='Learning rate of Actor Network (default: 1e-4)')
    parser.add_argument('--max_episode',
                        type=float,
                        default=1,
                        metavar='N',
                        help='Max Episode (default: 200)')
    parser.add_argument('--noise_scale',
                        type=float,
                        default=0.01,
                        metavar='G',
                        help='initial noise scale (default: 1.0)')
    parser.add_argument('--final_noise_scale',
                        type=float,
                        default=0.01,
                        metavar='G',
                        help='final noise scale (default: 0.001)')

    parser.add_argument('--a_param',
                        type=float,
                        default=0.95,
                        metavar='G',
                        help='a_param (default: 0.95)')
    parser.add_argument('--b_param',
                        type=float,
                        default=25.5,
                        metavar='G',
                        help='b_param (default: 5.0~100.0)')

    args = parser.parse_args()

    for i in range(1):
        SEED = 1
        fixed_seed.fixed_seed_function(SEED)
        naf_environment = ENVIRONMENT(args, i)
        naf_environment.run()

    print("Learning Process Finished")
Example #13
    print('average wifi reward: {}'.format(np.mean(wifi_reward_list[-2000:])))
    print('average total reward:{}'.format(
        np.mean(agent_reward_list[-2000:]) +
        np.mean(wifi_reward_list[-2000:])))
    print('Time elapsed:', time.time() - start)

    ### save training loss
    # dqn_agent.my_plot('len1e5_M20_W2_alpha50_g0.999_MM6_r10_1')


if __name__ == "__main__":
    RATIO = 10  # the packet length of WiFi
    NUM_ACTIONS = 11  # the number of actions 0-10
    env = ENVIRONMENT(features=NUM_ACTIONS + 4,
                      ratio=RATIO,
                      n_actions=NUM_ACTIONS,
                      init_wifi_window_size=2,
                      max_backoff=6,
                      penalty=0.5)

    dqn_agent = DQN(env.features,
                    env.ratio,
                    env.n_actions,
                    env.n_nodes,
                    history_len=20,
                    memory_size=1000,
                    replace_target_iter=20,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.999,
                    epsilon=1,
                    epsilon_min=0.005,
Example #14
    # dqn_agent.save_model("models/model_len5e5_M20_h6_q0.1_1.h5")
    # print the results
    print('-----------------------------')
    print('average agent reward: {}'.format(np.mean(
        agent_reward_list[-2000:])))
    print('average aloha reward: {}'.format(np.mean(
        aloha_reward_list[-2000:])))
    print('average total reward: {}'.format(
        np.mean(agent_reward_list[-2000:]) +
        np.mean(aloha_reward_list[-2000:])))
    print('Time elapsed:', time.time() - start)


if __name__ == "__main__":
    env = ENVIRONMENT(
        state_size=40,
        window_size=3,
    )

    dqn_agent = DQN(
        env.state_size,
        env.n_actions,
        env.n_nodes,
        memory_size=500,
        replace_target_iter=200,
        batch_size=32,
        learning_rate=0.01,
        gamma=0.9,
        epsilon=0.1,
        epsilon_min=0.005,
        epsilon_decay=0.995,
    )
Example #15
    n_actions = 2 # number of actions

    M = 4 # state length
    E = 1000 # memory size
    F = 20 # target network update frequency
    B = 64 # mini-batch size
    gamma = 0.9 # discount factor

    alpha = 1 # fairness index
    
    max_iter = int(5e4)
    idx = 1


    env = ENVIRONMENT(features=8, 
                      window_size=4
                      )

    agent = DQN(env.features,
                    n_nodes,
                    n_actions,
                    state_length=M,  
                    memory_size=E,
                    replace_target_iter=F,
                    batch_size=B,
                    gamma=gamma,
                    epsilon=1,
                    epsilon_min=0.005,
                    epsilon_decay=0.995,
                    alpha=alpha
                    )
Example #16
if __name__ == "__main__":

    n_nodes = 2  # number of nodes
    n_actions = 2  # number of actions

    M = 20  # state length
    E = 1000  # memory size
    F = 20  # target network update frequency
    B = 64  # mini-batch size
    gamma = 0.9  # discount factor

    alpha = 1  # fairness index

    max_iter = int(5e4)
    idx = 1

    env = ENVIRONMENT(state_size=int(8 * M), tx_prob=0.2)

    agent = DQN(env.state_size,
                n_nodes,
                n_actions,
                memory_size=E,
                replace_target_iter=F,
                batch_size=B,
                gamma=gamma,
                epsilon=1,
                epsilon_min=0.005,
                epsilon_decay=0.995,
                alpha=alpha)

    main(env.tx_prob, M, E, F, B, gamma, alpha, idx, max_iter)
Example #17
def DLMA_RNN(D, D_, pb1, pt1, ps1, pb2, ps2, iteration=int(1e5)):
    aloha = ALOHA_AGENT(D=D, arrival_rate=pb1, trans_prob=pt1)
    aloha.initialize()

    DLMA = DQN(D=D_,
               arrival_rate=pb2,
               features=8,
               n_actions=2,
               n_nodes=2,
               state_length=4,
               memory_size=1000,
               replace_target_iter=20,
               batch_size=64,
               learning_rate=0.01,
               gamma=0.9,
               epsilon=1,
               epsilon_min=0.005,
               epsilon_decay=0.995,
               alpha=0)

    DLMA.initailize()

    env = ENVIRONMENT(aloha_channel=ps1,
                      agent_channel=ps2,
                      aloha=aloha,
                      agent=DLMA)

    channel_state = [0] * DLMA.features
    state = np.zeros((4, len(channel_state)))

    DLMA_RNN_reward = []
    begin = time.time()
    for i in tqdm(range(iteration)):
        state = np.vstack([state[1:], channel_state])
        aloha_reward, agent_reward, observation = env.step()
        env.aloha.update(observation)
        env.agent.update(observation, state)

        DLMA_RNN_reward.append(aloha_reward + agent_reward)
        next_channel_state = return_action(
            env.agent.action) + return_observation(observation) + [
                agent_reward, agent_reward
            ]
        experience = np.concatenate([
            channel_state, [env.agent.action, agent_reward, agent_reward],
            next_channel_state
        ])

        env.agent.add_experience(experience)

        if i > 100 and (i % 5 == 0):
            env.agent.learn()  # internally iterates default (prediction) model
        channel_state = next_channel_state

    DLMA_RNN_timely_throughput = np.mean(DLMA_RNN_reward)
    print('DLMA_RNN_timely_throughput:', DLMA_RNN_timely_throughput)

    end = time.time()
    print('time: ', (end - begin), 's')
    print('memory: %.4f MB' %
          (psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024))
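The RNN variant above maintains a sliding window of the last 4 per-slot channel-state vectors as its recurrent input; a minimal standalone sketch of that update, assuming 8 features per slot as in DLMA.features:

import numpy as np

state = np.zeros((4, 8))        # window of the 4 most recent channel-state vectors
new_channel_state = np.ones(8)  # placeholder for the next per-slot feature vector
state = np.vstack([state[1:], new_channel_state])  # drop the oldest row, append the newest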
Example #18
    n_nodes = 2  # number of nodes
    n_actions = 2  # number of actions

    M = 4  # state length
    E = 1000  # memory size
    F = 20  # target network update frequency
    B = 64  # mini-batch size
    gamma = 0.9  # discount factor

    alpha = 0  # fairness index

    max_iter = int(1e4)
    idx = 1

    env = ENVIRONMENT(features=8, )

    agent = DQN(env.features,
                n_nodes,
                n_actions,
                state_length=M,
                memory_size=E,
                replace_target_iter=F,
                batch_size=B,
                gamma=gamma,
                epsilon=1,
                epsilon_min=0.005,
                epsilon_decay=0.995,
                alpha=alpha)

    main(M, E, F, B, gamma, alpha, idx, max_iter)
Example #19
    n_actions = 2 # number of actions

    M = 20 # state length
    E = 1000 # memory size
    F = 20 # target network update frequency
    B = 64 # mini-batch size
    gamma = 0.9 # discount factor

    alpha = 0 # fairness index
    
    max_iter = int(5e4)
    idx = 1


    env = ENVIRONMENT(state_size=int(8*M), 
                      window_size=4,
                      max_backoff=2
                      )

    agent = DQN(env.state_size,
                    n_nodes,
                    n_actions,  
                    memory_size=E,
                    replace_target_iter=F,
                    batch_size=B,
                    gamma=gamma,
                    epsilon=1,
                    epsilon_min=0.005,
                    epsilon_decay=0.995,
                    alpha=alpha
                    )
Example #20
    def __init__(self):
        self.envs = {i: ENVIRONMENT(i) for i in range(C.numEnvs)}
Example #21
            my_aloha.write(str(i) + '   ') 
    # save model 
    # dqn_agent.save_model("models/model_len1e5_M20_h6_q0.1_2.h5")  
    # print the results
    print('-----------------------------')
    print('average agent reward: {}'.format(np.mean(agent_reward_list[-2000:])))
    print('average aloha reward: {}'.format(np.mean(aloha_reward_list[-2000:])))
    print('average total reward: {}'.format(np.mean(agent_reward_list[-2000:]) + 
                                            np.mean(aloha_reward_list[-2000:])))
    print('Time elapsed:', time.time()-start)



if __name__ == "__main__":
    env = ENVIRONMENT(state_size=80, 
                      window_size=7,
                      )

    dqn_agent = DQN(env.state_size,
                    env.n_actions,  
                    env.n_nodes,
                    memory_size=500,
                    replace_target_iter=200,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.9,
                    epsilon=0.1,
                    epsilon_min=0.005,
                    epsilon_decay=0.995,
                    )
Example #22
                  ' reward: {}'.format(np.mean(agent_reward_list[-2000:][j])))
        #print('average aloha reward: {}'.format(np.mean(aloha_reward_list[-2000:])))
        #print('average tdma  reward: {}'.format(np.mean(tdma_reward_list[-2000:])))
        print('average total reward: {}'.format(
            np.mean(agent_reward_list[-2000:])))  # +
        #np.mean(aloha_reward_list[-2000:]) +
        #np.mean(tdma_reward_list[-2000:])))
        #print('tdma prob: %i' % env.tdmaPrb)
        #print('aloha prob: %i' % env.alohaPrb)
        print('agent prob: %i' % env.agentPrb)
        print('Time elapsed:', time.time() - start)


if __name__ == "__main__":
    env = ENVIRONMENT(
        state_size=NN * 40,
        attempt_prob=0.2,
    )

    dqn_agent = DQN(
        env.state_size,
        env.n_actions,
        env.n_nodes,
        memory_size=NN * 500,
        replace_target_iter=200,
        batch_size=NN * 32,
        learning_rate=0.01,
        gamma=0.9,
        epsilon=0.1,
        epsilon_min=0.005,
        epsilon_decay=0.995,
    )
Example #23
    def Initialize(self):
        for e in range(0, c.numEnvs):
            self.envs[e] = ENVIRONMENT(e=e)
Example #24
if __name__ == "__main__":

    n_nodes = 2  # number of nodes
    n_actions = 2  # number of actions

    M = 20  # state length
    E = 1000  # memory size
    F = 20  # target network update frequency
    B = 64  # mini-batch size
    gamma = 0.9  # discount factor

    alpha = 1  # fairness index

    max_iter = int(1e4)
    idx = 1

    env = ENVIRONMENT(state_size=int(8 * M), )

    agent = DQN(env.state_size,
                n_nodes,
                n_actions,
                memory_size=E,
                replace_target_iter=F,
                batch_size=B,
                gamma=gamma,
                epsilon=1,
                epsilon_min=0.005,
                epsilon_decay=0.995,
                alpha=alpha)

    main(M, E, F, B, gamma, alpha, idx, max_iter)
Example #25
AUV1 = AUV()

# allocate vehicle to a mission
setattr(AUV1, 'mission', UEXP)

# set vehicle parameters
AUV1.origin = (16, 24, -9.5)  # simple
AUV1.goal = (3, 30, -9.5)  # simple
#AUV1.origin = (3,17,-9.5) # simple
#AUV1.goal =  (10,40,-9.5) # simple
#AUV1.origin = (70,100,-9.5) # advanced
#AUV1.goal =  (60,40,-9.5) # advanced
AUV1.speed = 2.0

# define our visualization output & create it
ENV1 = ENVIRONMENT(UEXP, ReefFunction='reef')
ENV1.UnknownRegions = {
    0.8: [(50, 15), (43, 25), (80, 25), (88, 19), (90, 18)],
    0.4: [(80, 84), (95, 80), (95, 92), (76, 95)],
    0.1: [(11, 8), (40, 0), (40, 17), (11, 11)]
}
ENV1.RiskField = ENV1.GenerateRiskField()
sigma = 1.5
ENV1.RiskField = blurRiskField(ENV1.RiskField, sigma)
ENV1.CurrentField_x, ENV1.CurrentField_y = ENV1.GenerateCurrentField(
    type="whirlpool", max_strength=1)

VIS1 = VISUALIZATION(AUV1, ENV1)
VIS1.ShowReef()
# #VIS1.ShowCurrent()
VIS1.ShowRisk()
Example #26
    n_nodes = 2  # number of nodes
    n_actions = 2  # number of actions

    M = 4  # state length
    E = 1000  # memory size
    F = 20  # target network update frequency
    B = 64  # mini-batch size
    gamma = 0.9  # discount factor

    alpha = 1  # fairness index

    max_iter = int(5e4)
    idx = 1

    env = ENVIRONMENT(features=8, tx_prob=0.2)

    agent = DQN(env.features,
                n_nodes,
                n_actions,
                state_length=M,
                memory_size=E,
                replace_target_iter=F,
                batch_size=B,
                gamma=gamma,
                epsilon=1,
                epsilon_min=0.005,
                epsilon_decay=0.995,
                alpha=alpha)

    main(env.tx_prob, M, E, F, B, gamma, alpha, idx, max_iter)
Example #27
    print('average total reward:{}'.format(
        np.mean(agent_reward_list[-2000:]) +
        np.mean(aloha_reward_list[-2000:]) +
        np.mean(tdma_reward_list[-2000:])))
    print('Time elapsed:', time.time() - start)
    dqn_agent.my_plot('len1e5_M20_g0.999_q0.5_t2-5_alpha50_r10_h20_1')


if __name__ == "__main__":
    RATIO1 = RATIO  # ALOHA packet length
    RATIO2 = RATIO  # TDMA packet length
    NUM_ACTIONS = RATIO1 + 1
    env = ENVIRONMENT(
        state_size=300,  # 15*20
        aloha_ratio=RATIO1,
        tdma_ratio=RATIO2,
        n_actions=NUM_ACTIONS,
        transmission_prob=0.5,
        penalty=0.5,
    )

    dqn_agent = DQN(env.state_size,
                    env.aloha_ratio,
                    env.tdma_ratio,
                    env.n_actions,
                    env.n_nodes,
                    memory_size=1000,
                    replace_target_iter=20,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.999,
                    epsilon=1,
Example #28
    # print the results
    print('-----------------------------')
    print('average agent reward: {}'.format(np.mean(
        agent_reward_list[-2000:])))
    print('average aloha reward: {}'.format(np.mean(
        aloha_reward_list[-2000:])))
    print('average tdma  reward: {}'.format(np.mean(tdma_reward_list[-2000:])))
    print('average total reward: {}'.format(
        np.mean(agent_reward_list[-2000:]) +
        np.mean(aloha_reward_list[-2000:]) +
        np.mean(tdma_reward_list[-2000:])))
    print('Time elapsed:', time.time() - start)


if __name__ == "__main__":
    env = ENVIRONMENT(state_size=40, attempt_prob=0.7)

    dqn_agent = DQN(
        env.state_size,
        env.n_actions,
        env.n_nodes,
        memory_size=500,
        replace_target_iter=200,
        batch_size=32,
        learning_rate=0.01,
        gamma=0.9,
        epsilon=0.1,
        epsilon_min=0.005,
        epsilon_decay=0.995,
    )
Example #29
            my_tdma.write(str(i) + '   ')
    # save model
    # dqn_agent.save_model("models/model_len1e4_M20_h6_t10-3_1.h5")
    # print the results
    print('-----------------------------')
    print('average agent reward: {}'.format(np.mean(
        agent_reward_list[-2000:])))
    print('average tdma reward: {}'.format(np.mean(tdma_reward_list[-2000:])))
    print('average total reward: {}'.format(
        np.mean(agent_reward_list[-2000:]) +
        np.mean(tdma_reward_list[-2000:])))
    print('Time elapsed:', time.time() - start)


if __name__ == "__main__":
    env = ENVIRONMENT(state_size=40, )

    dqn_agent = DQN(
        env.state_size,
        env.n_actions,
        env.n_nodes,
        memory_size=500,
        replace_target_iter=200,
        batch_size=32,
        learning_rate=0.01,
        gamma=0.9,
        epsilon=0.5,
        epsilon_min=0.005,
        epsilon_decay=0.995,
    )
Example #30
    print('average total reward:{}'.format(
        np.mean(agent_reward_list[-2000:]) +
        np.mean(aloha_reward_list[-2000:]) +
        np.mean(tdma_reward_list[-2000:])))
    print('Time elapsed:', time.time() - start)
    dqn_agent.my_plot('len1e5_M20_g0.999_q0.5_t2-5_alpha50_r10_1')


if __name__ == "__main__":
    RATIO = 10
    RATIO1 = RATIO  # ALOHA packet length
    RATIO2 = RATIO  # TDMA packet length
    NUM_ACTIONS = RATIO1 + 1
    env = ENVIRONMENT(features=NUM_ACTIONS + 4,
                      aloha_ratio=RATIO1,
                      tdma_ratio=RATIO2,
                      n_actions=NUM_ACTIONS,
                      transmission_prob=0.5,
                      penalty=0.5)

    dqn_agent = DQN(env.features,
                    env.aloha_ratio,
                    env.tdma_ratio,
                    env.n_actions,
                    env.n_nodes,
                    history_len=20,
                    memory_size=1000,
                    replace_target_iter=20,
                    batch_size=32,
                    learning_rate=0.01,
                    gamma=0.999,
                    epsilon=1,