Example #1
    LAMBDA_FAIRNESS = 10000
    EPOCH = 1000
    INTERNAL = 1
    LR = 0.0005
    SAMPLE_FRAC = 0.6

    pf_env = AIRVIEW(lambda_avg=LAMBDA_AVG, lambda_fairness=LAMBDA_FAIRNESS)
    rl_env = AIRVIEW(lambda_avg=LAMBDA_AVG, lambda_fairness=LAMBDA_FAIRNESS)

    av_ues_info = load_av_ue_info()

    ddpg_agent = DDPGAgent(
        user_num=None,
        feature_num=FEATURE_NUM,
        rbg_num=RBG_NUM,
        replayer_capacity=REPLAYER_CAPACITY,
        sample_frac=SAMPLE_FRAC,
        lr=LR,
        resume=False
    )

    ddpg_agent.actor_eval_net.summary()
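
    # replayer_capacity and sample_frac above configure the agent's experience
    # replay. A minimal sketch of that idea, for illustration only (this is an
    # assumption about the repo's Replayer, not its actual implementation):
    # keep at most `capacity` transitions and draw a fraction of them per
    # training step.
    import random
    from collections import deque

    class SimpleReplayer:
        def __init__(self, capacity, sample_frac):
            self.buffer = deque(maxlen=capacity)  # oldest transitions are evicted
            self.sample_frac = sample_frac

        def store(self, transition):
            self.buffer.append(transition)

        def sample(self):
            k = max(1, int(len(self.buffer) * self.sample_frac))
            return random.sample(list(self.buffer), k)
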
    """ resume """
    # ddpg_agent.actor_eval_net = tf.keras.models.load_model('./actor_eval_net_epoch35.h5')

    # softac_agent = SoftACAgent(
    #     user_num=None,
    #     feature_num=FEATURE_NUM,
    #     rbg_num=RBG_NUM,
    #     replayer_capacity=REPLAYER_CAPACITY,
    #     sample_frac=SAMPLE_FRAC,
    #     lr=LR
    # )

    episodes = 400  # number of episodes
    training = 1000  # training time for each episode
    testing = 1000  # testing time for each episode

    # Q-Network
    state_size = (
        (sim_env.features - sim_env.CSI) * (sim_env.W - 1 + sim_env.CSI) *
        sim_env.S)  # Calculate state size
    action_size = 2  # action size (2 for blocklength and workload index)
    batch_size = 512  # set batch size
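
    # For intuition on the state_size formula above: with hypothetical values
    # features = 4, CSI = 1, W = 4 and S = 3, it evaluates to
    # (4 - 1) * (4 - 1 + 1) * 3 = 36 state entries; the actual numbers depend
    # on the simulation configuration.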

    # initialize DDPG agent and noise process, set decay factor (gamma), learning rates, tau (soft-copy factor), and batch size
    QN = DDPGAgent(state_size=state_size,
                   action_size=2,
                   gamma=0.0,
                   learning_rate_actor=0.00005,
                   learning_rate_critic=0.0001,
                   tau=0.001,
                   batch_size=batch_size,
                   action_max=[1, 1])
    # args: state_size, action_size, discount factor (gamma), learning_rate_actor, learning_rate_critic, soft-copy factor (tau), batch_size, maximum action values
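
    # tau above is the soft-update (Polyak averaging) factor for the target
    # networks. A minimal sketch of what such an update looks like, for
    # illustration only (the repo's DDPGAgent performs this internally):
    def soft_update(target_weights, eval_weights, tau):
        # move each target-network weight a small step toward the eval network
        return [tau * w_eval + (1.0 - tau) * w_tgt
                for w_tgt, w_eval in zip(target_weights, eval_weights)]
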
    # set decay_period in OUNoise module directly
    Noise = OUNoise(action_space=action_size,
                    mu=np.asarray([0.0, 0.0]),
                    theta=np.asarray([0.5, 0.5]),
                    max_sigma=np.asarray([0.8, 0.8]),
                    min_sigma=np.asarray([0.1, 0.05]),
                    action_max=[1, 1],
                    action_min=[0.001, 0])
    # args: action_size, mean reversion level (mu), mean reversion speed (theta), random factor influence (max and min sigma), maximum action values, minimum action values
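
    # A minimal sketch of the Ornstein-Uhlenbeck dynamics those parameters
    # control, for illustration only (not this repo's OUNoise implementation):
    # theta pulls the noise state back toward mu, sigma scales the random kick,
    # and sigma is annealed from max_sigma toward min_sigma over the decay
    # period.
    def ou_step(x, mu, theta, sigma):
        # one OU update: mean reversion toward mu plus Gaussian noise
        return x + theta * (mu - x) + sigma * np.random.randn(*np.shape(x))

    def annealed_sigma(t, max_sigma, min_sigma, decay_period):
        # linearly decay the exploration noise scale over decay_period steps
        frac = min(1.0, t / decay_period)
        return max_sigma - (max_sigma - min_sigma) * frac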

    error_avg = []  # declare avg error array
Example #3
for r in range(loop_start, loop_end + 1):
    num_of_servers = r  # number of servers
    # Initialize Simulation Environment
    sim_env = Simulation(number_of_servers=num_of_servers, number_of_users=1, historic_time=hist_timeslots,
                         snr_set=avg_SNR, csi=1, channel=1)  # args: number_of_servers (= r), number_of_users = 1, historic time slots, avg SNR, perfect CSI? (yes = 0, no = 1), channel correlation

    episodes = 400
    training = 1000
    testing = 1000
    testing_comps = 5000

    # Q-Network
    state_size = (sim_env.features * (sim_env.W - 1 + sim_env.CSI) * sim_env.S)
    action_size = sim_env.S + 1
    batch_size = 500
    QN = DDPGAgent(state_size=state_size, action_size=1+sim_env.S, gamma=0.0, learning_rate_actor=0.00005,
                   learning_rate_critic=0.0001, tau=0.001, batch_size=batch_size, action_max=[1]*(sim_env.S + 1))

    # Ornstein-Uhlenbeck Process Noise
    mu = np.concatenate(([0], [0]*sim_env.S))
    theta = np.concatenate(([0.3], [0.3]*sim_env.S))
    max_sigma = np.concatenate(([0.4], [0.7]*sim_env.S))
    min_sigma = np.concatenate(([0.1], [0.1]*sim_env.S))
    action_max = [1] * (sim_env.S+1)
    action_min = np.concatenate(([0.01], [0]*sim_env.S))
    Noise = OUNoise(action_space=action_size,  mu=mu, theta=theta, max_sigma=max_sigma, min_sigma=min_sigma,
                    action_max=action_max, action_min=action_min, decay_period=50000)

    states = sim_env.state
    for e in range(episodes):
        sim_env.reset()
        ee = 0
    episodes = 100
    training = 1000
    testing = 1000
    testing_comps = 5000

    success_ratio = []
    # Q-Network
    state_size = ((sim_env.features - sim_env.CSI) *
                  (sim_env.W - 1 + sim_env.CSI) * sim_env.S)
    action_size = 2
    batch_size = 128

    QN = DDPGAgent(state_size=state_size,
                   action_size=2,
                   gamma=df,
                   learning_rate_actor=lr / 2,
                   learning_rate_critic=lr,
                   tau=0.001,
                   batch_size=batch_size,
                   action_max=[1, 1])

    # Noise
    mu = np.concatenate(([0], [0]))
    theta = np.concatenate(([OU_theta - 0.2], [OU_theta]))
    max_sigma = np.concatenate(([OU_sigma - 0.2], [OU_sigma]))
    min_sigma = np.concatenate(([0.001], [0.001]))
    action_max = [1] * 2
    action_min = np.concatenate(([0.01], [0]))
    Noise = OUNoise(action_space=2,
                    mu=mu,
                    theta=theta,
                    max_sigma=max_sigma,
                    min_sigma=min_sigma,
                    action_max=action_max,
                    action_min=action_min)

    op_env = OPPORTUNISTIC()
    rl_env = AIRVIEW(LAMBDA_AVG, LAMBDA_FAIRNESS)

    # softac_agent = SoftACAgent(
    #     user_num=None,
    #     feature_num=FEATURE_NUM,
    #     rbg_num=RBG_NUM,
    #     replayer_capacity=REPLAYER_CAPACITY,
    #     sample_frac=SAMPLE_FRAC,
    #     lr=LR
    # )

    ddpg_agent = DDPGAgent(
        user_num=None,
        feature_num=FEATURE_NUM,
        rbg_num=RBG_NUM,
        replayer_capacity=REPLAYER_CAPACITY,
        sample_frac=SAMPLE_FRAC,
        lr=LR
    )

    ''' evaluation '''
    ddpg_agent.actor_eval_net = tf.keras.models.load_model('./actor_eval_net_epoch180.h5')

    av_ues_info = load_av_ue_info()
    av_ues_info = user_info_threshold(av_ues_info, threshold_min=10e+5, threshold_max=10e+6)

    for epoch in range(EPOCH):
        time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
        INITIAL_USER_START = int((epoch+1) / INTERNAL)
        INITIAL_USER_NUM = 10
        av_ues_idx = list(range(INITIAL_USER_START, INITIAL_USER_START + INITIAL_USER_NUM))
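        # For example, with INTERNAL = 1 and INITIAL_USER_NUM = 10, epoch 0
        # serves users 1..10 and epoch 1 serves users 2..11: the active-user
        # window slides forward by one index per epoch.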
Example #6
count_luff = 0
count_bear_off = 0

'''
Start of training phase
'''

actor_loss_of_episode = []
critic_loss_of_episode = []
Q_predictions_3 = []
Q_predictions_minus_3 = []
Q_predictions_0 = []

tf.reset_default_graph()
with tf.Session() as sess:
    agent = DDPGAgent(mdp.size, action_size_DDPG, lower_bound, upper_bound, sess)
    for e in range(EPISODES):
        WH = w.generateWind()
        hdg0_rand = random.sample(hdg0_rand_vec, 1)[0]

        hdg0 = hdg0_rand * TORAD * np.ones(10)
        # initialize the incidence randomly
        # reinitialize the memory of the flow
        state = mdp.initializeMDP(hdg0, WH)
        mdp.simulator.hyst.reset()
        actor_loss_sim_list = []
        critic_loss_sim_list = []

        for time in range(80):
            # print(time)
Example #7
    testing_comps = 5000

    index = 1
    for cb in comb:
        ID = index
        lr = cb[0]
        df = cb[1]
        OU_theta = cb[2]
        OU_sigma = cb[3]
        print('lr:', lr)
        print('df:', df)
        print('theta:', OU_theta)
        print('sigma:', OU_sigma)
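
        # comb is iterated as (lr, df, OU_theta, OU_sigma) tuples. One hedged
        # guess at how such a hyperparameter grid could be built (illustrative
        # only; the actual construction happens outside this snippet):
        #
        #     from itertools import product
        #     comb = list(product([1e-4, 5e-5],   # learning rates
        #                         [0.0, 0.5],     # discount factors
        #                         [0.3, 0.5],     # OU theta
        #                         [0.6, 0.8]))    # OU sigma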

        # Q-Network
        QN = DDPGAgent(state_size=state_size, action_size=2, gamma=df, learning_rate_actor=lr/2,
                       learning_rate_critic=lr, tau=0.001, batch_size=batch_size, action_max=[1] * 2)
        #QN.load_weights('FadingChannel_OutdatedCSI/Hyperparameters/critic_loss/Weights_DDPG_S3_rho0.9_SNR10.0_PS320_lr5e-05_df0.0_sigOU0.8_thetaOU0.5_critic_LR_weights_actor_h5','FadingChannel_OutdatedCSI/Hyperparameters/critic_loss/Weights_DDPG_S3_rho0.9_SNR10.0_PS320_lr5e-05_df0.0_sigOU0.8_thetaOU0.5_critic_LR_weights_critic_h5')
        # Noise
        mu = np.concatenate(([0], [0]))
        theta = np.concatenate(([OU_theta - 0.2], [OU_theta]))
        max_sigma = np.concatenate(([OU_sigma - 0.2], [OU_sigma]))
        min_sigma = np.concatenate(([0.001], [0.05]))
        action_max = [1] * 2
        action_min = np.concatenate(([0.001], [0]))
        Noise = OUNoise(action_space=2, mu=mu, theta=theta, max_sigma=max_sigma, min_sigma=min_sigma,
                        action_max=action_max, action_min=action_min, decay_period=50000)

        error_avg = []
        error = []
        states = sim_env.state
        for e in range(episodes):
Example #8
    error.append([])

print(comb)
for r in range(loop_start, loop_end + 1):
    # Initialize Simulation Environment
    sim_env = Simulation(r, 1, 4, 10)  # args: number_of_servers (= r), number_of_users = 1, historic time slots = 4, avg SNR = 10
    testing = 1000      # number of testing cycles


    # Q-Network
    state_size = (sim_env.features * (sim_env.W-1) * sim_env.S)  # compute state size
    action_size = 2         # action size (blocklength and partitioned workload index)
    batch_size = 512        # batch size

    # Initialize DDPG agent and networks
    QN = DDPGAgent(state_size=state_size, action_size=2, gamma=0.0, learning_rate_actor=0.00005, learning_rate_critic=0.0001, tau=0.001, batch_size=batch_size, action_max=[1, 1])
    # Initialize Noise process
    Noise = OUNoise(action_space=action_size,  mu=np.asarray([0.0, 0.0]), theta=np.asarray([0.5, 0.5]), max_sigma=np.asarray([0.8, 0.9]), min_sigma=np.asarray([0.1, 0.05]), action_max=[1, 1], action_min=[0.001, 0])

    # load networks weights for actor and critic
    QN.load_weights('FadingChannel_OutdatedCSI/MA-raw-results/Weights_DDPG_S{}_rho0.9_SNR10.0_PS320_lr5e-05_df0.0_W4_sigOU0.8_thetaOU0.5_Critic_LR_weights_actor_h5'.format(2),
                    'FadingChannel_OutdatedCSI/MA-raw-results/Weights_DDPG_S{}_rho0.9_SNR10.0_PS320_lr5e-05_df0.0_W4_sigOU0.8_thetaOU0.5_Critic_LR_weights_critic_h5'.format(2))


    # sim_env.T = 0.025
    # sim_env.p = 0.9
    index = 0
    for v in comb:
        # QN = DDPGAgent(state_size=state_size, action_size=2, gamma=0.0, learning_rate_actor=0.00005,
        #                learning_rate_critic=0.0001, tau=0.001, batch_size=batch_size, action_max=[1, 1])
        # Noise = OUNoise(action_space=action_size, mu=np.asarray([0.0, 0.0]), theta=np.asarray([0.5, 0.5]),
Example #9
    snr_set=avg_SNR,
    csi=0,
    channel=0.9
)  # args: number_of_servers, number_of_users, historic time slots, avg SNR, perfect CSI? (csi), channel correlation (channel)

# Q-Network
state_size = (sim_env.features * (sim_env.W - 1) * sim_env.S)
action_size = sim_env.S + 1
batch_size = 1000
print('state size:', state_size)

QN = DDPGAgent(state_size=state_size,
               action_size=sim_env.S + 1,  # match the length of action_max
               gamma=0.5,
               learning_rate_actor=0.0001,
               learning_rate_critic=0.00005,
               tau=0.0001,
               batch_size=batch_size,
               action_max=[1] * (sim_env.S + 1))
model = QN.load_weights(
    sim_env.channel_type +
    '/MA-raw-results/Weights_DDPG_S{}_rho{}_SNR{}_PS{}_lr5e-05_df0.0_W4_sigOU0.8_thetaOU0.5_Critic_LR_weights_actor_h5'
    .format(sim_env.S, sim_env.p, sim_env.SNR_avg[0], sim_env.pi),
    sim_env.channel_type +
    '/MA-raw-results/Weights_DDPG_S{}_rho{}_SNR{}_PS{}_lr5e-05_df0.0_W4_sigOU0.8_thetaOU0.5_Critic_LR_weights_critic_h5'
    .format(sim_env.S, sim_env.p, sim_env.SNR_avg[0], sim_env.pi))
# f = h5py.File(sim_env.channel_type + '/MA-raw-results/Weights_S{}_rho{}_SNR{}_PS{}_LR_weights_actor_h5'.format(sim_env.S, sim_env.p,
#                                                                                             sim_env.SNR_avg[0],
#                                                                                             sim_env.pi), 'r')

# print('model', list(f.keys()))
    training = 500
    testing = 1000
    testing_comps = 5000

    success_ratio = []
    # Q-Network
    state_size = (sim_env.features * (sim_env.W - 1 + sim_env.CSI) * sim_env.S)
    action_size = sim_env.S + 1
    batch_size = 500
    print('state size:', state_size)

    QN = DDPGAgent(state_size=state_size,
                   action_size=sim_env.S + 1,
                   gamma=0.5,
                   learning_rate_actor=0.00005,
                   learning_rate_critic=0.0001,
                   tau=0.001,
                   batch_size=batch_size,
                   action_max=[1] * (sim_env.S + 1))
    mu = np.concatenate(([0], [0] * sim_env.S))
    theta = np.concatenate(([0.3], [0.3] * sim_env.S))
    max_sigma = np.concatenate(([0.4], [0.7] * sim_env.S))
    min_sigma = np.concatenate(([0.1], [0.1] * sim_env.S))
    action_max = [1] * (sim_env.S + 1)
    action_min = np.concatenate(([0.01], [0] * sim_env.S))
    print('mu:', mu)
    print('action size:', action_size)
    Noise = OUNoise(action_space=action_size,
                    mu=mu,
                    theta=theta,
                    max_sigma=max_sigma,