LAMBDA_FAIRNESS = 10000
EPOCH = 1000
INTERNAL = 1
LR = 0.0005
SAMPLE_FRAC = 0.6

# Baseline (pf) and RL-scheduled AIRVIEW environments
pf_env = AIRVIEW(lambda_avg=LAMBDA_AVG, lambda_fairness=LAMBDA_FAIRNESS)
rl_env = AIRVIEW(lambda_avg=LAMBDA_AVG, lambda_fairness=LAMBDA_FAIRNESS)
av_ues_info = load_av_ue_info()

ddpg_agent = DDPGAgent(
    user_num=None,
    feature_num=FEATURE_NUM,
    rbg_num=RBG_NUM,
    replayer_capacity=REPLAYER_CAPACITY,
    sample_frac=SAMPLE_FRAC,
    lr=LR,
    resume=False
)
ddpg_agent.actor_eval_net.summary()

""" resume """
# ddpg_agent.actor_eval_net = tf.keras.models.load_model('./actor_eval_net_epoch35.h5')

# softac_agent = SoftACAgent(
#     user_num=None,
#     feature_num=FEATURE_NUM,
#     rbg_num=RBG_NUM,
#     replayer_capacity=REPLAYER_CAPACITY,
#     sample_frac=SAMPLE_FRAC,
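# --- Hedged sketch (illustrative only, not the project's DDPGAgent internals) ---
# A minimal replay buffer showing what `replayer_capacity` and `sample_frac`
# plausibly control: the buffer keeps at most `capacity` transitions and each
# learning step draws a random fraction of the stored experience. The class
# and method names below are assumptions for illustration.
import random
from collections import deque

class SimpleReplayer:
    def __init__(self, capacity, sample_frac):
        self.memory = deque(maxlen=capacity)  # oldest transitions are dropped first
        self.sample_frac = sample_frac

    def store(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))

    def sample(self):
        k = max(1, int(len(self.memory) * self.sample_frac))
        return random.sample(list(self.memory), k)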
episodes = 400   # number of episodes
training = 1000  # training steps per episode
testing = 1000   # testing steps per episode

# Q-Network
state_size = ((sim_env.features - sim_env.CSI)
              * (sim_env.W - 1 + sim_env.CSI)
              * sim_env.S)       # state size
action_size = 2                  # 2 actions: blocklength and workload index
batch_size = 512

# DDPG agent: discount factor (gamma), actor/critic learning rates,
# soft-copy factor (tau), batch size, maximum action values
QN = DDPGAgent(state_size=state_size, action_size=2, gamma=0.0,
               learning_rate_actor=0.00005, learning_rate_critic=0.0001,
               tau=0.001, batch_size=batch_size, action_max=[1, 1])

# Ornstein-Uhlenbeck exploration noise: mean reversion level (mu), mean
# reversion speed (theta), noise scale bounds (max/min sigma) and action
# bounds; decay_period is set inside the OUNoise module directly
Noise = OUNoise(action_space=action_size, mu=np.asarray([0.0, 0.0]),
                theta=np.asarray([0.5, 0.5]), max_sigma=np.asarray([0.8, 0.8]),
                min_sigma=np.asarray([0.1, 0.05]),
                action_max=[1, 1], action_min=[0.001, 0])

error_avg = []  # average error per episode
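# --- Hedged sketch (assumption about what OUNoise computes internally) ---
# Ornstein-Uhlenbeck exploration: the noise state reverts toward `mu` at rate
# `theta`, is perturbed by Gaussian noise whose scale decays linearly from
# `max_sigma` to `min_sigma` over `decay_period` steps, and the noisy action
# is clipped to [action_min, action_max]. Method names are illustrative, not
# the module's confirmed interface.
import numpy as np

class SimpleOUNoise:
    def __init__(self, action_space, mu, theta, max_sigma, min_sigma,
                 action_max, action_min, decay_period=50000):
        self.mu = np.asarray(mu, dtype=float)
        self.theta = np.asarray(theta, dtype=float)
        self.max_sigma = np.asarray(max_sigma, dtype=float)
        self.min_sigma = np.asarray(min_sigma, dtype=float)
        self.action_max = np.asarray(action_max, dtype=float)
        self.action_min = np.asarray(action_min, dtype=float)
        self.decay_period = decay_period
        self.state = self.mu.copy()

    def get_action(self, action, t):
        # Linearly anneal sigma, take one OU step, then clip the noisy action.
        frac = min(1.0, t / self.decay_period)
        sigma = self.max_sigma - (self.max_sigma - self.min_sigma) * frac
        self.state += self.theta * (self.mu - self.state) \
                      + sigma * np.random.randn(len(self.mu))
        return np.clip(action + self.state, self.action_min, self.action_max)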
for r in range(loop_start, loop_end + 1):
    num_of_servers = r  # number of servers

    # Initialize simulation environment: number of servers, number of users,
    # historic time slots, average SNR, CSI flag (0 = perfect, 1 = outdated),
    # channel correlation
    sim_env = Simulation(number_of_servers=num_of_servers, number_of_users=1,
                         historic_time=hist_timeslots, snr_set=avg_SNR,
                         csi=1, channel=1)

    episodes = 400
    training = 1000
    testing = 1000
    testing_comps = 5000

    # Q-Network
    state_size = (sim_env.features * (sim_env.W - 1 + sim_env.CSI) * sim_env.S)
    action_size = sim_env.S + 1  # blocklength plus one workload share per server
    batch_size = 500
    QN = DDPGAgent(state_size=state_size, action_size=1 + sim_env.S, gamma=0.0,
                   learning_rate_actor=0.00005, learning_rate_critic=0.0001,
                   tau=0.001, batch_size=batch_size,
                   action_max=[1] * (sim_env.S + 1))

    # Ornstein-Uhlenbeck process noise
    mu = np.concatenate(([0], [0] * sim_env.S))
    theta = np.concatenate(([0.3], [0.3] * sim_env.S))
    max_sigma = np.concatenate(([0.4], [0.7] * sim_env.S))
    min_sigma = np.concatenate(([0.1], [0.1] * sim_env.S))
    action_max = [1] * (sim_env.S + 1)
    action_min = np.concatenate(([0.01], [0] * sim_env.S))
    Noise = OUNoise(action_space=action_size, mu=mu, theta=theta,
                    max_sigma=max_sigma, min_sigma=min_sigma,
                    action_max=action_max, action_min=action_min,
                    decay_period=50000)

    states = sim_env.state
    for e in range(episodes):
        sim_env.reset()
        ee = 0
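        # --- Hedged sketch (assumed agent/environment interface) ---
        # One plausible shape for the training steps that follow in each
        # episode: the actor proposes an action, OU noise is added for
        # exploration, the environment advances one slot, and the transition
        # is stored for mini-batch learning. `act`, `step`, `remember`, and
        # `replay` are illustrative method names, not the confirmed API.
        for t in range(training):
            state = sim_env.state
            action = QN.act(state)                     # deterministic actor output
            action = Noise.get_action(action, t)       # add exploration noise, clip
            next_state, reward = sim_env.step(action)  # advance the simulation
            QN.remember(state, action, reward, next_state)
            QN.replay()                                # learn from a sampled mini-batch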
episodes = 100
training = 1000
testing = 1000
testing_comps = 5000
success_ratio = []

# Q-Network
state_size = ((sim_env.features - sim_env.CSI)
              * (sim_env.W - 1 + sim_env.CSI)
              * sim_env.S)
action_size = 2
batch_size = 128
QN = DDPGAgent(state_size=state_size, action_size=2, gamma=df,
               learning_rate_actor=lr / 2, learning_rate_critic=lr,
               tau=0.001, batch_size=batch_size, action_max=[1, 1])

# Noise
mu = np.concatenate(([0], [0]))
theta = np.concatenate(([OU_theta - 0.2], [OU_theta]))
max_sigma = np.concatenate(([OU_sigma - 0.2], [OU_sigma]))
min_sigma = np.concatenate(([0.001], [0.001]))
action_max = [1] * 2
action_min = np.concatenate(([0.01], [0]))
Noise = OUNoise(action_space=2, mu=mu, theta=theta, max_sigma=max_sigma,
op_env = OPPORTUNISTIC()
rl_env = AIRVIEW(LAMBDA_AVG, LAMBDA_FAIRNESS)

# softac_agent = SoftACAgent(
#     user_num=None,
#     feature_num=FEATURE_NUM,
#     rbg_num=RBG_NUM,
#     replayer_capacity=REPLAYER_CAPACITY,
#     sample_frac=SAMPLE_FRAC,
#     lr=LR
# )
ddpg_agent = DDPGAgent(
    user_num=None,
    feature_num=FEATURE_NUM,
    rbg_num=RBG_NUM,
    replayer_capacity=REPLAYER_CAPACITY,
    sample_frac=SAMPLE_FRAC,
    lr=LR
)

''' evaluation '''
ddpg_agent.actor_eval_net = tf.keras.models.load_model('./actor_eval_net_epoch180.h5')

av_ues_info = load_av_ue_info()
av_ues_info = user_info_threshold(av_ues_info, threshold_min=10e+5, threshold_max=10e+6)

for epoch in range(EPOCH):
    time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
    INITIAL_USER_START = int((epoch + 1) / INTERNAL)
    INITIAL_USER_NUM = 10
    av_ues_idx = list(range(INITIAL_USER_START, INITIAL_USER_START + INITIAL_USER_NUM))
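    # --- Hedged sketch (assumed evaluation step) ---
    # During evaluation the loaded actor is queried greedily (no exploration
    # noise): the current per-user state is fed through actor_eval_net and
    # the resulting scores drive the scheduling decision. `get_state` and
    # `step` are illustrative names, not the confirmed AIRVIEW interface.
    state = rl_env.get_state(av_ues_idx)                              # hypothetical hook
    actions = ddpg_agent.actor_eval_net.predict(state[np.newaxis, ...])[0]
    reward, done = rl_env.step(actions)                               # hypothetical hook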
count_luff = 0
count_bear_off = 0

''' Start of training phase '''
actor_loss_of_episode = []
critic_loss_of_episode = []
Q_predictions_3 = []
Q_predictions_minus_3 = []
Q_predictions_0 = []

tf.reset_default_graph()
with tf.Session() as sess:
    agent = DDPGAgent(mdp.size, action_size_DDPG, lower_bound, upper_bound, sess)
    for e in range(EPISODES):
        WH = w.generateWind()
        hdg0_rand = random.sample(hdg0_rand_vec, 1)[0]
        hdg0 = hdg0_rand * TORAD * np.ones(10)  # initialize the incidence randomly

        # Reinitialize the memory of the flow
        state = mdp.initializeMDP(hdg0, WH)
        mdp.simulator.hyst.reset()
        actor_loss_sim_list = []
        critic_loss_sim_list = []
        for time in range(80):
            # print(time)
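            # --- Hedged sketch (assumed inner-loop bookkeeping) ---
            # A plausible body for this time loop: the agent acts, the MDP
            # advances, and per-step actor/critic losses are collected so they
            # can be averaged into actor_loss_of_episode /
            # critic_loss_of_episode after the loop. `act`, `transition`, and
            # `learn` are illustrative method names, not the confirmed API.
            action = agent.act(state)
            next_state, reward = mdp.transition(action, WH)
            a_loss, c_loss = agent.learn(state, action, reward, next_state)
            actor_loss_sim_list.append(a_loss)
            critic_loss_sim_list.append(c_loss)
            state = next_state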
testing_comps = 5000
index = 1
for cb in comb:
    ID = index
    lr = cb[0]        # learning rate
    df = cb[1]        # discount factor (gamma)
    OU_theta = cb[2]  # OU mean reversion speed
    OU_sigma = cb[3]  # OU noise scale
    print('lr:', lr)
    print('df:', df)
    print('theta:', OU_theta)
    print('sigma:', OU_sigma)

    # Q-Network
    QN = DDPGAgent(state_size=state_size, action_size=2, gamma=df,
                   learning_rate_actor=lr / 2, learning_rate_critic=lr,
                   tau=0.001, batch_size=batch_size, action_max=[1] * 2)
    # QN.load_weights('FadingChannel_OutdatedCSI/Hyperparameters/critic_loss/Weights_DDPG_S3_rho0.9_SNR10.0_PS320_lr5e-05_df0.0_sigOU0.8_thetaOU0.5_critic_LR_weights_actor_h5',
    #                 'FadingChannel_OutdatedCSI/Hyperparameters/critic_loss/Weights_DDPG_S3_rho0.9_SNR10.0_PS320_lr5e-05_df0.0_sigOU0.8_thetaOU0.5_critic_LR_weights_critic_h5')

    # Noise
    mu = np.concatenate(([0], [0]))
    theta = np.concatenate(([OU_theta - 0.2], [OU_theta]))
    max_sigma = np.concatenate(([OU_sigma - 0.2], [OU_sigma]))
    min_sigma = np.concatenate(([0.001], [0.05]))
    action_max = [1] * 2
    action_min = np.concatenate(([0.001], [0]))
    Noise = OUNoise(action_space=2, mu=mu, theta=theta, max_sigma=max_sigma,
                    min_sigma=min_sigma, action_max=action_max,
                    action_min=action_min, decay_period=50000)

    error_avg = []
    error = []
    states = sim_env.state
    for e in range(episodes):
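# --- Hedged sketch (assumption about how the grid `comb` is built) ---
# The loop above is consistent with `comb` being a Cartesian product of
# candidate learning rates, discount factors, and OU noise parameters; the
# concrete value lists below are placeholders, not the study's actual grid.
from itertools import product

learning_rates = [5e-5, 1e-4]    # placeholder candidates
discount_factors = [0.0, 0.5]    # placeholder candidates
OU_thetas = [0.5, 0.7]           # placeholder candidates
OU_sigmas = [0.8, 1.0]           # placeholder candidates
comb = list(product(learning_rates, discount_factors, OU_thetas, OU_sigmas))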
error.append([])
print(comb)

for r in range(loop_start, loop_end + 1):
    # Initialize simulation environment: number of servers (r),
    # number of users, historic time slots, average SNR
    sim_env = Simulation(r, 1, 4, 10)
    testing = 1000  # number of testing cycles

    # Q-Network
    state_size = (sim_env.features * (sim_env.W - 1) * sim_env.S)  # state size
    action_size = 2    # blocklength and partitioned workload index
    batch_size = 512

    # Initialize DDPG agent and networks
    QN = DDPGAgent(state_size=state_size, action_size=2, gamma=0.0,
                   learning_rate_actor=0.00005, learning_rate_critic=0.0001,
                   tau=0.001, batch_size=batch_size, action_max=[1, 1])

    # Initialize noise process
    Noise = OUNoise(action_space=action_size, mu=np.asarray([0.0, 0.0]),
                    theta=np.asarray([0.5, 0.5]), max_sigma=np.asarray([0.8, 0.9]),
                    min_sigma=np.asarray([0.1, 0.05]),
                    action_max=[1, 1], action_min=[0.001, 0])

    # Load pretrained actor and critic network weights
    QN.load_weights('FadingChannel_OutdatedCSI/MA-raw-results/Weights_DDPG_S{}_rho0.9_SNR10.0_PS320_lr5e-05_df0.0_W4_sigOU0.8_thetaOU0.5_Critic_LR_weights_actor_h5'.format(2),
                    'FadingChannel_OutdatedCSI/MA-raw-results/Weights_DDPG_S{}_rho0.9_SNR10.0_PS320_lr5e-05_df0.0_W4_sigOU0.8_thetaOU0.5_Critic_LR_weights_critic_h5'.format(2))

    # sim_env.T = 0.025
    # sim_env.p = 0.9
    index = 0
    for v in comb:
        # QN = DDPGAgent(state_size=state_size, action_size=2, gamma=0.0, learning_rate_actor=0.00005,
        #                learning_rate_critic=0.0001, tau=0.001, batch_size=batch_size, action_max=[1, 1])
        # Noise = OUNoise(action_space=action_size, mu=np.asarray([0.0, 0.0]), theta=np.asarray([0.5, 0.5]),
                     snr_set=avg_SNR, csi=0, channel=0.9)
# arguments: number of servers, number of users, historic time slots,
# average SNR, CSI flag (0 = perfect), channel correlation

# Q-Network
state_size = (sim_env.features * (sim_env.W - 1) * sim_env.S)
action_size = sim_env.S + 1
batch_size = 1000
print('state size:', state_size)
QN = DDPGAgent(state_size=state_size, action_size=sim_env.S + 1, gamma=0.5,
               learning_rate_actor=0.0001, learning_rate_critic=0.00005,
               tau=0.0001, batch_size=batch_size,
               action_max=[1] * (sim_env.S + 1))

model = QN.load_weights(
    sim_env.channel_type + '/MA-raw-results/Weights_DDPG_S{}_rho{}_SNR{}_PS{}_lr5e-05_df0.0_W4_sigOU0.8_thetaOU0.5_Critic_LR_weights_actor_h5'
    .format(sim_env.S, sim_env.p, sim_env.SNR_avg[0], sim_env.pi),
    sim_env.channel_type + '/MA-raw-results/Weights_DDPG_S{}_rho{}_SNR{}_PS{}_lr5e-05_df0.0_W4_sigOU0.8_thetaOU0.5_Critic_LR_weights_critic_h5'
    .format(sim_env.S, sim_env.p, sim_env.SNR_avg[0], sim_env.pi))

# f = h5py.File(sim_env.channel_type + '/MA-raw-results/Weights_S{}_rho{}_SNR{}_PS{}_LR_weights_actor_h5'
#               .format(sim_env.S, sim_env.p, sim_env.SNR_avg[0], sim_env.pi), 'r')
# print('model', list(f.keys()))
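# --- Hedged sketch (illustrative only) ---
# What the soft-copy factor tau used above typically means in DDPG: after
# each learning step the target networks are nudged toward the evaluation
# networks, theta_target <- tau * theta_eval + (1 - tau) * theta_target.
def soft_update(target_weights, eval_weights, tau):
    """Blend evaluation-network weights into the target-network weights."""
    return [tau * w_eval + (1.0 - tau) * w_target
            for w_eval, w_target in zip(eval_weights, target_weights)]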
training = 500
testing = 1000
testing_comps = 5000
success_ratio = []

# Q-Network
state_size = (sim_env.features * (sim_env.W - 1 + sim_env.CSI) * sim_env.S)
action_size = sim_env.S + 1
batch_size = 500
print('state size:', state_size)
QN = DDPGAgent(state_size=state_size, action_size=sim_env.S + 1, gamma=0.5,
               learning_rate_actor=0.00005, learning_rate_critic=0.0001,
               tau=0.001, batch_size=batch_size,
               action_max=[1] * (sim_env.S + 1))

# Ornstein-Uhlenbeck exploration noise
mu = np.concatenate(([0], [0] * sim_env.S))
theta = np.concatenate(([0.3], [0.3] * sim_env.S))
max_sigma = np.concatenate(([0.4], [0.7] * sim_env.S))
min_sigma = np.concatenate(([0.1], [0.1] * sim_env.S))
action_max = [1] * (sim_env.S + 1)
action_min = np.concatenate(([0.01], [0] * sim_env.S))
print('mu:', mu)
print('action size:', action_size)
Noise = OUNoise(action_space=action_size, mu=mu, theta=theta, max_sigma=max_sigma,