Example No. 1
def main(args):

    json_file = args.json_file
    json_files_train = args.json_files_train

    json_file_policy_train = args.json_file_PA_train
    json_file_policy_CS_train = args.json_file_CS_train

    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy_train + '.json', 'r') as f:
        options_policy = json.load(f)
    with open('./config/policy/' + json_file_policy_CS_train + '.json',
              'r') as f:
        options_CS = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf
    for json_file_train in json_files_train:
        with open('./config/deployment/' + json_file_train + '.json',
                  'r') as f:
            options_train = json.load(f)
        included_train_episodes = []
        tot_train_episodes = int(options_train['simulation']['total_samples'] /
                                 options_train['train_episodes']['T_train'])
        N = options['simulation']['N']
        # Multi channel scenario, M denotes number of channels.
        if 'M' in options['simulation']:
            M = options['simulation']['M']
        else:
            M = 1
        # if N <=20:
        #     for i in range(tot_train_episodes+1):
        #         if i<=15 or i%5==0:
        #             included_train_episodes.append(i)
        # else:
        included_train_episodes.append(tot_train_episodes)

        train_tot_simulations = options_train['simulation']['num_simulations']
        tot_test_episodes = int(options['simulation']['total_samples'] /
                                options['train_episodes']['T_train'])
        inner_train_networks = [[]] * tot_test_episodes
        for i in range(tot_test_episodes):
            inner_train_networks[i] = 0
            # if options['simulation']['test_include'] == 'all':
            #     inner_train_networks[i] = 0#list(range(train_tot_simulations))
            # else:
            #     inner_train_networks[i] = list(np.random.randint(0,train_tot_simulations,options['simulation']['test_include']))
        ## Number of samples
        total_samples = options['simulation']['total_samples']

        N = options['simulation']['N']

        # simulation parameters
        train_episodes = options['train_episodes']
        mobility_params = options['mobility_params']
        mobility_params['alpha_angle'] = options['mobility_params'][
            'alpha_angle_rad'] * np.pi  #radian/sec
        #Some defaults
        Pmax_dB = 38.0 - 30
        Pmax = np.power(10.0, Pmax_dB / 10)
        n0_dB = -114.0 - 30
        noise_var = np.power(10.0, n0_dB / 10)

        for ep in included_train_episodes:
            #
            np.random.seed(500 + N + ep)
            file_path = './simulations/channel/%s_network%d' % (json_file, 0)
            data = np.load(file_path + '.npz')

            H_all = data['arr_1']
            H_all_2 = []
            for i in range(total_samples):
                H_all_2.append(H_all[i]**2)

            weights = []
            for loop in range(total_samples):
                weights.append(np.array(np.ones(N)))

            time_calculating_strategy_takes = []

            # Virtual neighbor placer

            policy = DQN.DQN(options,
                             options_policy,
                             N,
                             M,
                             Pmax,
                             noise_var,
                             seed=500 + N + ep)

            ## Our JSAC version uses a linear quantizer.
            strategy_translation = np.zeros(policy.power_levels)
            strategy_translation[0] = 0.0  # Tx power 0
            # Compute linearly spaced power steps between 0 and Pmax (in watts)
            for i in range(1, policy.power_levels - 1):
                strategy_translation[i] = i * (Pmax /
                                               (policy.power_levels - 1))
            strategy_translation[-1] = Pmax

            # strategy_translation = np.zeros(policy.power_levels)
            # strategy_translation[0] = 0.0 # Tx power 0
            # Pmin_dB = 10.0-30
            # # Calculate steps in dBm
            # strategy_translation_dB_step = (Pmax_dB-Pmin_dB)/(policy.power_levels-2)
            # for i in range(1,policy.power_levels-1):
            #     strategy_translation[i] = np.power(10.0,((Pmin_dB+(i-1)*strategy_translation_dB_step))/10)
            # strategy_translation[-1] = Pmax

            time_calculating_strategy_takes = []
            time_optimization_at_each_slot_takes = []
            sum_rate_distributed_policy_episode = []
            p_strategy_all_episode = []
            i_train = 0
            #        for i_train in range(len(inner_train_networks[0])):
            sum_rate_distributed_policy = []
            sum_rate_list_distributed_policy = collections.deque([], 2)
            # Initial allocation is just random
            p_central = Pmax * np.random.rand(N)
            p_strategy = np.array(
                p_central)  # strategy is a completely different object
            p_strategy_current = np.array(p_strategy)

            alpha_central = np.zeros((N, M))
            for k in range(N):
                alpha_central[k, np.random.randint(M)] = 1
            alpha_strategy = np.array(
                alpha_central)  # strategy is a completely different object
            alpha_strategy_current = np.array(alpha_strategy)

            alpha_int_central = np.where(alpha_central == 1)[1].astype(int)
            alpha_int_strategy = np.array(
                alpha_central)  # strategy is a completely different object
            alpha_int_strategy_current = np.array(alpha_int_strategy)

            # current CSI used to calculate the power allocation
            current_csi = 0
            previous_csi = 0

            p_strategy_all = []
            alpha_strategy_all = []
            alpha_int_strategy_all = []

            with tf.Session() as sess:
                sess.run(policy.init)
                policy.initialize_updates(sess)
                # Start iterating over time slots
                for sim in range(total_samples):
                    # Load the policy snapshot saved at this training episode for testing.
                    if (sim % train_episodes['T_train'] == 0):
                        train_network_idx = i_train  #inner_train_networks[int(sim /train_episodes['T_train'])][i_train]
                        model_destination = (
                            './simulations/sumrate/policy/%s_%s_%s_network%d_episode%d.ckpt'
                            % (json_file_train, json_file_policy_train,
                               json_file_policy_CS_train, train_network_idx,
                               ep)).replace('[', '').replace(']', '')
                        policy.load(sess, model_destination)
                        i_train += 1
                        i_train = i_train % train_tot_simulations

                    # If at least one time slot passed to get experience
                    if (sim % train_episodes['T_train'] > 1):
                        # Each agent picks its strategy.
                        for agent in range(N):
                            current_local_state = policy.local_state(
                                sim, agent, p_strategy_all, alpha_strategy_all,
                                H_all_2, sum_rate_list_distributed_policy,
                                weights)
                            a_time = time.time()
                            strategy = policy.act_noepsilon(
                                sess, current_local_state, sim)
                            time_calculating_strategy_takes.append(
                                time.time() - a_time)

                            # Pick the action
                            p_strategy[agent] = strategy_translation[
                                strategy % policy.power_levels]
                            alpha_strategy[agent, :] = np.zeros(M)
                            alpha_strategy[agent,
                                           strategy // policy.power_levels] = 1
                            alpha_int_strategy[
                                agent] = strategy // policy.power_levels

                            # Add current state to the short term memory to observe it during the next state
                            policy.previous_state[
                                agent, :] = current_local_state
                            policy.previous_action[agent] = strategy

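                    # For the first two slots of each test episode there is no state history yet, so actions stay random.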
                    if (sim % train_episodes['T_train'] < 2):
                        p_strategy = np.random.rand(N)
                        alpha_strategy = np.zeros((N, M))
                        for k in range(N):
                            alpha_strategy[k, np.random.randint(M)] = 1
                        alpha_int_strategy = np.where(
                            alpha_strategy == 1)[1].astype(int)

                    p_strategy_current = np.array(p_strategy)
                    alpha_strategy_current = np.array(alpha_strategy)
                    alpha_int_strategy_current = np.array(
                        alpha_int_strategy).astype(int)
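                    # Sum interference per link and channel: received co-channel power minus the
                    # desired link's own contribution (the diagonal term), plus the noise variance.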
                    for m in range(M):
                        policy.prev_suminterferences[:, m] = np.matmul(
                            H_all_2[sim][:, :, m], alpha_strategy[:, m] *
                            p_strategy) - (H_all_2[sim][:, :, m].diagonal() *
                                           alpha_strategy[:, m] *
                                           p_strategy) + noise_var
                    # sims_pos_p[np.where(p_strategy_current>0)] = sim

                    sum_rate_list_distributed_policy.append(
                        pb.reward_helper(H_all[sim], p_strategy,
                                         alpha_strategy, noise_var, Pmax))

                    weights.append(np.array(np.ones(N)))
                    sum_rate_distributed_policy.append(
                        pb.sumrate_multi_weighted_clipped(
                            H_all[sim], p_strategy, alpha_strategy, noise_var,
                            weights[sim]))

                    p_strategy_all.append(p_strategy_current)
                    alpha_strategy_all.append(alpha_strategy_current)
                    alpha_int_strategy_all.append(alpha_int_strategy_current)
                    if (sim % 2500 == 0):
                        print('Test time %d' % (sim))
            sum_rate_distributed_policy_episode.append(
                copy.copy(sum_rate_distributed_policy))
            p_strategy_all_episode.append(copy.copy(p_strategy_all))

            # End Train Phase
            np_save_path = './simulations/sumrate/test/%s_%s_%s_%s_episode%d.ckpt' % (
                json_file, json_file_train, json_file_policy_train,
                json_file_policy_CS_train, ep)
            print(np_save_path)
            np.savez(np_save_path, options, options_policy,
                     sum_rate_distributed_policy_episode,
                     p_strategy_all_episode,
                     time_optimization_at_each_slot_takes,
                     time_calculating_strategy_takes, included_train_episodes,
                     inner_train_networks)
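
Example No. 1 decodes each discrete DQN action into a (power, channel) pair: the action index modulo policy.power_levels selects a transmit power from a linear quantizer over [0, Pmax], and integer division by policy.power_levels selects the channel. A minimal standalone sketch of that decoding, with illustrative helper names (the DQN module itself is not reproduced here):

import numpy as np

def build_linear_power_levels(Pmax, power_levels):
    # Linear quantizer over [0, Pmax]: level 0 means "transmitter off", the last level is Pmax.
    levels = np.zeros(power_levels)
    for i in range(1, power_levels - 1):
        levels[i] = i * (Pmax / (power_levels - 1))
    levels[-1] = Pmax
    return levels

def decode_action(action, power_table, M):
    # Joint action index = channel * len(power_table) + power index, mirroring the code above.
    n_p = len(power_table)
    power = power_table[action % n_p]
    channel = action // n_p
    assert channel < M, "action index out of range for M channels"
    return power, channel

# Quick check: 10 power levels, 2 channels, Pmax corresponding to 38 dBm.
Pmax = np.power(10.0, (38.0 - 30.0) / 10.0)
print(decode_action(13, build_linear_power_levels(Pmax, 10), 2))
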
Example No. 2
def main(args):

    json_file = args.json_file

    
    json_file_policy = args.json_file_PA
    json_file_CS = args.json_file_CS
    num_sim = args.num_sim
    
    with open ('./config/deployment/'+json_file+'.json','r') as f:
        options = json.load(f)
    with open ('./config/policy/'+json_file_policy+'.json','r') as f:
        options_policy = json.load(f)
    with open ('./config/policy/'+json_file_CS+'.json','r') as f:
        options_CS = json.load(f)
        
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf
    import random
    
    ## Number of samples
    total_samples = options['simulation']['total_samples']
        
    N = options['simulation']['N']
    # Multi channel scenario, M denotes number of channels.
    if 'M' in options['simulation']:
        M = options['simulation']['M']
    else: M = 1
    
    # PFS set to true means that we save log average sum-rate instead of sum-rate
    pfs = False
    if 'pfs' in options['simulation']:
        pfs = options['simulation']['pfs']
        beta = 0.01
    
    
    if num_sim == -1:
        num_simulations = options['simulation']['num_simulations']
        simulation = options['simulation']['simulation_index_start']
    else:
        num_simulations = 1
        simulation = num_sim
    
    # simulation parameters
    train_episodes = options['train_episodes']
    mobility_params = options['mobility_params']
    mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi #radian/sec
    #Some defaults
    Pmax_dB = 38.0-30
    Pmax = np.power(10.0,Pmax_dB/10)
    n0_dB = -114.0-30
    noise_var = np.power(10.0,n0_dB/10)
    # Hyperparameters
    N_neighbors = options_policy['N_neighbors']
    neightresh = noise_var*options_policy['neightresh']
    
    forcezero = False
    
    for overal_sims in range(simulation,simulation+num_simulations):
        tf.reset_default_graph()
        np.random.seed(100+overal_sims)
        random.seed(100+overal_sims)
        tf.set_random_seed(100+overal_sims)
        
        file_path = './simulations/channel/%s_network%d'%(json_file,overal_sims)
        data = np.load(file_path+'.npz',allow_pickle=True)
        
        H_all = data['arr_1']
        H_all_2 = []
        for i in range(total_samples):
            H_all_2.append(H_all[i]**2)
        
        weights = []        
        
        time_calculating_strategy_takes = []
            
        # # Virtual neighbor placer
        # neighbors_in = collections.deque([],2)
        # neighbors = collections.deque([],2)
    
        # sims_pos_p = np.zeros(N).astype(int) - 1
    
        policy = DDPG.DDPG(options,options_policy,options_CS,N,M,Pmax,noise_var, seed=100+overal_sims)
       
       
        # Start the simulation
        # Sum-rate trace for this simulation
        sum_rate_distributed_policy = []
        sum_rate_list_distributed_policy = collections.deque([],2)
        # Initial allocation is just random
        p_central = Pmax * np.random.rand(N)
        p_strategy = np.array(p_central) # strategy is a completely different object
        p_strategy_current = np.array(p_strategy)
        
        alpha_central = np.zeros((N,M))
        for k in range(N):
            alpha_central[k,np.random.randint(M)] = 1
    
        alpha_strategy = np.array(alpha_central) # strategy is a completely different object
        alpha_strategy_current = np.array(alpha_strategy)
        
        alpha_int_central = np.where(alpha_central==1)[1].astype(int)
        alpha_int_strategy = np.array(alpha_central) # strategy is a completely different object
        alpha_int_strategy_current = np.array(alpha_int_strategy)
       
        time_calculating_strategy_takes = []
        time_optimization_at_each_slot_takes = []
       
        p_strategy_all=[]
        alpha_strategy_all = []
        alpha_int_strategy_all = []
    
        with tf.Session() as sess:
            sess.run(policy.init)
            policy.initialize_critic_updates(sess) 
            policy.initialize_actor_updates(sess) 
            policy.initialize_DQNupdates(sess)
            # Start iterating over time slots
            for sim in range (total_samples):
                policy.check_memory_restart(sess,sim)       
                policy.update_handler(sess,sim)
                # save an instance per training episode for testing purposes.
                if(sim %train_episodes['T_train'] == 0):
                    model_destination = ('./simulations/sumrate/policy/%s_%s_%s_network%d_episode%d.ckpt'%(
                            json_file,json_file_policy,json_file_CS,overal_sims,int(float(sim)/train_episodes['T_train']))).replace('[','').replace(']','')
                    policy.save(sess,model_destination)
        
                # Act with the learned policy only after the first 50 random warm-up slots of the episode
                if (sim %train_episodes['T_train'] > 49):                    
                    # Each agent picks its strategy.
                    for agent in range (N):
                        # Channel Selection #               
                        current_local_state = policy.local_state(sim,agent,p_strategy_all,alpha_strategy_all,alpha_int_strategy_all,H_all_2,sum_rate_list_distributed_policy,weights) 
                        a_time = time.time()  
                        CSstrategy = policy.CSact(sess,current_local_state,sim)
                        selected_channel = int(CSstrategy)
                        current_singlechannel_state = current_local_state[selected_channel*policy.DDPGnum_input:(selected_channel+1)*policy.DDPGnum_input]
                        # if sim > 1000 and forcezero:
                        #     print('aaa')
                        PAstrategy = policy.PAact(sess,current_singlechannel_state,sim,forcezero=forcezero)
                        time_calculating_strategy_takes.append(time.time()-a_time)
                        
                        if (sim % train_episodes['T_train'] > 50): # Now there is a previous state to form an experience tuple.
                            # sorted_neighbors_criteria = np.log10(H_all_2[sim-1][np.array(neighbors[-1][agent]),agent]/policy.prev_suminterferences[neighbors[-1][agent]])
                            # sorted_neighbors = neighbors[-1][agent][np.argsort(sorted_neighbors_criteria)[::-1]]
                            # if len(sorted_neighbors)>N_neighbors:
                            #     sorted_neighbors = sorted_neighbors[:N_neighbors]
                            # sorted_neighbors = np.append(sorted_neighbors,agent)
                            # sorted_interfereds = np.argsort(H_all_2[sim-1][:,agent,alpha_int_strategy_all[-1][agent]])[::-1]
                            
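                            # Rank the other links by how much this agent's transmitter contributes to their
                            # interference (relative to their total interference), co-channel links first;
                            # the agent itself is excluded from the ranking.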
                            sorted_interfereds_all = np.argsort(H_all_2[sim-1][:,agent,alpha_int_strategy_all[-1][agent]]/policy.prev_suminterferences[:,alpha_int_strategy_all[-1][agent]])[::-1]
                            sorted_interfereds_all = np.delete(sorted_interfereds_all,np.where(sorted_interfereds_all==agent))
                            
                            sorted_interfereds = np.hstack((np.setdiff1d(sorted_interfereds_all,np.where(alpha_strategy_all[-1][:,alpha_int_strategy_all[-1][agent]]==0),assume_unique=True),
                                                            np.setdiff1d(sorted_interfereds_all,np.where(alpha_strategy_all[-1][:,alpha_int_strategy_all[-1][agent]]==1),assume_unique=True)))
                            # current_reward = min(10,max(-10,np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent],sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent,agent,alpha_int_strategy_all[-1][agent]]))))
                            # if forcezero: sorted_interfereds_and_agent = np.delete(sorted_interfereds,np.where(sorted_interfereds==agent))#[:policy.N_neighbors]
                            # else: sorted_interfereds_and_agent = np.append(np.delete(sorted_interfereds,np.where(sorted_interfereds==agent)),agent)#[:policy.N_neighbors],agent)
                            sorted_interfereds_and_agent = np.append(np.delete(sorted_interfereds,np.where(sorted_interfereds==agent))[:policy.N_neighbors],agent)
                            if not pfs: current_reward = np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent],sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent,agent,alpha_int_strategy_all[-1][agent]]))
                            # else: current_reward = np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent],sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent,agent,alpha_int_strategy_all[-1][agent]]))
                            # else: current_reward = min(10,max(-5,np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent],sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent,agent,alpha_int_strategy_all[-1][agent]]))))
                            else: current_reward = np.sum(np.multiply(weights[-1][sorted_interfereds_and_agent],sum_rate_list_distributed_policy[-1][sorted_interfereds_and_agent,agent,alpha_int_strategy_all[-1][agent]]))
    
                            # if forcezero: current_reward -= max(sum_rate_list_distributed_policy[-1][np.arange(N),np.arange(N),alpha_int_strategy_all[-1]])
                            if forcezero: current_reward -= weights[-1][agent]*sum_rate_list_distributed_policy[-1][agent,agent,alpha_int_strategy_all[-1][agent]]
                            if forcezero: current_reward -= 5
                            # if forcezero:
                            #     for repeat in range(5):
                            #         policy.CSremember(agent,current_local_state,current_reward)
                            #         policy.PAremember(agent,current_local_state[alpha_int_strategy_all[-1][agent]*policy.DDPGnum_input:(alpha_int_strategy_all[-1][agent]+1)*policy.DDPGnum_input],current_reward)
                            # else:                        
                            policy.CSremember(agent,current_local_state,current_reward)
                            policy.PAremember(agent,current_local_state[alpha_int_strategy_all[-1][agent]*policy.DDPGnum_input:(alpha_int_strategy_all[-1][agent]+1)*policy.DDPGnum_input],current_reward)
                            
                        # Train only once per time slot; checking agent == N-1 ensures that.
                        if agent == (N-1): # If there is enough data to create a mini batch
                            a_time = time.time()
                            
                            # TRAIN for a minibatch
                            policy.train(sess,sim)
                            
                            time_optimization_at_each_slot_takes.append(time.time()-a_time)
                        # if sim == 200:
                        #     print('debug')
                        
                        # Pick the action
                        p_strategy[agent] = policy.Pmax * PAstrategy #** 10
                        # p_strategy[agent] = policy.Pmax * np.round(PAstrategy,2) #** 10
                        alpha_strategy[agent,:] = np.zeros(M)
                        alpha_strategy[agent,CSstrategy] = 1
                        alpha_int_strategy[agent] = selected_channel
    
                        # Add current state to the short term memory to observe it during the next state
                        policy.previous_state[agent,:] = current_singlechannel_state
                        policy.previous_action[agent] = PAstrategy
                        policy.DQNprevious_state[agent,:] = current_local_state
                        policy.DQNprevious_action[agent] = CSstrategy
    
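                # For the first 50 slots of each training episode, take random actions so that the
                # state history (powers, channel choices, interference) builds up before the learned
                # policy and replay memory are used.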
                if(sim %train_episodes['T_train'] < 50):
                    p_strategy = np.random.rand(N)
                    alpha_strategy = np.zeros((N,M))
                    for k in range(N):
                        alpha_strategy[k,np.random.randint(M)] = 1
                    alpha_int_strategy = np.where(alpha_strategy==1)[1].astype(int)
                p_strategy_current = np.array(p_strategy)
                alpha_strategy_current = np.array(alpha_strategy)
                alpha_int_strategy_current = np.array(alpha_int_strategy).astype(int)
                for m in range(M):
                    policy.prev_suminterferences[:,m] = np.matmul(H_all_2[sim][:,:,m],alpha_strategy[:,m]*p_strategy) - (H_all_2[sim][:,:,m].diagonal()*alpha_strategy[:,m]*p_strategy) + noise_var
                if M > 1:
                    policy.sorted_channels = np.argsort(H_all_2[sim][np.arange(N),np.arange(N),:]/policy.prev_suminterferences)/float(M)
                # sims_pos_p[np.where(p_strategy_current>0)] = sim
    
                # tmp_neighbors_in = []
                # tmp_neighbors = []
                # for nei_i in range(N):
                #     neigh_tmp_variab = np.where((H_all[sim][nei_i,:]**2)*p_strategy_current>neightresh)
                #     neigh_tmp_variab = np.delete(neigh_tmp_variab,np.where(neigh_tmp_variab[0]==nei_i))
                #     tmp_neighbors_in.append(neigh_tmp_variab)
    
                # for nei_i in range(N):
                #     tmp_neighlist = []
                #     for nei_j in range(N):
                #         if(len(np.where(tmp_neighbors_in[nei_j]==nei_i)[0]) != 0):
                #             tmp_neighlist.append(nei_j)
                #     if (len(tmp_neighlist) == 0 and len(neighbors) >0):
                #         tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                #     else:
                #         tmp_neighbors.append(np.array(tmp_neighlist))
                # neighbors.append(tmp_neighbors)
                # neighbors_in.append(tmp_neighbors_in)
                # all sumrates in a list
                sum_rate_list_distributed_policy.append(pb.reward_helper(H_all[sim],p_strategy,alpha_strategy,noise_var,Pmax))
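                # PFS uses inverse exponentially-averaged rates as weights and logs the sum of log
                # average rates; otherwise weights are all ones and the plain weighted sum-rate is logged.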
                if not pfs:
                    weights.append(np.array(np.ones(N)))
                    sum_rate_distributed_policy.append(pb.sumrate_multi_weighted_clipped(H_all[sim],p_strategy,alpha_strategy,noise_var,weights[sim]))
                else:
                    rates = sum_rate_list_distributed_policy[-1][np.arange(N),np.arange(N),alpha_int_strategy_current]
                    if sim % train_episodes['T_train'] == 0: # Restart
                        average_sum_rate = np.array(rates)
                    else:
                        average_sum_rate = (1.0-beta)*average_sum_rate+beta*np.array(rates)
                    weights.append(np.array([1.0/i for i in average_sum_rate]))
                    sum_rate_distributed_policy.append(np.sum(np.log(average_sum_rate)))
    
                p_strategy_all.append(p_strategy_current)
                alpha_strategy_all.append(alpha_strategy_current)
                alpha_int_strategy_all.append(alpha_int_strategy_current)
                if(sim%100 == 0):
                    print('Time %d sim %d'%(sim,overal_sims))
                if sum(p_strategy_all[-1]>=0.98*policy.Pmax)==policy.N:
                    print('sim %d all 1'%(sim))
                    forcezero = True
                elif sum(p_strategy_all[-1]<=0.02*policy.Pmax)==policy.N:
                    print('sim %d all 0'%(sim))
                    forcezero = True
                else: forcezero = False
           
            policy.equalize(sess)
            print('Train is over sim %d'%(overal_sims))
    
            model_destination = ('./simulations/sumrate/policy/%s_%s_%s_network%d_episode%d.ckpt'%(
                    json_file,json_file_policy,json_file_CS,overal_sims,int(float(total_samples)/train_episodes['T_train']))).replace('[','').replace(']','')
            policy.save(sess,model_destination)
               
        # End Train Phase
        np_save_path = './simulations/sumrate/train/%s_%s_%s_network%d.ckpt'%(json_file,json_file_policy,json_file_CS,overal_sims)
        print(np_save_path)
        np.savez(np_save_path,options,options_policy,sum_rate_distributed_policy,p_strategy_all,alpha_strategy_all,
                 time_optimization_at_each_slot_takes,time_calculating_strategy_takes)
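
When pfs is enabled in Example No. 2, the per-link weights are the inverse of an exponentially weighted moving average of each link's rate (with beta = 0.01), and the logged objective becomes the sum of log average rates. A minimal sketch of that update rule under those assumptions (the helper name is illustrative):

import numpy as np

def pfs_update(average_rate, instantaneous_rate, beta=0.01):
    # Exponentially weighted moving average of per-link rates.
    average_rate = (1.0 - beta) * average_rate + beta * instantaneous_rate
    weights = 1.0 / average_rate             # proportional-fair weights for the next slot
    utility = np.sum(np.log(average_rate))   # log-average sum-rate objective
    return average_rate, weights, utility

# Start from the first slot's rates (with a small floor), then feed in new rates.
avg = np.array([1.0, 2.0, 0.5]) + 1e-10
avg, w, u = pfs_update(avg, np.array([1.5, 1.0, 0.7]))
print(w, u)
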
Example No. 3
def main(args):
    
    json_file = args.json_file
    json_files_train = args.json_files_train
        
    json_file_policy_train = args.json_file_PA_train
    json_file_policy_CS_train = args.json_file_CS_train
    
    with open ('./config/deployment/'+json_file+'.json','r') as f:
        options = json.load(f)
    with open ('./config/policy/'+json_file_policy_train+'.json','r') as f:
        options_policy = json.load(f)
    with open ('./config/policy/'+json_file_policy_CS_train+'.json','r') as f:
        options_CS = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf
    
    for json_file_train in json_files_train:
        with open ('./config/deployment/'+json_file_train+'.json','r') as f:
            options_train = json.load(f)
        included_train_episodes = []
        tot_train_episodes = int(options_train['simulation']['total_samples']/options_train['train_episodes']['T_train'])
        N = options['simulation']['N']
        # Multi channel scenario, M denotes number of channels.
        if 'M' in options['simulation']:
            M = options['simulation']['M']
        else: M = 1
        # if N <=20:
        #     for i in range(tot_train_episodes+1):
        #         if i<=15 or i%5==0:
        #             included_train_episodes.append(i)
        # else:
        included_train_episodes.append(tot_train_episodes)
        
        train_tot_simulations = options_train['simulation']['num_simulations']
        tot_test_episodes = int(options['simulation']['total_samples']/options['train_episodes']['T_train'])
        inner_train_networks = [[]]*tot_test_episodes
        for i in range(tot_test_episodes):
            inner_train_networks[i] = 0
            # if options['simulation']['test_include'] == 'all':
            #     inner_train_networks[i] = 0#list(range(train_tot_simulations))
            # else:
            #     inner_train_networks[i] = list(np.random.randint(0,train_tot_simulations,options['simulation']['test_include']))
        ## Number of samples
        total_samples = options['simulation']['total_samples']
        
        
        
        # simulation parameters
        train_episodes = options['train_episodes']
        mobility_params = options['mobility_params']
        mobility_params['alpha_angle'] = options['mobility_params']['alpha_angle_rad'] * np.pi #radian/sec
        #Some defaults
        Pmax_dB = 38.0-30
        Pmax = np.power(10.0,Pmax_dB/10)
        n0_dB = -114.0-30
        noise_var = np.power(10.0,n0_dB/10)
        # Hyperparameters
        neightresh = noise_var*options_policy['neightresh']        
        i_train = -1
        
        for ep in included_train_episodes:
            #
            np.random.seed(500 + N + ep)
            # i_train = np.random.randint(train_tot_simulations)
            i_train+=1
            i_train = i_train % train_tot_simulations
            
            file_path = './simulations/channel/%s_network%d'%(json_file,0)
            data = np.load(file_path+'.npz')
            
            H_all = data['arr_1']
            H_all_2 = []
            for i in range(total_samples):
                H_all_2.append(H_all[i]**2)
            
            weights = []
            for loop in range(total_samples):
                weights.append(np.array(np.ones(N)))
            
            time_calculating_strategy_takes = []
                
            # Virtual neighbor placer
            neighbors_in = collections.deque([],2)
            neighbors = collections.deque([],2)
        
            sims_pos_p = np.zeros(N).astype(int) - 1
            policy = DDPG.DDPG(options,options_policy,options_CS,N,M,Pmax,noise_var, seed=500 + N + ep)
           
            time_calculating_strategy_takes = []
            time_optimization_at_each_slot_takes = []
            sum_rate_distributed_policy_episode = []
            p_strategy_all_episode = []
    #        for i_train in range(len(inner_train_networks[0])):
            sum_rate_distributed_policy = []
            sum_rate_list_distributed_policy = collections.deque([],2)
            # Initial allocation is just random
            p_central = Pmax * np.random.rand(N)
            p_strategy = np.array(p_central) # strategy is a completely different object
            p_strategy_current = np.array(p_strategy)
            
            alpha_central = np.zeros((N,M))
            for k in range(N):
                alpha_central[k,np.random.randint(M)] = 1
        
            alpha_strategy = np.array(alpha_central) # strategy is a completely different object
            alpha_strategy_current = np.array(alpha_strategy)
            
            alpha_int_central = np.where(alpha_central==1)[1].astype(int)
            alpha_int_strategy = np.array(alpha_central) # strategy is a completely different object
            alpha_int_strategy_current = np.array(alpha_int_strategy)
                      
            p_strategy_all=[]
            alpha_strategy_all = []
            alpha_int_strategy_all = []
            with tf.Session() as sess:
                sess.run(policy.init)
                policy.initialize_critic_updates(sess) 
                policy.initialize_actor_updates(sess) 
                # Start iterating over time slots
                for sim in range (total_samples):
                    # Load the policy snapshot saved at this training episode for testing.
                    if(sim %train_episodes['T_train'] == 0):
                        train_network_idx = i_train#inner_train_networks[int(sim /train_episodes['T_train'])][i_train]
                        model_destination = ('./simulations/sumrate/policy/%s_%s_%s_network%d_episode%d.ckpt'%(
                                json_file_train,json_file_policy_train,json_file_policy_CS_train,train_network_idx,ep)).replace('[','').replace(']','')
                        policy.load(sess,model_destination)
                        i_train+=1
                        i_train = i_train % train_tot_simulations
            
                    # If at least one time slot passed to get experience
                    if (sim %train_episodes['T_train'] > 1):                    
                        # Each agent picks its strategy.
                        for agent in range (N):
                            # Channel Selection #               
                            current_local_state = policy.local_state(sim,agent,p_strategy_all,alpha_strategy_all,alpha_int_strategy_all,H_all_2,sum_rate_list_distributed_policy,weights) 
                            a_time = time.time()  
                            CSstrategy = policy.CSact_noepsilon(sess,current_local_state,sim)
                            selected_channel = int(CSstrategy)
                            current_singlechannel_state = current_local_state[selected_channel*policy.DDPGnum_input:(selected_channel+1)*policy.DDPGnum_input]
                            # if sim > 1000 and forcezero:
                            #     print('aaa')
                            PAstrategy = policy.PAact_noepsilon(sess,current_singlechannel_state,sim)
                            time_calculating_strategy_takes.append(time.time()-a_time)
                            # if sim == 200:
                            #     print('debug')
                            
                            # Pick the action
                            p_strategy[agent] = policy.Pmax * PAstrategy #** 10
                            # p_strategy[agent] = policy.Pmax * np.round(PAstrategy,2) #** 10
                            alpha_strategy[agent,:] = np.zeros(M)
                            alpha_strategy[agent,CSstrategy] = 1
                            alpha_int_strategy[agent] = selected_channel
        
                            # Add current state to the short term memory to observe it during the next state
                            policy.previous_state[agent,:] = current_singlechannel_state
                            policy.previous_action[agent] = PAstrategy
                            policy.DQNprevious_state[agent,:] = current_local_state
                            policy.DQNprevious_action[agent] = CSstrategy
        
                    if(sim %train_episodes['T_train'] < 2):
                        p_strategy = np.random.rand(N)
                        alpha_strategy = np.zeros((N,M))
                        for k in range(N):
                            alpha_strategy[k,np.random.randint(M)] = 1
                        alpha_int_strategy = np.where(alpha_strategy==1)[1].astype(int)
                    p_strategy_current = np.array(p_strategy)
                    alpha_strategy_current = np.array(alpha_strategy)
                    alpha_int_strategy_current = np.array(alpha_int_strategy).astype(int)
                    for m in range(M):
                        policy.prev_suminterferences[:,m] = np.matmul(H_all_2[sim][:,:,m],alpha_strategy[:,m]*p_strategy) - (H_all_2[sim][:,:,m].diagonal()*alpha_strategy[:,m]*p_strategy) + noise_var
                    if M > 1:
                        policy.sorted_channels = np.argsort(H_all_2[sim][np.arange(N),np.arange(N),:]/policy.prev_suminterferences)/float(M)
                    # sims_pos_p[np.where(p_strategy_current>0)] = sim
        
                    # tmp_neighbors_in = []
                    # tmp_neighbors = []
                    # for nei_i in range(N):
                    #     neigh_tmp_variab = np.where((H_all[sim][nei_i,:]**2)*p_strategy_current>neightresh)
                    #     neigh_tmp_variab = np.delete(neigh_tmp_variab,np.where(neigh_tmp_variab[0]==nei_i))
                    #     tmp_neighbors_in.append(neigh_tmp_variab)
        
                    # for nei_i in range(N):
                    #     tmp_neighlist = []
                    #     for nei_j in range(N):
                    #         if(len(np.where(tmp_neighbors_in[nei_j]==nei_i)[0]) != 0):
                    #             tmp_neighlist.append(nei_j)
                    #     if (len(tmp_neighlist) == 0 and len(neighbors) >0):
                    #         tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                    #     else:
                    #         tmp_neighbors.append(np.array(tmp_neighlist))
                    # neighbors.append(tmp_neighbors)
                    # neighbors_in.append(tmp_neighbors_in)
                    # all sumrates in a list
                    sum_rate_list_distributed_policy.append(pb.reward_helper(H_all[sim],p_strategy,alpha_strategy,noise_var,Pmax))
                    weights.append(np.array(np.ones(N)))
                    sum_rate_distributed_policy.append(pb.sumrate_multi_weighted_clipped(H_all[sim],p_strategy,alpha_strategy,noise_var,weights[sim]))
        
                    p_strategy_all.append(p_strategy_current)
                    alpha_strategy_all.append(alpha_strategy_current)
                    alpha_int_strategy_all.append(alpha_int_strategy_current)
                    
                    if(sim%2500 == 0):
                        print('Test time %d'%(sim))
    
                sum_rate_distributed_policy_episode.append(copy.copy(sum_rate_distributed_policy))
                p_strategy_all_episode.append(copy.copy(p_strategy_all))
            # End Train Phase
            np_save_path = './simulations/sumrate/test/%s_%s_%s_%s_episode%d.ckpt'%(json_file,json_file_train,json_file_policy_train,json_file_policy_CS_train,ep)
            print('Saved to %s'%(np_save_path))
            np.savez(np_save_path,options,options_policy,sum_rate_distributed_policy_episode,p_strategy_all_episode,
                     time_optimization_at_each_slot_takes,time_calculating_strategy_takes,included_train_episodes,inner_train_networks)
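
Examples No. 1 to No. 3 all refresh policy.prev_suminterferences the same way: for every link and channel, sum the received power from co-channel transmitters, subtract the link's own contribution, and add the noise variance. A standalone sketch of that computation, assuming H2[i, j, m] holds the squared gain from transmitter j to receiver i on channel m (which is what the diagonal subtraction above suggests):

import numpy as np

def sum_interference(H2, alpha, p, noise_var):
    # H2: (N, N, M) squared channel gains, alpha: (N, M) one-hot channel choices, p: (N,) powers.
    N, _, M = H2.shape
    interference = np.empty((N, M))
    for m in range(M):
        rx_power = H2[:, :, m] @ (alpha[:, m] * p)       # total power received by each link on channel m
        own = H2[:, :, m].diagonal() * alpha[:, m] * p   # each link's own (desired) contribution
        interference[:, m] = rx_power - own + noise_var
    return interference

# Tiny example: 3 links, 2 channels, random squared gains.
rng = np.random.default_rng(0)
H2 = rng.random((3, 3, 2))
alpha = np.zeros((3, 2))
alpha[np.arange(3), rng.integers(0, 2, size=3)] = 1
print(sum_interference(H2, alpha, rng.random(3), 1e-14))
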
Example No. 4
def main(args):

    json_file = args.json_file

    num_sim = args.num_sim

    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)

    ## Number of samples
    total_samples = options['simulation']['total_samples']

    N = options['simulation']['N']

    # Multi channel scenario, M denotes number of channels.
    if 'M' in options['simulation']:
        M = options['simulation']['M']
    else:
        M = 1

    # PFS set to true means that we save log average sum-rate instead of sum-rate
    pfs = False
    if 'pfs' in options['simulation']:
        pfs = options['simulation']['pfs']

    # Now assume each time slot is 1 ms.
    isTrain = options['simulation']['isTrain']
    if isTrain and num_sim == -1:
        num_simulations = options['simulation']['num_simulations']
        simulation = options['simulation']['simulation_index_start']
    elif isTrain:
        num_simulations = 1
        simulation = num_sim
    else:
        simulation = 0
        num_simulations = 1
    # simulation parameters
    train_episodes = options['train_episodes']
    mobility_params = options['mobility_params']
    mobility_params['alpha_angle'] = options['mobility_params'][
        'alpha_angle_rad'] * np.pi  #radian/sec
    #Some defaults
    Pmax_dB = 38.0 - 30
    Pmax = np.power(10.0, Pmax_dB / 10)
    n0_dB = -114.0 - 30
    noise_var = np.power(10.0, n0_dB / 10)
    # Hyperparameters

    for overal_sims in range(simulation, simulation + num_simulations):
        if isTrain:
            np.random.seed(50 + overal_sims)
        else:
            np.random.seed(1050 + overal_sims + N)
        file_path = './simulations/channel/%s_network%d' % (json_file,
                                                            overal_sims)
        data = np.load(file_path + '.npz', allow_pickle=True)

        H_all = data['arr_1']

        # Init Optimizer results
        p_FP_nodelay = []
        alpha_FP_nodelay = []
        time_FP_nodelay = []

        print('Ideal Case Run sim %d' % (overal_sims))
        print('Run FP sim %d' % (overal_sims))
        ##################### BENCHMARKS #####################
        # In this simulation I assume that the central allocator directly uses the most recent channel condition available.
        # Sum rate
        sum_rate_nodelay = []
        sum_rate_FPMulti_delayedbyone = []
        sum_rate_randomCS_randomP = []
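        # Three benchmarks are logged: FP on the current CSI (no delay), FP applied with a one-slot
        # delay, and random channel selection with random power.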
        if not pfs:
            weights = []
            for loop in range(total_samples):
                weights.append(np.array(np.ones(N)))
            # (p_FP_nodelay,alpha_FP_nodelay,time_FP_nodelay) = zip(*[pb.FP_algorithm_multi(N, M, H, Pmax, noise_var,weight) for (H,weight) in zip(H_all,weights)])
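            # Run the weighted FP algorithm on each slot's current CSI to obtain the ideal, no-delay benchmark.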
            ii = 0
            for (H, weight) in zip(H_all, weights):
                aa, bb, cc = pb.FP_algorithm_multi(N, M, H, Pmax, noise_var,
                                                   weight)
                p_FP_nodelay.append(aa)
                alpha_FP_nodelay.append(bb)
                time_FP_nodelay.append(cc)
                if ii % 100 == 0:
                    print(ii)
                ii += 1

        #    # General simulations
        # sum_rate_nodelay = [pb.sumrate_multi_weighted_clipped(H,p,alpha,noise_var,weight) for (H,p,alpha,weight) in zip(H_all,p_FP_nodelay,alpha_FP_nodelay,weights)]
        # Now, simulate the process where we use the original FP algorithm
        # Assumption is we ignore the delay at the backhaul network, i.e. there is no delay between the UE and the central controller.

        # Initial allocation is just random
            p_central = Pmax * np.random.rand(N)
            # all_alpha_combs = pb.permute_alphas(N,M)
            # alpha_central = all_alpha_combs[np.random.randint(len(all_alpha_combs))]
            alpha_central = pb.random_alpha_full(N, M)
            for sim in range(total_samples):
                sum_rate_nodelay.append(
                    pb.sumrate_multi_weighted_clipped(H_all[sim],
                                                      p_FP_nodelay[sim],
                                                      alpha_FP_nodelay[sim],
                                                      noise_var, weights[sim]))
                if (sim > 0):
                    p_central = p_FP_nodelay[sim - 1]
                    alpha_central = alpha_FP_nodelay[sim - 1]
                sum_rate_FPMulti_delayedbyone.append(
                    pb.sumrate_multi_weighted_clipped(H_all[sim], p_central,
                                                      alpha_central, noise_var,
                                                      weights[sim]))

                random_alpha = pb.random_alpha_full(
                    N, M
                )  #all_alpha_combs[np.random.randint(len(all_alpha_combs))]
                # rand_p,_ = pb.FP_algorithm_multi_knownchannel(N,random_alpha, H_all[sim], Pmax, noise_var,weights[sim])
                # sum_rate_randomCS_idealFP.append(pb.sumrate_multi_weighted_clipped(H_all[sim],rand_p,random_alpha,noise_var,weights[sim]))

                sum_rate_randomCS_randomP.append(
                    pb.sumrate_multi_weighted_clipped(H_all[sim],
                                                      Pmax * np.random.rand(N),
                                                      random_alpha, noise_var,
                                                      weights[sim]))
        else:
            beta = 0.01
            for sim in range(total_samples):
                if sim % train_episodes['T_train'] == 0:  # Restart
                    p_FP_nodelay.append(Pmax * np.ones(N))
                    alpha_FP_nodelay.append(np.zeros((N, M)))
                    alpha_FP_nodelay[-1][:, 0] = 1
                    rate = [
                        1e-10 + np.array(
                            pb.sumrate_multi_list_clipped(
                                H_all[sim], p_FP_nodelay[-1],
                                alpha_FP_nodelay[-1], noise_var))
                    ]
                    average_sum_rate = np.array(rate[-1])
                    weights = [np.array([1.0 / i for i in average_sum_rate])]
                    sum_rate_nodelay.append(np.sum(np.log(average_sum_rate)))
                    time_FP_nodelay = [[0, 0]]
                else:
                    tmp_FP_p, tmp_FP_alpha, cc = pb.FP_algorithm_multi(
                        N, M, H_all[sim], Pmax, noise_var, weights[-1])
                    p_FP_nodelay.append(tmp_FP_p)
                    alpha_FP_nodelay.append(tmp_FP_alpha)
                    time_FP_nodelay.append(cc)
                    rate.append(
                        pb.sumrate_multi_list_clipped(H_all[sim], tmp_FP_p,
                                                      tmp_FP_alpha, noise_var))
                    average_sum_rate = (
                        1.0 - beta) * average_sum_rate + beta * np.array(
                            rate[-1])
                    sum_rate_nodelay.append(np.sum(np.log(average_sum_rate)))
                    weights.append(
                        np.array([1.0 / i for i in average_sum_rate]))
                if (sim % 100 == 0):
                    print(sim)
            print('get sum_rate_FPMulti_delayedbyone')
            for sim in range(total_samples):
                if sim % train_episodes['T_train'] == 0:  # Restart
                    allone_alpha = np.zeros((N, M))
                    allone_alpha[:, 0] = 1
                    rate = [
                        1e-10 + np.array(
                            pb.sumrate_multi_list_clipped(
                                H_all[sim], Pmax * np.ones(N), allone_alpha,
                                noise_var))
                    ]
                    average_sum_rate = np.array(rate[-1])
                    weights = [np.array([1.0 / i for i in average_sum_rate])]
                    sum_rate_FPMulti_delayedbyone.append(
                        np.sum(np.log(average_sum_rate)))
                else:
                    tmp_FP_p, tmp_FP_alpha, cc = pb.FP_algorithm_multi(
                        N, M, H_all[sim - 1], Pmax, noise_var, weights[-1])
                    rate.append(
                        pb.sumrate_multi_list_clipped(H_all[sim], tmp_FP_p,
                                                      tmp_FP_alpha, noise_var))
                    average_sum_rate = (
                        1.0 - beta) * average_sum_rate + beta * np.array(
                            rate[-1])
                    sum_rate_FPMulti_delayedbyone.append(
                        np.sum(np.log(average_sum_rate)))
                    weights.append(
                        np.array([1.0 / i for i in average_sum_rate]))
                if (sim % 100 == 0):
                    print(sim)
            # print('get sum_rate_randomCS_idealFP')
            # for sim in range(total_samples):
            #     if sim % train_episodes['T_train'] == 0: # Restart
            #         allone_alpha = np.zeros((N,M))
            #         allone_alpha[:,0] = 1
            #         rate = [1e-10+np.array(pb.sumrate_multi_list_clipped(H_all[sim],Pmax*np.ones(N),allone_alpha,noise_var))]
            #         average_sum_rate = np.array(rate[-1])
            #         weights = [np.array([1.0/i for i in average_sum_rate])]
            #         sum_rate_randomCS_idealFP.append(np.sum(np.log(average_sum_rate)))
            #     else:
            #         tmp_FP_alpha = pb.random_alpha_full(N,M)
            #         tmp_FP_p,_ = pb.FP_algorithm_multi_knownchannel(N,tmp_FP_alpha, H_all[sim], Pmax, noise_var,weights[-1])
            #         rate.append(pb.sumrate_multi_list_clipped(H_all[sim],tmp_FP_p,tmp_FP_alpha,noise_var))
            #         average_sum_rate = (1.0-beta)*average_sum_rate+beta*np.array(rate[-1])
            #         sum_rate_randomCS_idealFP.append(np.sum(np.log(average_sum_rate)))
            #         weights.append(np.array([1.0/i for i in average_sum_rate]))
            #     if(sim%100 == 0):
            #         print(sim)
            print('get sum_rate_randomCS_randomP')
            for sim in range(total_samples):
                if sim % train_episodes['T_train'] == 0:  # Restart
                    allone_alpha = np.zeros((N, M))
                    allone_alpha[:, 0] = 1
                    rate = [
                        1e-10 + np.array(
                            pb.sumrate_multi_list_clipped(
                                H_all[sim], Pmax * np.ones(N), allone_alpha,
                                noise_var))
                    ]
                    average_sum_rate = np.array(rate[-1])
                    weights = [np.array([1.0 / i for i in average_sum_rate])]
                    sum_rate_randomCS_randomP.append(
                        np.sum(np.log(average_sum_rate)))
                else:
                    tmp_FP_alpha = pb.random_alpha_full(N, M)
                    tmp_FP_p = Pmax * np.random.rand(N)
                    rate.append(
                        pb.sumrate_multi_list_clipped(H_all[sim], tmp_FP_p,
                                                      tmp_FP_alpha, noise_var))
                    average_sum_rate = (
                        1.0 - beta) * average_sum_rate + beta * np.array(
                            rate[-1])
                    sum_rate_randomCS_randomP.append(
                        np.sum(np.log(average_sum_rate)))
                    weights.append(
                        np.array([1.0 / i for i in average_sum_rate]))
                if (sim % 100 == 0):
                    print(sim)
        np_save_path = './simulations/sumrate/benchmarks/%s_network%d' % (
            json_file, overal_sims)
        np.savez(np_save_path, p_FP_nodelay, alpha_FP_nodelay, time_FP_nodelay,
                 sum_rate_nodelay, sum_rate_FPMulti_delayedbyone,
                 sum_rate_randomCS_randomP)
        print('Saved to %s' % (np_save_path))
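
All of the examples above convert dBm figures to linear watts the same way (a 38 dBm maximum transmit power and a -114 dBm noise power); a small helper for reference:

import numpy as np

def dbm_to_watt(x_dbm):
    # Subtract 30 dB to go from dBm to dBW, then convert to linear scale.
    return np.power(10.0, (x_dbm - 30.0) / 10.0)

Pmax = dbm_to_watt(38.0)         # ~6.31 W
noise_var = dbm_to_watt(-114.0)  # ~3.98e-15 W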