Example #1
# Imports assumed from the original repository layout; DQN / DDPG define the
# agents and pb is the project's helper backend (module name is an assumption).
import collections
import copy
import json
import os
import time

import numpy as np

import DQN
import DDPG
import project_backend as pb  # name assumed; any module exposing these helpers works

def main(args):

    json_file = args.json_file
    json_files_train = args.json_files_train
    json_file_policy_train = args.json_file_policy_train

    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy_train + '.json', 'r') as f:
        options_policy = json.load(f)
    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf

    for json_file_train in json_files_train:
        with open('./config/deployment/' + json_file_train + '.json',
                  'r') as f:
            options_train = json.load(f)
        included_train_episodes = []
        tot_train_episodes = int(options_train['simulation']['total_samples'] /
                                 options_train['train_episodes']['T_train'])
        N = options['simulation']['N']
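        # For small networks (N <= 20) evaluate the first 16 checkpoints
        # (episodes 0-15) and then every 5th episode; for larger networks
        # only the final checkpoint is evaluated.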
        if N <= 20:
            for i in range(tot_train_episodes + 1):
                if i <= 15 or i % 5 == 0:
                    included_train_episodes.append(i)
        else:
            included_train_episodes.append(tot_train_episodes)

        train_tot_simulations = options_train['simulation']['num_simulations']
        tot_test_episodes = int(options['simulation']['total_samples'] /
                                options['train_episodes']['T_train'])
        inner_train_networks = [[]] * tot_test_episodes
        for i in range(tot_test_episodes):
            if options['simulation']['test_include'] == 'all':
                inner_train_networks[i] = 0
            else:
                inner_train_networks[i] = list(
                    np.random.randint(0, train_tot_simulations,
                                      options['simulation']['test_include']))
        ## Number of samples
        total_samples = options['simulation']['total_samples']

        N = options['simulation']['N']

        # simulation parameters
        train_episodes = options['train_episodes']
        mobility_params = options['mobility_params']
        mobility_params['alpha_angle'] = options['mobility_params'][
            'alpha_angle_rad'] * np.pi  #radian/sec
        #Some defaults
        Pmax_dB = 46.0 - 30
        Pmax = np.power(10.0, Pmax_dB / 10)
        n0_dB = -104.0 - 30
        noise_var = np.power(10.0, n0_dB / 10)
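        # Powers are given in dBm and shifted by -30 to dBW before the
        # 10**(x/10) conversion to linear watts: 46 dBm -> 16 dBW -> ~39.8 W,
        # and -104 dBm gives the per-link noise power in watts.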
        # Hyperparameters
        neightresh = noise_var * options_policy['neightresh']

        for ep in included_train_episodes:
            #

            file_path = './simulations/channel/%s_network%d' % (json_file, 0)
            data = np.load(file_path + '.npz')

            H_all = data['arr_1']
            H_all_2 = []
            for i in range(total_samples):
                H_all_2.append(H_all[i]**2)

            weights = []
            for loop in range(total_samples):
                weights.append(np.array(np.ones(N)))

            time_calculating_strategy_takes = []

            # Neighbor-set buffers (keep only the last two time slots)
            neighbors_in = collections.deque([], 2)
            neighbors = collections.deque([], 2)

            sims_pos_p = np.zeros(N).astype(int) - 1

            policy = DQN.DQN(options, options_policy, N, Pmax, noise_var)

            strategy_translation = np.zeros(policy.num_actions)
            strategy_translation[0] = 0.0  # Tx power 0
            Pmin_dB = 10.0 - 30
            # Power-level step size in dB
            strategy_translation_dB_step = (Pmax_dB -
                                            Pmin_dB) / (policy.num_actions - 2)
            for i in range(1, policy.num_actions - 1):
                strategy_translation[i] = np.power(
                    10.0,
                    ((Pmin_dB + (i - 1) * strategy_translation_dB_step)) / 10)
            strategy_translation[-1] = Pmax
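            # Discrete action -> transmit power: action 0 is silence, the last
            # action is full power Pmax, and the actions in between are spaced
            # evenly in dB starting at Pmin_dB. For example (assuming
            # num_actions = 10): 4.5 dB steps give levels from 10 mW up to
            # ~14 W, plus the two end points {0, Pmax ~= 39.8 W}.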

            time_calculating_strategy_takes = []
            time_optimization_at_each_slot_takes = []
            sum_rate_distributed_policy_episode = []
            p_strategy_all_episode = []
            i_train = 0

            sum_rate_distributed_policy = []
            sum_rate_list_distributed_policy = collections.deque([], 2)
            # Initial allocation is just random
            p_central = Pmax * np.random.rand(N)
            p_strategy = np.array(p_central)  # copy, so p_strategy is independent of p_central
            p_strategy_current = np.array(p_strategy)

            p_strategy_all = []

            with tf.Session() as sess:
                sess.run(policy.init)
                policy.initialize_updates(sess)
                # Start iterating over time slots
                for sim in range(total_samples):
                    # Load the checkpoint that was saved at this training episode.
                    if (sim % train_episodes['T_train'] == 0):
                        train_network_idx = i_train
                        model_destination = (
                            './simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt'
                            % (json_file_train, json_file_policy_train,
                               train_network_idx, ep)).replace('[',
                                                               '').replace(
                                                                   ']', '')
                        policy.load(sess, model_destination)
                        i_train += 1
                        i_train = i_train % train_tot_simulations

                    # Skip the first two slots of each episode so there is experience to act on
                    if (sim % train_episodes['T_train'] > 1):
                        # Each agent picks its strategy.
                        for agent in range(N):
                            current_local_state = policy.local_state(
                                sim, agent, p_strategy_all, H_all_2, neighbors,
                                neighbors_in, sum_rate_list_distributed_policy,
                                sims_pos_p)
                            a_time = time.time()
                            strategy = policy.act_noepsilon(
                                sess, current_local_state, sim)
                            time_calculating_strategy_takes.append(
                                time.time() - a_time)

                            # Pick the action
                            p_strategy[agent] = strategy_translation[strategy]

                            # Store the current state and action so they serve as the previous state/action at the next time slot
                            policy.previous_state[
                                agent, :] = current_local_state
                            policy.previous_action[agent] = strategy

                    if (sim % train_episodes['T_train'] < 2):
                        p_strategy = Pmax * np.ones(N)  #np.random.rand(N)
                    p_strategy_current = np.array(p_strategy)
                    policy.prev_suminterferences = np.matmul(
                        H_all_2[sim], p_strategy) - (H_all_2[sim].diagonal() *
                                                     p_strategy) + noise_var
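                    # prev_suminterferences[i] = sum_j |h_ij|^2 * p_j - |h_ii|^2 * p_i + noise_var,
                    # i.e. the interference-plus-noise power currently seen by link i.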
                    sims_pos_p[np.where(p_strategy_current > 0)] = sim

                    tmp_neighbors_in = []
                    tmp_neighbors = []
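                    # tmp_neighbors_in[i]: links j whose power, weighted by H[i, j]^2,
                    # exceeds neightresh at link i; tmp_neighbors[i]: the reverse map,
                    # i.e. links j that list i among their strong neighbors. A link with
                    # no such neighbors keeps its set from the previous slot.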
                    for nei_i in range(N):
                        neigh_tmp_variab = np.where(
                            (H_all[sim][nei_i, :]**2) *
                            p_strategy_current > neightresh)
                        neigh_tmp_variab = np.delete(
                            neigh_tmp_variab,
                            np.where(neigh_tmp_variab[0] == nei_i))
                        tmp_neighbors_in.append(neigh_tmp_variab)

                    for nei_i in range(N):
                        tmp_neighlist = []
                        for nei_j in range(N):
                            if (len(
                                    np.where(
                                        tmp_neighbors_in[nei_j] == nei_i)[0])
                                    != 0):
                                tmp_neighlist.append(nei_j)
                        if (len(tmp_neighlist) == 0 and len(neighbors) > 0):
                            tmp_neighbors.append(np.array(
                                neighbors[-1][nei_i]))
                        else:
                            tmp_neighbors.append(np.array(tmp_neighlist))
                    neighbors.append(tmp_neighbors)
                    neighbors_in.append(tmp_neighbors_in)
                    # all sumrates in a list
                    sum_rate_list_distributed_policy.append(
                        pb.reward_helper(H_all[sim], p_strategy, N, noise_var,
                                         Pmax, neighbors_in[-1]))

                    sum_rate_distributed_policy.append(
                        pb.sumrate_weighted_clipped(H_all[sim], p_strategy, N,
                                                    noise_var, weights[sim]))
                    p_strategy_all.append(np.array(p_strategy))
                    if (sim % 2500 == 0):
                        print('Test time %d' % (sim))
            sum_rate_distributed_policy_episode.append(
                copy.copy(sum_rate_distributed_policy))
            p_strategy_all_episode.append(copy.copy(p_strategy_all))

            # End of test phase; save the results for this training episode
            np_save_path = './simulations/sumrate/test/%s_%s_%s_episode%d.ckpt' % (
                json_file, json_file_train, json_file_policy_train, ep)
            print(np_save_path)
            np.savez(np_save_path, options, options_policy,
                     sum_rate_distributed_policy_episode,
                     p_strategy_all_episode,
                     time_optimization_at_each_slot_takes,
                     time_calculating_strategy_takes, included_train_episodes,
                     inner_train_networks)
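
These scripts read a deployment config and a policy config by name. A minimal sketch of the fields they access is below; the key names come straight from the code on this page, while every value is only a placeholder (the agent classes may read further keys not shown here).

# Hypothetical config contents, for illustration only.
options = {                                   # ./config/deployment/<json_file>.json
    'simulation': {'total_samples': 50000, 'N': 20,
                   'num_simulations': 1, 'simulation_index_start': 0,
                   'test_include': 'all', 'isTrain': True},
    'train_episodes': {'T_train': 5000},
    'mobility_params': {'alpha_angle_rad': 0.05},
}
options_policy = {                            # ./config/policy/<json_file_policy>.json
    'cuda': False,
    'neightresh': 1.0,                        # scaled by noise_var in the code
    'N_neighbors': 5,
}

Example #2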
def main(args):

    json_file = args.json_file
    json_file_policy = args.json_file_policy
    num_sim = args.num_sim

    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)
    with open('./config/policy/' + json_file_policy + '.json', 'r') as f:
        options_policy = json.load(f)

    if not options_policy['cuda']:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    import tensorflow as tf

    ## Number of samples
    total_samples = options['simulation']['total_samples']

    N = options['simulation']['N']

    if num_sim == -1:
        num_simulations = options['simulation']['num_simulations']
        simulation = options['simulation']['simulation_index_start']
    else:
        num_simulations = 1
        simulation = num_sim

    # simulation parameters
    train_episodes = options['train_episodes']
    mobility_params = options['mobility_params']
    mobility_params['alpha_angle'] = options['mobility_params'][
        'alpha_angle_rad'] * np.pi  #radian/sec
    #Some defaults
    Pmax_dB = 38.0 - 30
    Pmax = np.power(10.0, Pmax_dB / 10)
    n0_dB = -114.0 - 30
    noise_var = np.power(10.0, n0_dB / 10)
    # Hyperparameters
    N_neighbors = options_policy['N_neighbors']
    neightresh = noise_var * options_policy['neightresh']

    for overal_sims in range(simulation, simulation + num_simulations):
        tf.reset_default_graph()
        tf.set_random_seed(100 + overal_sims)
        np.random.seed(100 + overal_sims)

        file_path = './simulations/channel/%s_network%d' % (json_file,
                                                            overal_sims)
        data = np.load(file_path + '.npz', allow_pickle=True)

        H_all = data['arr_1']
        H_all_2 = []
        for i in range(total_samples):
            H_all_2.append(H_all[i]**2)

        weights = []
        for loop in range(total_samples):
            weights.append(np.array(np.ones(N)))

        time_calculating_strategy_takes = []

        # Neighbor-set buffers (keep only the last two time slots)
        neighbors_in = collections.deque([], 2)
        neighbors = collections.deque([], 2)

        sims_pos_p = np.zeros(N).astype(int) - 1

        policy = DDPG.DDPG(options, options_policy, N, Pmax, noise_var)

        # Sum-rate bookkeeping for this simulation
        sum_rate_distributed_policy = []
        sum_rate_list_distributed_policy = collections.deque([], 2)
        # Initial allocation is just random
        p_central = Pmax * np.random.rand(N)
        p_strategy = np.array(p_central)  # copy, so p_strategy is independent of p_central
        p_strategy_current = np.array(p_strategy)

        time_calculating_strategy_takes = []
        time_optimization_at_each_slot_takes = []

        p_strategy_all = []

        with tf.Session() as sess:
            sess.run(policy.init)
            policy.initialize_critic_updates(sess)
            policy.initialize_actor_updates(sess)
            # Start iterating over time slots
            for sim in range(total_samples):
                policy.check_memory_restart(sess, sim)
                policy.update_handler(sess, sim)
                # save an instance per training episode for testing purposes.
                if (sim % train_episodes['T_train'] == 0):
                    model_destination = (
                        './simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt'
                        %
                        (json_file, json_file_policy, overal_sims,
                         int(float(sim) / train_episodes['T_train']))).replace(
                             '[', '').replace(']', '')
                    policy.save(sess, model_destination)

                # Skip the first two slots of each episode so there is experience to act on
                if (sim % train_episodes['T_train'] > 1):
                    # Each agent picks its strategy.
                    for agent in range(N):
                        current_local_state = policy.local_state(
                            sim, agent, p_strategy_all, H_all_2, neighbors,
                            neighbors_in, sum_rate_list_distributed_policy,
                            sims_pos_p)
                        a_time = time.time()
                        strategy = policy.act(sess, current_local_state, sim,
                                              agent)
                        time_calculating_strategy_takes.append(time.time() -
                                                               a_time)

                        if (sim % train_episodes['T_train'] > 2):
                            # Now there is a previous state, so an experience can be formed.
                            sorted_neighbors_criteria = np.log10(
                                H_all_2[sim -
                                        1][np.array(neighbors[-1][agent]),
                                           agent] /
                                policy.prev_suminterferences[neighbors[-1]
                                                             [agent]])
                            sorted_neighbors = neighbors[-1][agent][np.argsort(
                                sorted_neighbors_criteria)[::-1]]
                            if len(sorted_neighbors) > N_neighbors:
                                sorted_neighbors = sorted_neighbors[:
                                                                    N_neighbors]
                            sorted_neighbors = np.append(
                                sorted_neighbors, agent)
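                            # Reward: the weighted rate terms of this agent and of the
                            # N_neighbors links it affects most strongly (ranked by how
                            # strongly the agent is received at each neighbor relative to
                            # that neighbor's interference-plus-noise level).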
                            current_reward = np.sum(
                                np.multiply(
                                    weights[sim - 1],
                                    sum_rate_list_distributed_policy[-1]
                                    [:, agent])[sorted_neighbors])
                            policy.remember(agent, current_local_state,
                                            current_reward)

                        # Train only once per time slot; the agent == N - 1 check ensures
                        # this runs after the last agent has acted, provided there is
                        # enough data to form a minibatch.
                        if agent == (N - 1):
                            a_time = time.time()

                            # Train on a minibatch
                            policy.train(sess, sim)

                            time_optimization_at_each_slot_takes.append(
                                time.time() - a_time)

                        # Pick the action
                        p_strategy[agent] = policy.Pmax * strategy  #** 10

                        # Store the current state and action so they serve as the previous state/action at the next time slot
                        policy.previous_state[agent, :] = current_local_state
                        policy.previous_action[agent] = strategy

                if (sim % train_episodes['T_train'] < 2):
                    p_strategy = np.random.rand(N)
                p_strategy_current = np.array(p_strategy)
                policy.prev_suminterferences = np.matmul(
                    H_all_2[sim], p_strategy) - (H_all_2[sim].diagonal() *
                                                 p_strategy) + noise_var
                sims_pos_p[np.where(p_strategy_current > 0)] = sim

                tmp_neighbors_in = []
                tmp_neighbors = []
                for nei_i in range(N):
                    neigh_tmp_variab = np.where(
                        (H_all[sim][nei_i, :]**2) *
                        p_strategy_current > neightresh)
                    neigh_tmp_variab = np.delete(
                        neigh_tmp_variab,
                        np.where(neigh_tmp_variab[0] == nei_i))
                    tmp_neighbors_in.append(neigh_tmp_variab)

                for nei_i in range(N):
                    tmp_neighlist = []
                    for nei_j in range(N):
                        if (len(np.where(tmp_neighbors_in[nei_j] == nei_i)[0])
                                != 0):
                            tmp_neighlist.append(nei_j)
                    if (len(tmp_neighlist) == 0 and len(neighbors) > 0):
                        tmp_neighbors.append(np.array(neighbors[-1][nei_i]))
                    else:
                        tmp_neighbors.append(np.array(tmp_neighlist))
                neighbors.append(tmp_neighbors)
                neighbors_in.append(tmp_neighbors_in)
                # all sumrates in a list
                sum_rate_list_distributed_policy.append(
                    pb.reward_helper(H_all[sim], p_strategy, N, noise_var,
                                     Pmax, neighbors_in[-1]))

                sum_rate_distributed_policy.append(
                    pb.sumrate_weighted_clipped(H_all[sim], p_strategy, N,
                                                noise_var, weights[sim]))
                p_strategy_all.append(np.array(p_strategy))
                if (sim % 2500 == 0):
                    print('Time %d sim %d' % (sim, overal_sims))

            policy.equalize(sess)
            print('Train is over sim %d' % (overal_sims))

            model_destination = (
                './simulations/sumrate/policy/%s_%s_network%d_episode%d.ckpt' %
                (json_file, json_file_policy, overal_sims,
                 int(float(total_samples) /
                     train_episodes['T_train']))).replace('[',
                                                          '').replace(']', '')
            policy.save(sess, model_destination)

        # End Train Phase
        np_save_path = './simulations/sumrate/train/%s_%s_network%d.ckpt' % (
            json_file, json_file_policy, overal_sims)
        print(np_save_path)
        np.savez(np_save_path, options, options_policy,
                 sum_rate_distributed_policy, p_strategy_all,
                 time_optimization_at_each_slot_takes,
                 time_calculating_strategy_takes)
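
Each main(args) only assumes an object carrying the attributes it reads (json_file, json_files_train, json_file_policy_train, json_file_policy, num_sim). A minimal, hypothetical entry point for the DDPG training script above could look like the sketch below; the flag names and defaults are placeholders, not the original repository's CLI.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--json-file', dest='json_file',
                        default='my_deployment')        # placeholder config name
    parser.add_argument('--json-file-policy', dest='json_file_policy',
                        default='my_ddpg_policy')       # placeholder config name
    parser.add_argument('--num-sim', dest='num_sim', type=int, default=-1)
    main(parser.parse_args())

Example #3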
def main(args):

    json_file = args.json_file
    num_sim = args.num_sim

    with open('./config/deployment/' + json_file + '.json', 'r') as f:
        options = json.load(f)

    ## Number of samples
    total_samples = options['simulation']['total_samples']

    N = options['simulation']['N']

    # Now assume each time slot is 1 ms
    isTrain = options['simulation']['isTrain']
    if isTrain and num_sim == -1:
        num_simulations = options['simulation']['num_simulations']
        simulation = options['simulation']['simulation_index_start']
    elif isTrain:
        num_simulations = 1
        simulation = num_sim
    else:
        simulation = 0
        num_simulations = 1
    # simulation parameters
    mobility_params = options['mobility_params']
    mobility_params['alpha_angle'] = options['mobility_params'][
        'alpha_angle_rad'] * np.pi  #radian/sec
    #Some defaults
    Pmax_dB = 38.0 - 30
    Pmax = np.power(10.0, Pmax_dB / 10)
    n0_dB = -114.0 - 30
    noise_var = np.power(10.0, n0_dB / 10)
    # Hyperparameters

    for overal_sims in range(simulation, simulation + num_simulations):
        if isTrain:
            np.random.seed(50 + overal_sims)
        else:
            np.random.seed(1050 + overal_sims + N)
        file_path = './simulations/channel/%s_network%d' % (json_file,
                                                            overal_sims)
        data = np.load(file_path + '.npz', allow_pickle=True)

        H_all = data['arr_1']

        weights = []
        for loop in range(total_samples):
            weights.append(np.array(np.ones(N)))
        # Init Optimizer results
        p_FP_nodelay = []
        time_FP_nodelay = []
        p_WMMSE_nodelay = []
        time_WMMSE_nodelay = []

        print('Ideal Case Run sim %d' % (overal_sims))
        print('Run FP sim %d' % (overal_sims))
        (p_FP_nodelay, time_FP_nodelay) = zip(*[
            pb.FP_algorithm_weighted(N, H, Pmax, noise_var, weight)
            for (H, weight) in zip(H_all, weights)
        ])

        print('Run WMMSE sim %d' % (overal_sims))
        (p_WMMSE_nodelay, time_WMMSE_nodelay) = zip(*[
            pb.WMMSE_algorithm_weighted(N, H, Pmax, noise_var, weight)
            for (H, weight) in zip(H_all, weights)
        ])

        # General simulations
        sum_rate_nodelay = [
            pb.sumrate_weighted_clipped(H, p, N, noise_var, weight)
            for (H, p, weight) in zip(H_all, p_FP_nodelay, weights)
        ]
        sum_rate_WMMSE = [
            pb.sumrate_weighted_clipped(H, p, N, noise_var, weight)
            for (H, p, weight) in zip(H_all, p_WMMSE_nodelay, weights)
        ]

        # Now, simulate the process where we use the original FP algorithm
        # Assumption is we ignore the delay at the backhaul network, i.e. there is no delay between the UE and the central controller.

        ##################### OTHER BENCHMARKS #####################
        # In this simulation I assume that the central allocator directly uses the most recent channel condition available.
        # Per-slot sum rates for the baseline allocations
        sum_rate_delayed_central = []
        sum_rate_random = []
        sum_rate_max = []
        # Initial allocation is just random
        p_central = Pmax * np.random.rand(N)
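        # Three per-slot baselines: the FP allocation from the previous slot
        # (one-slot CSI delay at the central controller), uniform-random powers,
        # and full power on every link.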

        for sim in range(total_samples):
            if (sim > 0):
                p_central = p_FP_nodelay[sim - 1]
            sum_rate_delayed_central.append(
                pb.sumrate_weighted_clipped(H_all[sim], p_central, N,
                                            noise_var, weights[sim]))
            sum_rate_random.append(
                pb.sumrate_weighted_clipped(H_all[sim],
                                            Pmax * np.random.rand(N), N,
                                            noise_var, weights[sim]))
            sum_rate_max.append(
                pb.sumrate_weighted_clipped(H_all[sim], Pmax * np.ones(N), N,
                                            noise_var, weights[sim]))

        np_save_path = './simulations/sumrate/benchmarks/%s_network%d' % (
            json_file, overal_sims)
        np.savez(np_save_path, p_FP_nodelay, time_FP_nodelay, sum_rate_nodelay,
                 p_WMMSE_nodelay, time_WMMSE_nodelay, sum_rate_WMMSE,
                 sum_rate_delayed_central, sum_rate_random, sum_rate_max)
        print('Saved to %s' % (np_save_path))
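
The benchmark script stores its results as positional np.savez arguments, so they come back under the keys 'arr_0', 'arr_1', ... in the order passed above. A minimal sketch for inspecting a saved file follows; the config name and network index are placeholders, and each printed value is the mean per-slot weighted sum rate.

import numpy as np

data = np.load('./simulations/sumrate/benchmarks/%s_network%d.npz'
               % ('my_deployment', 0), allow_pickle=True)
print('FP, ideal CSI:      %.3f' % np.mean(data['arr_2']))   # sum_rate_nodelay
print('WMMSE, ideal CSI:   %.3f' % np.mean(data['arr_5']))   # sum_rate_WMMSE
print('FP, one-slot delay: %.3f' % np.mean(data['arr_6']))   # sum_rate_delayed_central
print('Random power:       %.3f' % np.mean(data['arr_7']))   # sum_rate_random
print('Full power:         %.3f' % np.mean(data['arr_8']))   # sum_rate_max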