Example #1
import os
import tensorflow as tf

# Environment, opticalTweezers, Actor, Critic and train_model2 are assumed
# to come from the surrounding project.

def main(training=True):
    #load an environment
    env = Environment(opticalTweezers(), 4)
    env.reset()

    #state action placeholders
    States = tf.placeholder(tf.float32, shape=[None, 4, 6], name='States')
    Actions = tf.placeholder(tf.int32, shape=[None], name='Actions')
    Rewards = tf.placeholder(tf.float32, shape=[None, 1], name='Rewards')
    Advantages = tf.placeholder(tf.float32, shape=[None, 1], name='Advantages')
    Entropy_coefficient = tf.placeholder(tf.float32,
                                         shape=(),
                                         name='Entropy_coefficient')

    #load a model or else init
    if os.path.isfile(os.path.join(os.getcwd(), 'model')):
        #load model
        pass
    else:
        max_grad_norm = 0.5
        actor = Actor(States, Actions, Advantages, Rewards,
                      Entropy_coefficient, max_grad_norm)
        critic = Critic(Rewards, actor)
    if training:
        train_model2(env, actor, critic, States, Actions, Rewards, Advantages,
                     Entropy_coefficient)
    else:
        pass
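The "load model" branch in main() above is left as a stub. A minimal sketch of how it could be filled in, assuming the graph is rebuilt with the same Actor/Critic classes and the variables were previously saved with tf.train.Saver under ./model (both are assumptions, not shown in the original):

# Hypothetical restore path: rebuild the graph, then load the saved variables.
max_grad_norm = 0.5
actor = Actor(States, Actions, Advantages, Rewards,
              Entropy_coefficient, max_grad_norm)
critic = Critic(Rewards, actor)
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, os.path.join(os.getcwd(), 'model'))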
Example #2
def main(args=None):

    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)
    env = Environment(gym.make(args.env), args.consecutive_frames)
    env.reset()
    state_dim = env.get_state_size()
    action_space = gym.make(args.env).action_space
    action_dim = action_space.high.shape[0]
    act_range = action_space.high

    algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    stats = algo.train(env, args, summary_writer)

    df = pd.DataFrame(np.array(stats))
    df.to_csv(args.type + "/logs.csv",
              header=['Episode', 'Mean', 'Stddev'],
              float_format='%10.5f')

    # Save weights and close environments
    exp_dir = 'models/'
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    algo.save_weights(exp_dir)
    env.env.close()
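Example #2 reads args.type, args.env and args.consecutive_frames but never builds args. A minimal sketch of a parser covering only those fields (hypothetical; DDPG.train() may expect additional fields not shown here):

import argparse

def parse_args():
    # Hypothetical parser: only the fields this snippet actually reads.
    parser = argparse.ArgumentParser()
    parser.add_argument('--type', type=str, default='DDPG')
    parser.add_argument('--env', type=str, default='LunarLanderContinuous-v2')
    parser.add_argument('--consecutive_frames', type=int, default=4)
    return parser.parse_args()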
Example #3
def main():
    args = Arg()
    env = Environment(gym.make(args.env), args.consecutive_frames)
    env.reset()
    state_dim = env.get_state_size()
    action_dim = gym.make(args.env).action_space.n
    algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    set_session(get_session())
    summary_writer = tf.summary.FileWriter("./tensorboard_" + args.env)
    stats = algo.train(env, args, summary_writer)

    print(stats)
    algo.save_weights('./' + args.env + '.h5')
    env.env.close()
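Example #3 instantiates Arg() without showing its definition. A minimal stand-in, assuming only the fields this snippet reads (A3C.train() may require more, and the default env name here is just a placeholder for a discrete-action environment):

class Arg:
    # Hypothetical stand-in for the missing Arg class.
    def __init__(self):
        self.env = 'CartPole-v1'        # any discrete-action Gym env
        self.consecutive_frames = 4
        self.is_atari = False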
Example #4
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    summary_writer = tf.summary.FileWriter("{}/tensorboard_M1_{}_M1_{}_snr1_{}_snr2_{}".format(args.out_dir, args.M1, args.M1, args.snr_M1, args.snr_M2))

    # Initialize the wireless environment
    users_env = UsersEnvCluster(args.M1, args.M2, args.snr_M1, args.snr_M2, fixed_channel=False)
    print(users_env)

    # Wrap the environment to use consecutive frames
    env = Environment(users_env, args.consecutive_frames)
    env.reset()

    # Define parameters for the DDQN and DDPG algorithms
    state_dim = env.get_state_size()
    action_dim = users_env.action_dim
    act_range = 1
    act_min = 0

    # Initialize the DQN algorithm for the clustering optimization
    n_clusters = users_env.n_clusters
    algo_clustering = DDQN(n_clusters, state_dim, args)

    # Initialize the DDPG algorithm for the beamforming optimization
    algo = DDPG(action_dim, state_dim, act_range, act_min, args.consecutive_frames, algo_clustering, episode_length=args.episode_length)

    if args.step == "train":
        # Train
        stats = algo.train(env, args, summary_writer)

        # Export results to CSV
        if args.gather_stats:
            df = pd.DataFrame(np.array(stats))
            df.to_csv(args.out_dir + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

        # Save weights and close environments
        exp_dir = '{}/models_M1_{}_M2_{}_snr1_{}_snr2_{}/'.format(args.out_dir, args.M1, args.M2, args.snr_M1, args.snr_M2)
        if not os.path.exists(exp_dir):
            os.makedirs(exp_dir)
        # Save DDPG
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDPG", args.nb_episodes, args.batch_size)
        algo.save_weights(export_path)

        # Save DDQN
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDQN", args.nb_episodes, args.batch_size)
        algo.ddqn_clustering.save_weights(export_path)

    elif args.step == "inference":
        print("Loading the DDPG networks (actor and critic) and the DDQN policy network ...")
        path_actor = '<add the path of the .h5 file of the DDPG actor>'
        path_critic = '<add the path of the .h5 file of the DDPG critic>'
        path_ddqn = '<add the path of the .h5 file of the DDQN actor>'
        algo.load_weights(path_actor, path_critic, path_ddqn)

        # Start from a random state and run the loaded policy during inference as an example
        s = np.random.rand(1, args.Nr)
        s_1 = np.zeros_like(s)
        s = np.vstack((s_1, s))

        while True:
            W = algo.policy_action(s)
            cluster_index = algo.ddqn_clustering.policy_action(s)
            a_and_c = {'a': W, 'c': cluster_index}
            new_state, r, done, _ = env.step(a_and_c)
            print("RL min rate = {}".format(r))
            print("RL state = {}".format(np.log(1 + new_state)))
            s = new_state
            input('Press Enter to continue ...')
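Note that the inference loop above receives done from env.step() but never acts on it. If the wrapped environment can terminate, one would typically reset it, e.g. (a sketch, assuming Environment.reset() returns the initial stacked state):

            if done:
                s = env.reset()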
Example #5
import os
import sys
import gym
import argparse
import numpy as np
import pandas as pd
import tensorflow as tf
gym.logger.set_level(40)
from utils.continuous_environments import Environment
env = Environment(gym.make("LunarLanderContinuous-v2"),
                  args.consecutive_frames)
env.reset()
state_dim = env.get_state_size()
action_space = gym.make("LunarLanderContinuous-v2").action_space
action_dim = action_space.high.shape[0]
act_range = action_space.high
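This fragment uses args.consecutive_frames without defining args. A minimal stand-in, placed before the env = Environment(...) line, that makes the call runnable (a sketch; a real script would parse these from the command line as in Example #2):

from types import SimpleNamespace

# Hypothetical stand-in for the command-line arguments; only the field this
# fragment reads is provided.
args = SimpleNamespace(consecutive_frames=4)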