Example #1
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
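    # Register a TensorFlow session with Keras (get_session/set_session are assumed to be this repo's backend helpers)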
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif(args.type=="DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    try:
        while True:
            env.render()
            a = algo.policy_action(old_state)
            old_state, r, done, _ = env.step(a)
            time += 1
            if done:
                old_state = env.reset()  # keep the fresh state rather than acting on a stale frame
    finally:
        env.env.close()  # without try/finally this close call is unreachable after an endless loop
Example #2
def main():
    args = Arg()

    # Standard environment: wrap the gym env so consecutive frames are stacked
    env = Environment(gym.make(args.env), args.consecutive_frames)
    env.reset()
    state_dim = env.get_state_size()
    action_dim = gym.make(args.env).action_space.n

    # Build the A3C agent and register a TF session with Keras
    algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    set_session(get_session())

    # Train with TensorBoard logging, then save weights and close the env
    summary_writer = tf.summary.FileWriter("./tensorboard_" + args.env)
    stats = algo.train(env, args, summary_writer)
    print(stats)
    algo.save_weights('./' + args.env + '.h5')
    env.env.close()
Example #3
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)
    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_ai2thor:
        config_dict = {'max_episode_length': 2000}
        env = AI2ThorEnv(config_dict=config_dict)
        state = env.reset()  # a single reset suffices and returns the initial observation
        state_dim = state.shape
        action_dim = env.action_space.n
    elif args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari, is_ai2thor=args.is_ai2thor)
    algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            print('----- done, resetting env ----')
            old_state = env.reset()  # keep the fresh state rather than acting on a stale frame
Example #4
def main(args=None):

    # Todo: this needs to be changed, just here for the logic
    num_hiddenUnits = 100

    # main parameters
    nb_episodes = 20000  # number of train episodes
    batch_size = 64  # Todo: check in Wang 2017
    consecutive_frames = 4  # Todo: check in Wang 2017
    training_interval = 30  # Todo: check in Wang 2017
    n_threads = 32
    gamma = 0.91
    lr = 0.00075
    entropy_cost = 0.001
    sv_estimate_cost = 0.4
    # if the optimizer is to be changed, the new one needs to be imported
    optimizer = RMSprop(lr=lr)
    n_timeSteps = 100
    dataset_size = 2  # Todo: check if this is correct; presumably the number of different inputs

    # option statistics (booleans, not the strings 'false', so the gather_stats check below behaves as intended)
    gather_stats = False
    render = False
    env = 'empty'  # Todo: implement harlow()

    # LSTMCell parameters
    num_units = 256
    activation = 'tanh'
    recurrent_activation = 'hard_sigmoid'
    use_bias = True
    # further LSTMCell parameters (so far set to default values -> move them up to the parameter section when changing!)
    kernel_initializer = 'glorot_uniform'
    recurrent_initializer = 'orthogonal'
    bias_initializer = 'zeros'
    unit_forget_bias = True
    kernel_regularizer = None
    recurrent_regularizer = None
    bias_regularizer = None
    kernel_constraint = None
    recurrent_constraint = None
    bias_constraint = None
    dropout = 0.0
    recurrent_dropout = 0.0
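    # Keras LSTM 'implementation' switch: 1 = many smaller dot products, 2 = fewer, larger batched ops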
    implementation = 1
    activity_regularizer = None
    return_sequences = False
    return_state = False

    # set the arguments in a list
    # DO NOT change the order of these arguments; they are consumed by index in create_dm_network
    lstm_param_list = [
        num_units, activation, recurrent_activation, use_bias,
        kernel_initializer, recurrent_initializer, bias_initializer,
        unit_forget_bias, kernel_regularizer, recurrent_regularizer,
        bias_regularizer, kernel_constraint, recurrent_constraint,
        bias_constraint, dropout, recurrent_dropout, implementation,
        activity_regularizer, return_sequences, return_state
    ]

    args = Create_Args(nb_episodes, batch_size, consecutive_frames,
                       training_interval, n_threads, gamma, lr, optimizer,
                       n_timeSteps, gather_stats, render, env)

    set_session(get_session())
    summary_writer = tf.summary.FileWriter('A3C/tensorboard_' + args.env)

    # Environment Initialization  # Todo: create Harlow env - independent of gym?
    # env = harlow()(gym.make(args.env), args.consecutive_frames)
    # env.reset()
    env_dim = (num_hiddenUnits,)  # Todo: replace with env.get_state_size() once the env exists
    action_dim = np.int32(2)  # Todo: replace with gym.make(args.env).action_space.n

    # create A3C instance
    algo = A3C(action_dim, env_dim, args.consecutive_frames, lstm_param_list,
               dataset_size, args.gamma, args.lr, args.optimizer,
               args.n_timeSteps)

    # Train
    stats = algo.train(args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        # Create_Args above defines no args.type, so name the directory after the algorithm
        df.to_csv("A3C/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights (again, args has no .type attribute; use the algorithm name)
    exp_dir = 'A3C/models/'
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}A3C_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir,
                                                       args.env,
                                                       args.nb_episodes,
                                                       args.batch_size)

    algo.save_weights(export_path)
    # args.env is just the placeholder string 'empty'; there is no live environment to close yet
Example #5
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    summary_writer = tf.summary.FileWriter("{}/tensorboard_M1_{}_M1_{}_snr1_{}_snr2_{}".format(args.out_dir, args.M1, args.M1, args.snr_M1, args.snr_M2))

    # Initialize the wireless environment
    users_env = UsersEnvCluster(args.M1, args.M2, args.snr_M1, args.snr_M2, fixed_channel=False)
    print(users_env)

    # Wrap the environment to use consecutive frames
    env = Environment(users_env, args.consecutive_frames)
    env.reset()

    # Define parameters for the DDQN and DDPG algorithms
    state_dim = env.get_state_size()
    action_dim = users_env.action_dim
    act_range = 1
    act_min = 0
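    # act_range/act_min presumably bound the continuous DDPG action (beamforming weights) to [0, 1]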

    # Initialize the DQN algorithm for the clustering optimization
    n_clusters = users_env.n_clusters
    algo_clustering = DDQN(n_clusters, state_dim, args)

    # Initialize the DDPG algorithm for the beamforming optimization
    algo = DDPG(action_dim, state_dim, act_range, act_min, args.consecutive_frames, algo_clustering, episode_length=args.episode_length)
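    # The clustering DDQN is carried inside the DDPG instance (reached below as algo.ddqn_clustering),
    # so the single train() call drives both the beamforming and the clustering policies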

    if args.step == "train":
        # Train
        stats = algo.train(env, args, summary_writer)

        # Export results to CSV
        if args.gather_stats:
            df = pd.DataFrame(np.array(stats))
            df.to_csv(args.out_dir + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

        # Save weights and close environments
        exp_dir = '{}/models_M1_{}_M2_{}_snr1_{}_snr2_{}/'.format(args.out_dir, args.M1, args.M2, args.snr_M1, args.snr_M2)
        if not os.path.exists(exp_dir):
            os.makedirs(exp_dir)
        # Save DDPG
        export_path = '{}{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDPG", args.nb_episodes, args.batch_size)  # exp_dir already ends with '/'
        algo.save_weights(export_path)

        # Save DDQN
        export_path = '{}{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDQN", args.nb_episodes, args.batch_size)
        algo.ddqn_clustering.save_weights(export_path)

    elif args.step == "inference":
        print("Loading the DDPG networks (actor and critic) and the DDQN policy network ...")
        path_actor = '<add the path of the .h5 file of the DDPG actor>'
        path_critic = '<add the path of the .h5 file of the DDPG critic>'
        path_ddqn = '<add the path of the .h5 file of the DDQN actor>'
        algo.load_weights(path_actor, path_critic, path_ddqn)

        # run a random policy during inference as an example
        s = np.random.rand(1, args.Nr)
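        # prepend a row of zeros so the stacked state has the two rows the networks
        # presumably expect from the consecutive-frames wrapper used during training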
        s_1 = np.zeros_like(s)
        s = np.vstack((s_1, s))

        while True:
            W = algo.policy_action(s)
            cluster_index = algo.ddqn_clustering.policy_action(s)
            a_and_c = {'a': W, 'c': cluster_index}
            new_state, r, done, _ = env.step(a_and_c)
            print("RL min rate = {}".format(r))
            print("RL state = {}".format(np.log(1 + new_state)))
            s = new_state
            input('Press Enter to continue ...')
Example #6
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif(args.type=="DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir,
        args.type,
        args.env,
        args.nb_episodes,
        args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
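
A minimal invocation sketch for this training entry point, assuming parse_args exposes command-line flags matching the attribute names used above (the flag spellings here are hypothetical):

# e.g. python main.py --type A2C --env CartPole-v1 --nb_episodes 5000 --batch_size 64 --consecutive_frames 4
# logs then land in A2C/tensorboard_CartPole-v1 and weights in A2C/models/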
Example #7
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif (args.type == "DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high

    else:
        if args.env == 'cell':
            # Custom optical-tweezers environment
            env = Environment(opticalTweezers(), args.consecutive_frames)
            env.reset()
            state_dim = (6,)
            action_dim = 4  # note: the reshape code must change for a 2D agent
        else:
            # Standard Environments
            env = Environment(gym.make(args.env), args.consecutive_frames)
            env.reset()
            state_dim = env.get_state_size()
            print(state_dim)
            action_dim = gym.make(args.env).action_space.n
            print(action_dim)
    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            old_state = env.reset()  # keep the fresh state rather than acting on a stale frame
Example #8
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # Environment Initialization
    if args.is_ai2thor:
        config_dict = {'max_episode_length': 500}
        env = AI2ThorEnv(config_dict=config_dict)
        state = env.reset()  # a single reset suffices and returns the initial observation
        state_dim = state.shape
        action_dim = env.action_space.n
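        # AI2ThorEnv has no gym id; give args.env a name for the tensorboard dir and export path below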
        args.env = 'ai2thor'
    elif args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print(state_dim)
        print(action_dim)
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)
    algo = A3C(action_dim,
               state_dim,
               args.consecutive_frames,
               is_atari=args.is_atari,
               is_ai2thor=args.is_ai2thor)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type,
                                                      args.env,
                                                      args.nb_episodes,
                                                      args.batch_size)

    algo.save_weights(export_path)
    env.close()