def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm and load its pretrained weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            old_state = env.reset()  # restart from a fresh initial state
    env.env.close()
def main():
    args = Arg()

    # Standard gym environment wrapped with frame stacking
    env = Environment(gym.make(args.env), args.consecutive_frames)
    env.reset()
    state_dim = env.get_state_size()
    action_dim = gym.make(args.env).action_space.n

    algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)

    set_session(get_session())
    summary_writer = tf.summary.FileWriter("./tensorboard_" + args.env)

    # Train and report statistics
    stats = algo.train(env, args, summary_writer)
    print(stats)

    algo.save_weights('./' + args.env + '.h5')
    env.env.close()
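`Arg()` above is not defined in this snippet. A minimal stand-in, assuming it only needs to expose the attributes read directly by this function plus whatever `algo.train(env, args, summary_writer)` consumes (the training fields and default values below are assumptions, not the original configuration), could look like:

class Arg:
    """Hypothetical argument container for the minimal A3C training script above."""
    def __init__(self):
        self.env = 'CartPole-v1'      # any discrete-action gym id (assumed default)
        self.consecutive_frames = 4   # number of stacked frames
        self.is_atari = False
        # Fields below are assumptions about what algo.train() reads;
        # adjust them to match the actual A3C implementation.
        self.nb_episodes = 5000
        self.n_threads = 8
        self.training_interval = 30
        self.render = False
        self.gather_stats = False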
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_ai2thor:
        # AI2-THOR Environment Wrapper
        config_dict = {'max_episode_length': 2000}
        env = AI2ThorEnv(config_dict=config_dict)
        state = env.reset()
        state_dim = state.shape
        action_dim = env.action_space.n
    elif args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Build the A3C agent and load its pretrained weights
    algo = A3C(action_dim, state_dim, args.consecutive_frames,
               is_atari=args.is_atari, is_ai2thor=args.is_ai2thor)
    algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            print('----- done, resetting env ----')
            old_state = env.reset()  # restart from a fresh initial state
def main(args=None):
    # Todo: this needs to be changed, just here for the logic
    num_hiddenUnits = 100

    # Main parameters
    nb_episodes = 20000        # number of training episodes
    batch_size = 64            # Todo: check in Wang 2017
    consecutive_frames = 4     # Todo: check in Wang 2017
    training_interval = 30     # Todo: check in Wang 2017
    n_threads = 32
    gamma = 0.91
    lr = 0.00075
    entropy_cost = 0.001
    sv_estimate_cost = 0.4
    optimizer = RMSprop(lr=lr)  # if the optimizer is to be changed, the new one needs to be imported
    n_timeSteps = 100
    dataset_size = 2  # Todo: check if this is correct, I reckon it must be the number of different inputs

    # Option statistics
    gather_stats = 'false'
    render = 'false'
    env = 'empty'  # Todo: implement harlow()

    # LSTMCell parameters
    num_units = 256
    activation = 'tanh'
    recurrent_activation = 'hard_sigmoid'
    use_bias = True

    # Further LSTMCell parameters (so far set to default values -> move them up to the parameter section when changing!)
    kernel_initializer = 'glorot_uniform'
    recurrent_initializer = 'orthogonal'
    bias_initializer = 'zeros'
    unit_forget_bias = True
    kernel_regularizer = None
    recurrent_regularizer = None
    bias_regularizer = None
    kernel_constraint = None
    recurrent_constraint = None
    bias_constraint = None
    dropout = 0.0
    recurrent_dropout = 0.0
    implementation = 1
    activity_regularizer = None
    return_sequences = False
    return_state = False

    # Collect the arguments in a list.
    # DO NOT change the order of these arguments, as they are used index-wise in create_dm_network
    lstm_param_list = [num_units, activation, recurrent_activation, use_bias,
                       kernel_initializer, recurrent_initializer, bias_initializer,
                       unit_forget_bias, kernel_regularizer, recurrent_regularizer,
                       bias_regularizer, kernel_constraint, recurrent_constraint,
                       bias_constraint, dropout, recurrent_dropout, implementation,
                       activity_regularizer, return_sequences, return_state]

    args = Create_Args(nb_episodes, batch_size, consecutive_frames, training_interval,
                       n_threads, gamma, lr, optimizer, n_timeSteps,
                       gather_stats, render, env)

    set_session(get_session())
    summary_writer = tf.summary.FileWriter('A3C' + "/tensorboard_" + args.env)

    # Environment Initialization
    # Todo: create Harlow env - independent of gym?
    # env = harlow()(gym.make(args.env), args.consecutive_frames)
    # env.reset()
    env_dim = (num_hiddenUnits, )  # env.get_state_size()  # Todo: replace, understand dim
    action_dim = np.int32(2)       # gym.make(args.env).action_space.n

    # Create A3C instance
    algo = A3C(action_dim, env_dim, args.consecutive_frames, lstm_param_list, dataset_size,
               args.gamma, args.lr, args.optimizer, args.n_timeSteps)

    # Train
    stats = algo.train(args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type, args.env,
                                                      args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    # Todo: args.env is currently the string 'empty', so close() will fail until a real environment is wired in
    args.env.close()
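`create_dm_network` is not shown here, so the following is only a sketch of how it might unpack `lstm_param_list` index-wise into a Keras recurrent layer. The index mapping simply mirrors the list order defined above; the helper name and the choice of the `LSTM` layer (rather than `LSTMCell`, which does not accept the last three parameters) are assumptions.

from keras.layers import LSTM

def build_lstm_from_params(p):
    """Hypothetical helper: map the index-wise lstm_param_list to LSTM keyword arguments."""
    return LSTM(units=p[0], activation=p[1], recurrent_activation=p[2], use_bias=p[3],
                kernel_initializer=p[4], recurrent_initializer=p[5], bias_initializer=p[6],
                unit_forget_bias=p[7], kernel_regularizer=p[8], recurrent_regularizer=p[9],
                bias_regularizer=p[10], kernel_constraint=p[11], recurrent_constraint=p[12],
                bias_constraint=p[13], dropout=p[14], recurrent_dropout=p[15],
                implementation=p[16], activity_regularizer=p[17],
                return_sequences=p[18], return_state=p[19])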
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    summary_writer = tf.summary.FileWriter(
        "{}/tensorboard_M1_{}_M2_{}_snr1_{}_snr2_{}".format(args.out_dir, args.M1, args.M2,
                                                            args.snr_M1, args.snr_M2))

    # Initialize the wireless environment
    users_env = UsersEnvCluster(args.M1, args.M2, args.snr_M1, args.snr_M2, fixed_channel=False)
    print(users_env)

    # Wrap the environment to use consecutive frames
    env = Environment(users_env, args.consecutive_frames)
    env.reset()

    # Define parameters for the DDQN and DDPG algorithms
    state_dim = env.get_state_size()
    action_dim = users_env.action_dim
    act_range = 1
    act_min = 0

    # Initialize the DDQN algorithm for the clustering optimization
    n_clusters = users_env.n_clusters
    algo_clustering = DDQN(n_clusters, state_dim, args)

    # Initialize the DDPG algorithm for the beamforming optimization
    algo = DDPG(action_dim, state_dim, act_range, act_min, args.consecutive_frames,
                algo_clustering, episode_length=args.episode_length)

    if args.step == "train":
        # Train
        stats = algo.train(env, args, summary_writer)

        # Export results to CSV
        if args.gather_stats:
            df = pd.DataFrame(np.array(stats))
            df.to_csv(args.out_dir + "/logs.csv", header=['Episode', 'Mean', 'Stddev'],
                      float_format='%10.5f')

        # Save weights and close environments
        exp_dir = '{}/models_M1_{}_M2_{}_snr1_{}_snr2_{}/'.format(args.out_dir, args.M1, args.M2,
                                                                  args.snr_M1, args.snr_M2)
        if not os.path.exists(exp_dir):
            os.makedirs(exp_dir)

        # Save DDPG
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDPG", args.nb_episodes, args.batch_size)
        algo.save_weights(export_path)

        # Save DDQN
        export_path = '{}_{}_NB_EP_{}_BS_{}'.format(exp_dir, "DDQN", args.nb_episodes, args.batch_size)
        algo.ddqn_clustering.save_weights(export_path)

    elif args.step == "inference":
        print("Loading the DDPG networks (actor and critic) and the DDQN policy network ...")
        path_actor = '<add the path of the .h5 file of the DDPG actor>'
        path_critic = '<add the path of the .h5 file of the DDPG critic>'
        path_ddqn = '<add the path of the .h5 file of the DDQN actor>'
        algo.load_weights(path_actor, path_critic, path_ddqn)

        # Run the learned policy from a random initial state as an example
        s = np.random.rand(1, args.Nr)
        s_1 = np.zeros_like(s)
        s = np.vstack((s_1, s))
        while True:
            W = algo.policy_action(s)
            cluster_index = algo.ddqn_clustering.policy_action(s)
            a_and_c = {'a': W, 'c': cluster_index}
            new_state, r, done, _ = env.step(a_and_c)
            print("RL min rate = {}".format(r))
            print("RL state = {}".format(np.log(1 + new_state)))
            s = new_state
            input('Press Enter to continue ...')
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type, args.env,
                                                      args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.env.close()
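The flag spelling accepted by `parse_args` is not shown in this listing; assuming the flags mirror the attribute names used above, a typical entry point and invocation might look like the sketch below (the command lines are illustrative, not the repository's documented CLI):

# Example invocations (hypothetical flag spelling):
#   python main.py --type A2C --env CartPole-v1 --nb_episodes 5000 --batch_size 64 --gather_stats
#   python main.py --type DDQN --env BreakoutNoFrameskip-v4 --is_atari --gpu 0
if __name__ == "__main__":
    main()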
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    elif args.env == 'cell':
        # Optical tweezers environment
        env = Environment(opticalTweezers(), args.consecutive_frames)
        # env = opticalTweezers(consecutive_frames=args.consecutive_frames)
        env.reset()
        state_dim = (6, )
        action_dim = 4  # note: the reshape code has to change for a 2D agent; should be 4
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        print(state_dim)
        action_dim = gym.make(args.env).action_space.n
        print(action_dim)

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state, time = env.reset(), 0
    # all_old_states = [old_state for i in range(args.consecutive_frames)]
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            old_state = env.reset()  # restart from a fresh initial state
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Environment Initialization
    if args.is_ai2thor:
        # AI2-THOR Environment Wrapper
        config_dict = {'max_episode_length': 500}
        env = AI2ThorEnv(config_dict=config_dict)
        state = env.reset()
        state_dim = state.shape
        action_dim = env.action_space.n
        args.env = 'ai2thor'
    elif args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print(state_dim)
        print(action_dim)
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    algo = A3C(action_dim, state_dim, args.consecutive_frames,
               is_atari=args.is_atari, is_ai2thor=args.is_ai2thor)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type, args.env,
                                                      args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.close()