def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm and load its pre-trained weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            env.reset()

    env.env.close()
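# The get_session()/set_session() helpers used above are not shown in this snippet.
# A common TF1-era implementation (consistent with the TF1-style code in the
# surrounding snippets) enables on-demand GPU memory growth; this is a sketch
# under that assumption, not the project's actual helper.
import tensorflow as tf

def get_session_sketch():
    """Create a TF1 session that allocates GPU memory on demand."""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)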
def run(self):
    global agent, rl_environment
    if self.agent_name == self.DqnAgentNameString:
        rl_environment = gym.make('CartPole-v0').unwrapped
        agent = DqnAgent(
            rl_environment=rl_environment,
            plot_environment_statistics=self.plot_environment_statistics)
    elif self.agent_name == self.DdpgAgentNameString:
        rl_environment = gym.make('Pendulum-v0').env
        agent = Ddpg(
            rl_environment=rl_environment,
            plot_environment_statistics=self.plot_environment_statistics)
    elif self.agent_name == self.A3CAgentNameString:
        rl_environment = gym.make('Pendulum-v0').unwrapped
        agent = A3CMain(
            rl_environment=rl_environment,
            plot_environment_statistics=self.plot_environment_statistics)
        agent.initialize_workers()
    elif self.agent_name == self.A2CAgentNameString:
        rl_environment = gym.make("CartPole-v0").env
        agent = A2C(
            rl_environment=rl_environment,
            plot_environment_statistics=self.plot_environment_statistics)
    elif self.agent_name == self.A2CMultiAgentNameString:
        rl_environment = gym.make("CartPole-v0")
        agent = A2CMultiAgent(
            rl_environment=rl_environment,
            plot_environment_statistics=self.plot_environment_statistics)
    elif self.agent_name == self.TRPOAgentNameString:
        rl_environment = gym.make("Pendulum-v0").unwrapped
        agent = TRPO(
            rl_environment=rl_environment,
            plot_environment_statistics=self.plot_environment_statistics)

    agent.train(rl_environment)
    agent.test_agent(rl_environment)
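# Hypothetical driver for the run() dispatcher above. The enclosing class and its
# constructor are not shown in this snippet, so the class name (Runner) and the
# constructor arguments below are illustrative assumptions only.
if __name__ == "__main__":
    runner = Runner(agent_name=Runner.DqnAgentNameString,
                    plot_environment_statistics=True)
    runner.run()  # builds the gym environment and agent, then trains and tests it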
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)
    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.env.close()
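# A minimal argparse-based sketch of the parse_args helper that main() assumes,
# covering only the attributes referenced above (type, env, is_atari,
# consecutive_frames, gpu, gather_stats, nb_episodes, batch_size). The defaults
# and help strings are assumptions for illustration, not the project's real values.
import argparse

def parse_args_sketch(args):
    parser = argparse.ArgumentParser(description="Training parameters")
    parser.add_argument('--type', type=str, default='DDQN',
                        help="Algorithm to train: DDQN, A2C, A3C or DDPG")
    parser.add_argument('--env', type=str, default='CartPole-v1',
                        help="OpenAI Gym environment id")
    parser.add_argument('--is_atari', action='store_true',
                        help="Use the Atari environment wrapper")
    parser.add_argument('--consecutive_frames', type=int, default=4,
                        help="Number of consecutive frames stacked into one state")
    parser.add_argument('--gpu', type=str, default='',
                        help="GPU id exported to CUDA_VISIBLE_DEVICES")
    parser.add_argument('--gather_stats', action='store_true',
                        help="Export per-episode statistics to CSV after training")
    parser.add_argument('--nb_episodes', type=int, default=5000,
                        help="Number of training episodes")
    parser.add_argument('--batch_size', type=int, default=64,
                        help="Batch size used during training")
    return parser.parse_args(args)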
# target_fixation_time = 2

# Configuration parameters
save_variables = 0
save_figures = 0
hidden_representations = np.zeros([2, 10])

""" RUN """
for learning_rate in learning_rates:

    # define empty states as initial values
    empty_state_memory = np.zeros([num_steps_unrolled, input_size])
    # for fixation period
    shortempty_state = np.zeros(10)

    # create instance of Advantage Actor Critic
    algorithm = A2C(a_size, input_size, num_lstm_units, num_steps_unrolled,
                    gamma, learning_rate, print_summary=True)

    # empty variables for storing actions, rewards, states and trial indices
    reward_pertrial_matrix = np.zeros([num_episode_train, num_trial_per_episode])
    rewarded_image = np.zeros([num_episode_train, num_trial_per_episode])
    selected_image = np.zeros([num_episode_train, num_trial_per_episode])
    discounted_rewards = np.zeros([num_episode_train, num_trial_per_episode])
    state_values = []
    action_values = np.zeros([num_episode_train, num_trial_per_episode, a_size])

    run_duration = 0
    model = []
    time_start = time.time()

    state_memory = np.array(empty_state_memory)

    # run episodes
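# Sketch of the per-episode discounted return that the discounted_rewards buffer
# above is presumably filled with, i.e. G_t = r_t + gamma * G_{t+1}. Only gamma
# and the buffer shape come from the snippet; the helper name and where it is
# called are assumptions for illustration.
def compute_discounted_returns(rewards, gamma):
    """Return the discounted cumulative reward for each step of one episode."""
    returns = np.zeros_like(rewards, dtype=float)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns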
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        if args.env == 'cell':
            # Custom optical tweezers environment
            env = Environment(opticalTweezers(), args.consecutive_frames)
            # env = opticalTweezers(consecutive_frames=args.consecutive_frames)
            env.reset()
            state_dim = (6,)
            action_dim = 4  # note: the reshape code must be changed for a 2D agent; should be 4
        else:
            # Standard Environments
            env = Environment(gym.make(args.env), args.consecutive_frames)
            env.reset()
            state_dim = env.get_state_size()
            print(state_dim)
            action_dim = gym.make(args.env).action_space.n
            print(action_dim)

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state, time = env.reset(), 0
    # all_old_states = [old_state for i in range(args.consecutive_frames)]
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            env.reset()
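# Entry-point sketch for the variant above. The command-line flags are assumed to
# mirror the attribute names used in main() (--type, --env, --consecutive_frames,
# --nb_episodes, --batch_size, --gather_stats, --gpu, --is_atari); the example
# invocation, including the custom 'cell' environment id, is illustrative only.
if __name__ == "__main__":
    # e.g.  python <this script> --type A2C --env cell --consecutive_frames 4
    main()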