import sys
sys.path.append('.')
import time
import torch
import gym
import agent, utils

# Play a trained DQN agent on Pong, rendering each frame until interrupted.
agt = agent.DQNAgent()
agt.qnet.load_state_dict(torch.load('dqn_agt.pt'))
agt.qnet.eval()  # inference mode: freeze dropout/batch-norm behaviour

obs_history = utils.ObsHistory()
env = gym.envs.make('Pong-v4')
obs = env.reset()
obs_history.reset(obs)

while True:
    env.render()
    phi = obs_history.phi            # stacked-frame state fed to the policy
    a = agt.act(phi)
    obs, r, done, _ = env.step(a)
    obs_history.store(obs)
    time.sleep(.003)                 # slow playback slightly for viewing
    if done:
        obs = env.reset()
        # Fix: re-seed the frame history from the fresh observation so the
        # first actions of the new episode are not computed from stale
        # end-of-episode frames.
        obs_history.reset(obs)
parser.add_argument('--model')  # no training episodes
parser.add_argument('--eps')
parser.add_argument('--render')
args = parser.parse_args()

# Fix: 'writer' was only bound when --tensorboard was given, but it is passed
# unconditionally to DQNAgent below, raising NameError otherwise. Default both
# tensorboard handles to None so every code path is defined.
writer = None
write_proc = None
if args.tensorboard:
    writer = SummaryWriter()
    write_proc = subprocess.Popen(['tensorboard', '--logdir', '{}'.format(args.tensorboard)])

env = env.Environment(args.env)  # NOTE: rebinds the 'env' module name to an instance

if args.alg == 'DQN':
    agent = agent.DQNAgent(env, args.mode, args.model, writer)

try:
    if args.mode == 'train':
        agent.train(int(args.eps), args.render)
    elif args.mode == 'play':
        agent.play(int(args.eps))
except KeyboardInterrupt:
    print('PROCESS KILLED BY USER')
finally:
    env.close()
    if args.tensorboard:
        write_proc.terminate()
while not done: self.env.render() a = agent.act(s) s_, r, done, _ = self.env.step(a) agent.learn((s, a, s_, r, done)) self.reward += r s = s_ self.episode_count += 1 self.reward_buffer.append(self.reward) average = sum(self.reward_buffer) / len(self.reward_buffer) print("Episode Nr. {} \nScore: {} \nAverage: {}".format( self.episode_count, self.reward, average)) if __name__ == "__main__": import gym import agent import observer # observer key = 'CartPole-v0' exp = Experiment(key) agent = agent.DQNAgent(exp.env) epsilon = observer.EpsilonUpdater(agent) agent.add_observer(epsilon) exp.run_experiment(agent) #epsilon = observer.EpsilonUpdater(agent) #agent.add_observer(epsilon)
elif str(name).startswith('model_frozen'): param.requires_grad = False param_frozen_list.append(param) else: continue # print(f"param_active_list : {param_active_list}") # print(f"param_frozen_list : {param_frozen_list}") # print(f"model : {list(model_.parameters())}") # print(f"model_a : {list(param_active_list)}") optimizer = optim.Adam(list(model_.parameters()) + list(param_active_list), lr=config.learning_rate) algorithm = "_RND" agent = agent.DQNAgent(models, target_model_, optimizer, device, algorithm) step = 0 episode = 0 reward_list = [] loss_list = [] max_Q_list = [] r_i_list = [] loss_rl_list = [] loss_fm_list = [] # Reset Unity environment and set the train mode according to the environment setting (env_config) env_info = env.reset(train_mode=train_mode, config=config.env_config)[default_brain] # Game loop
def mean(values):
    """Return the arithmetic mean of *values* rounded to 2 decimals.

    Returns 0.0 when *values* is not a list or is empty (legacy behaviour
    of the original one-liner, kept for callers that pass odd inputs).
    """
    # Fix: isinstance() instead of type() == list, and truthiness instead
    # of len(...) > 0.
    if isinstance(values, list) and values:
        return round(sum(values) / len(values), 2)
    return 0.0


if __name__ == "__main__":
    env = gym.make("CartPole-v1")
    state_size = env.observation_space.shape[0]

    model_name = input("Model name -> ")
    my_model = "models/" + model_name + "/model_trained.h5"
    epsilon = input("Epsilon -> ")
    print("Loading", my_model, "with epsilon", epsilon)

    agent = agent.DQNAgent(my_model, float(epsilon))
    episode_count = int(input("Episode count -> "))

    done = False
    max_score = None
    highest_score = 0
    scores = []
    start = time.time()
    first_start = start

    for e in range(episode_count):
        # at each episode, reset environment to starting position
        state = env.reset()
        state = np.reshape(state, [1, state_size])
env = gym.make("Pendulum-v0")
state_size = env.observation_space.shape[0]

model_name = input("Model name -> ")
load_trained = input("Load trained (y/n)? ").lower() == "y"
my_model_location = "models/" + model_name + "/"
my_model = my_model_location + ("model_trained.h5" if load_trained else "model.h5")
epsilon = float(input("Epsilon -> "))  # if load_trained else 1.0;
print("Loading", my_model, "with epsilon", epsilon)

agent = agent.DQNAgent(my_model, epsilon)

# Restore replay memory saved next to the model, if present.
# Fix: json.load() takes a file object, not a path string — the original
# json.load(my_model.replace(...)) raised unconditionally and the bare
# except silently left memory empty every run.
try:
    with open(my_model.replace(".h5", ".json")) as f:
        agent.memory = json.load(f)
except (OSError, ValueError):  # missing file or corrupt JSON
    agent.memory = []

episode_count = int(input("Episode count -> "))
batch_size = 16
max_score = None
highest_score = None
scores = []
rewards = []
start = time.time()
"""This is just a sample main file to call the non-distributed implementation of the agent, the agent is very easy to create and train without any config needed.""" import sys import agent # Windows CUDA Issue on my Laptop import tensorflow as tf gpus = tf.config.experimental.list_physical_devices('GPU') if len(gpus) > 0: tf.config.experimental.set_memory_growth(gpus[0], True) if __name__ == '__main__': total_steps = 1000 if len(sys.argv) == 2: total_steps = int(sys.argv[1]) a = agent.DQNAgent() a.train(total_steps) a.model.save('model.h5') a.target_model.save('target_model.h5')
if __name__ == "__main__": env = gym.make("MountainCar-v0") state_size = env.observation_space.shape[0] model_name = input("Model name -> ") load_trained = input("Load trained (y/n)? ") load_trained = load_trained.lower() == "y" my_model_location = "models/" + model_name + "/" my_model = my_model_location + ("model_trained.h5" if load_trained else "model.h5") epsilon = input("Epsilon -> ") print("Loading", my_model, "with epsilon", epsilon) agent = agent.DQNAgent(my_model) try: agent.memory = json.load(my_model_trained.replace(".h5", ".json")) except: agent.memory = [] episode_count = int(input("Episode count -> ")) batch_size = 16 max_score = None highest_score = 0 scores = [] rewards = [] start = time.time()
import agent
from environment import GymEnvironment
import tensorflow as tf

# Training entry point: build the DQN graph inside a TF1 session and train.
gym_env = GymEnvironment()
dqn_agent = agent.DQNAgent(environment=gym_env)

with tf.Session() as sess:
    dqn_agent.build_dqn(sess)
    sess.run(tf.global_variables_initializer())
    dqn_agent.train(episodes=50000)
import agent
import tensorflow as tf
import argparse  # NOTE(review): imported but unused in this script — confirm before removing
from environment import GymEnvironment

# Evaluation entry point: restore a trained DQN and watch it play 10 episodes.
display_env = GymEnvironment(display=True)
dqn_agent = agent.DQNAgent(environment=display_env, display=True)

with tf.Session() as sess:
    dqn_agent.build_dqn(sess)
    sess.run(tf.global_variables_initializer())
    dqn_agent.load_model()
    dqn_agent.play(10)
help="Boltzmann exploration") parser.add_argument("--render", action="store_true", help="Visualize training") args = parser.parse_args() config_info = { "config_param": args.config, "prefix_path": args.prefix_path, "path_ckpts": args.path_ckpts, "resume": args.resume, } # Create environment env = gym.make(args.env) # Initialize agent if args.boltzmann: agent = agent_boltzmann.DQNAgent(env, args.render, config_info) else: agent = agent.DQNAgent(env, args.render, config_info) # Launch training print(f"\nTraining on {env.unwrapped.spec.id}..\n") agent.train() # Visualize reward evolution agent.plot_reward()