def main(unused_argv):
    del unused_argv
    configs = extract_configs(*FLAGS.eval_config)

    # instantiate the Banana env
    env = BananaWrapper(file_name="./Banana")
    state_size = env.observation_size
    action_size = env.action_size

    # instantiate agent object
    agent = Agent(state_size=state_size, action_size=action_size, configs=configs)

    # load trained model
    agent.qnetwork_local.load_state_dict(
        torch.load("results/checkpoints/DoubleDQN.pth"))

    horizon = 1000
    episodes = 5
    for _ in range(episodes):
        state = env.reset()
        for _ in range(horizon):
            # perform the action given by the trained policy
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            time.sleep(0.05)
            if done:
                break
            state = next_state

    # close env
    env.close()
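# A minimal sketch of the BananaWrapper interface the function above relies on
# (observation_size, action_size, reset() -> state, step(action) ->
# (next_state, reward, done)). This is an assumption about how the Unity
# environment is adapted to a gym-like API, not the author's implementation;
# it assumes UnityEnvironment from unityagents is in scope.
class BananaWrapper:
    def __init__(self, file_name):
        self._env = UnityEnvironment(file_name=file_name)
        self._brain_name = self._env.brain_names[0]
        brain = self._env.brains[self._brain_name]
        self.action_size = brain.vector_action_space_size
        self.observation_size = len(self.reset())

    def reset(self, train_mode=False):
        env_info = self._env.reset(train_mode=train_mode)[self._brain_name]
        return env_info.vector_observations[0]

    def step(self, action):
        env_info = self._env.step(action)[self._brain_name]
        return (env_info.vector_observations[0],
                env_info.rewards[0],
                env_info.local_done[0])

    def close(self):
        self._env.close()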
def main():
    ################################################
    # components required from main_02.py
    ################################################
    # spin up environment
    env = gym.make('LunarLander-v2')
    env.seed(0)

    # spin up agent (with underlying nn model)
    agent = Agent(state_size=8, action_size=4, seed=0)

    ################################################
    # Import trained agent and render performance
    ################################################
    # load the weights from file
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    for i in range(10):
        state = env.reset()
        img = plt.imshow(env.render(mode='rgb_array'))
        for j in range(400):
            action = agent.act(state)
            img.set_data(env.render(mode='rgb_array'))
            plt.axis('off')
            state, reward, done, _ = env.step(action)
            if done:
                break

    env.close()
def process(args):
    env = UnityEnvironment(file_name="Banana.app")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]             # get the current state
    score = 0                                           # initialize the score
    action_size = brain.vector_action_space_size
    state_size = len(state)
    agent = Agent(state_size, action_size, 1, args.model_path)

    while True:
        action = agent.act(state, 0.0)                # select an action
        env_info = env.step(action)[brain_name]       # send the action to the environment
        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]                  # get the reward
        done = env_info.local_done[0]                 # see if episode has finished
        score += reward                               # update the score
        state = next_state                            # roll over the state to next time step
        if done:                                      # exit loop if episode finished
            break

    print("Score: {}".format(score))
def main():
    env = UnityEnvironment(file_name="./../Banana.app")

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # Instantiate agent:
    env_info = env.reset(train_mode=False)[brain_name]
    action_size = brain.vector_action_space_size
    state = env_info.vector_observations[0]
    state_size = len(state)
    agent = Agent(state_size=state_size, action_size=action_size, seed=0)

    # load the weights from file
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    for i in range(3):
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        for j in range(200):
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            if done:
                break
            state = next_state

    env.close()
def test(self, run_id=5):
    agent = Agent(state_size=37, action_size=4, seed=0)
    run_dir = "results/{}".format(run_id)

    # load the weights from file
    agent.qnetwork_local.load_state_dict(
        torch.load("{}/checkpoint.pth".format(run_dir)))

    for i in range(5):
        env_info = self.env.reset(train_mode=False)[self.brain_name]  # reset the environment
        state = env_info.vector_observations[0]
        for j in range(50):
            action = agent.act(state)
            env_info = self.env.step(action)[self.brain_name]  # send the action to the environment
            next_state = env_info.vector_observations[0]       # get the next state
            reward = env_info.rewards[0]                       # get the reward
            done = env_info.local_done[0]                      # see if episode has finished
            state = next_state                                 # roll over the state to next time step
            if done:
                break
def test(n_epi):
    agent = Agent(state_size=37, action_size=4, seed=0)
    env = UnityEnvironment(file_name="Banana.app")
    brain_name = env.brain_names[0]  # get the default brain
    brain = env.brains[brain_name]
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    for i in range(n_epi):
        score = 0                                           # initialize the score
        env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
        state = env_info.vector_observations[0]             # get the current state
        while True:
            action = agent.act(state)
            env_info = env.step(action)[brain_name]       # send the action to the environment
            next_state = env_info.vector_observations[0]  # get the next state
            reward = env_info.rewards[0]                  # get the reward
            score += reward                               # update the score
            done = env_info.local_done[0]                 # see if episode has finished
            agent.step(state, action, reward, next_state, done)
            state = next_state                            # roll over the state to next time step
            if done:
                break
        scores_window.append(score)  # save most recent score
        scores.append(score)         # save most recent score
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i, np.mean(scores_window)))
    env.close()
def DQN_gif(file_name):
    env = gym.make('LunarLander-v2')
    env.seed(0)
    agent = Agent(state_size=8, action_size=4, seed=0)
    agent.qnetwork_local.load_state_dict(
        torch.load('checkpoint.pth', map_location=lambda storage, loc: storage))

    images = []
    state = env.reset()
    for j in range(200):
        action = agent.act(state)
        state, reward, done, _ = env.step(action)
        frame = env.render(mode='rgb_array')
        # annotate the frame with the step count and reward
        pil_img = Image.fromarray(frame)
        draw = ImageDraw.Draw(pil_img)
        text = 'Step = {}\nReward = {}'.format(j + 1, reward)
        draw.text((20, 20), text, (255, 255, 255))
        images.append(np.asarray(pil_img))
        if done:
            break
    imageio.mimsave(file_name, images)
def main(
    file_name="/Users/joshuaschoenfield/Downloads/Banana.app",
    weights_file="checkpoint_banana_2_LONG_SAFE.pth",
):
    with get_environment(file_name=file_name) as env:
        from dqn_agent import Agent

        agent = Agent(state_size=37, action_size=4, seed=0)
        agent.qnetwork_local.load_state_dict(torch.load(weights_file))
        scores = []
        num_iterations = 100
        for i in range(num_iterations):
            state = reset_and_get_first_state(env, train_mode=True)
            score = 0
            for j in range(2000):
                action = agent.act(state, eps=0)  # act greedily
                state, reward, done = get_next_state_reward_done(env, action)
                score += reward
                if done:
                    break
            scores.append(score)
        ax = plot_score_cumulative_distribution(scores)
        ax.figure.savefig("Media/validation_scores_cumulative.png")
        np.savetxt("validation_scores.txt", scores)
        return scores
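# Hedged sketch of the Unity helper wrappers the function above relies on
# (get_environment, reset_and_get_first_state, get_next_state_reward_done).
# The bodies below are assumptions written against the standard
# UnityEnvironment API used elsewhere in this file, not the author's originals.
from contextlib import contextmanager

@contextmanager
def get_environment(file_name):
    # open the Unity environment and guarantee it is closed afterwards
    env = UnityEnvironment(file_name=file_name)
    try:
        yield env
    finally:
        env.close()

def reset_and_get_first_state(env, train_mode=True):
    brain_name = env.brain_names[0]
    env_info = env.reset(train_mode=train_mode)[brain_name]
    return env_info.vector_observations[0]

def get_next_state_reward_done(env, action):
    brain_name = env.brain_names[0]
    env_info = env.step(action)[brain_name]
    return (env_info.vector_observations[0],
            env_info.rewards[0],
            env_info.local_done[0])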
def main():
    agent = Agent(state_size=3, action_size=8, seed=0)
    start_pos = (200, 600)
    end_pos = (800, 375)
    env = environment(MAP, start_pos, end_pos)

    # load the weights from file
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    for i in range(1):
        path_x = [start_pos[0]]
        path_y = [start_pos[1]]
        state, _, _ = env.reset(start_pos, end_pos)
        for j in range(6000):
            action = agent.act(state)
            state, reward, done = env.step(action)
            # record the traversed (x, y) position
            path_x.append(state[0])
            path_y.append(state[1])
            if done:
                break

    # plot the traversed path over the map
    x_end, y_end = end_pos
    plt.figure(figsize=(10, 6), dpi=200)
    plt.plot(path_x, path_y, 'ro', markevery=20)
    plt.plot(x_end, y_end, 'bx')
    plt.contourf(np.array(MAP), linestyles='dashed')
    plt.gca().set_aspect('equal', adjustable='box')
    plt.colorbar()
    plt.show()
    env.close()
def dqn(LR, GAMMA, TAU, BUFF, UPD, n_episodes=1000, max_t=100,
        eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Deep Q-Learning.

    Params
    ======
        LR (float): learning rate
        GAMMA (float): discount factor
        TAU (float): interpolation factor for the soft target-network update
        BUFF (int): replay buffer size
        UPD (int): how often to update the network
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """
    agent = Agent(state_size, action_size, LR, GAMMA, TAU, BUFF, UPD, seed=0)
    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)          # save most recent score
        scores.append(score)                 # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
        # if np.mean(scores_window) >= 13.0:
        #     print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
        #         i_episode - 100, np.mean(scores_window)))
        torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
        #     break
    # return scores
    return np.mean(scores_window)
def dqn(args, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Deep Q-Learning.

    Params
    ======
        args: command line arguments (num_episodes, save_every, save_checkpoint_path)
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """
    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    state_size = 37
    action_size = 4
    agent = Agent(state_size, action_size, 1)
    for i_episode in range(1, args.num_episodes + 1):
        # reset the environment for a new episode
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        cnt = 0
        while True:
            action = agent.act(state, eps)
            env_info = env.step(action)[brain_name]       # send the action to the environment
            next_state = env_info.vector_observations[0]  # get the next state
            reward = env_info.rewards[0]                  # get the reward
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            cnt += 1
            if done:
                break
        scores_window.append(score)          # save most recent score in the 100-episode window
        scores.append(score)                 # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        print('\rEpisode {}\tAverage Score in the last 100 episodes: {:.2f}'.format(
            i_episode, np.mean(scores_window)), end="")
        if i_episode % args.save_every == 0:
            print('\nSaving checkpoint at {:d} episodes!\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
            torch.save(agent.qnetwork_local.state_dict(),
                       os.path.join(args.save_checkpoint_path,
                                    'checkpoint_' + str(i_episode) + '.pth'))
    return scores
def test(dev, weights_file, n_episodes=100, max_t=1000):
    """Test the environment with the parameters stored in weights_file.

    Params
    ======
        dev (string): cpu or gpu
        weights_file (string): name of the file to load the weights from
        n_episodes (int): number of test episodes that will be performed
        max_t (int): maximum number of timesteps per episode
    """
    env = UnityEnvironment(file_name='./Banana_Linux/Banana.x86_64')
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=False)[brain_name]
    state_size = len(env_info.vector_observations[0])
    action_size = brain.vector_action_space_size
    agent = Agent(state_size, action_size, seed=0, device=dev)

    # load the weights from file
    print('Loading weights')
    try:
        checkpoint = torch.load(weights_file)
    except FileNotFoundError:
        print('Error: File \'{}\' not found'.format(weights_file))
        sys.exit(1)
    agent.qnetwork_local.load_state_dict(checkpoint)

    scores = []
    print('Running {} episodes'.format(n_episodes))
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=False)[brain_name]
        score = 0
        state = env_info.vector_observations[0]
        for j in range(max_t):
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            score += reward
            if done:
                break
        scores.append(score)
        if i_episode % 100 != 0:
            print('\rEpisode {}\tScore: {:.0f}\tAverage Score: {:.2f}'.format(
                i_episode, score, np.mean(scores)), end="")
        else:
            print('\rEpisode {}\tScore: {:.0f}\tAverage Score: {:.2f}'.format(
                i_episode, score, np.mean(scores)))
    env.close()
def __init__(self, name, state_size, action_size, env, load_net=False):
    self.agent = Agent(name, state_size=state_size, action_size=action_size, seed=0)
    self.env = env
    self.saved_network = name + '_dqn_checkpoint.pth'
    self.load_net = load_net
    if load_net:
        print('Loading pretrained network...')
        # initialize both networks from the same saved checkpoint
        self.agent.qnetwork_local.load_state_dict(torch.load(self.saved_network))
        self.agent.qnetwork_target.load_state_dict(torch.load(self.saved_network))
        print('Loaded.')
def main(FLAGS):
    #env = UnityEnvironment(file_name="Banana_Linux_NoVis/Banana.x86_64", docker_training=FLAGS.docker_training, no_graphics=FLAGS.no_graphics)
    env = UnityEnvironment(file_name="Banana_Linux_NoVis/Banana.x86_64",
                           no_graphics=FLAGS.no_graphics)

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    action_size = brain.vector_action_space_size
    state = env_info.vector_observations[0]
    state_size = len(state)

    FLAGS = vars(FLAGS)
    agent = Agent(state_size=state_size, action_size=action_size, **FLAGS)
    scores = dqn(env, brain_name, agent, **FLAGS)

    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    plt.show()
def run(learning_strategy, n_episodes, experiment_name):
    # default the experiment name to this script's file name
    experiment_name = os.path.basename(__file__).split('.')[0] \
        if experiment_name is None else experiment_name
    mlflow.set_experiment(experiment_name)
    with mlflow.start_run():
        mlflow.log_param('Learning Strategy', learning_strategy)
        env = create_env()
        agent = Agent(state_size=8, action_size=4, seed=0,
                      learning_strategy=LearningStrategy[learning_strategy])
        checkpoint_directory = './artifacts/checkpoints'
        if not os.path.exists(checkpoint_directory):
            os.makedirs(checkpoint_directory)
        with ArtifactHandler() as _:
            scores = dqn(env, agent, n_episodes=n_episodes,
                         checkpoint_directory=checkpoint_directory)
        print("Saving scores..")
        np.savetxt('./artifacts/scores.txt', scores)
def main():
    ################################################
    # components required from main_02.py
    ################################################
    # spin up environment
    env = gym.make('LunarLander-v2')
    env.seed(0)

    # spin up agent (with underlying nn model)
    agent = Agent(state_size=8, action_size=4, seed=0)

    ################################################
    # Train the Agent with DQN
    ################################################
    # train the agent
    scores = dqn(env, agent)

    # plot the scores that the agent received while training
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.xlabel('Episode #')
    plt.ylabel('Score')
    plt.show()
def navigate_smart(model_file: Path) -> None:
    """Use the longest-trained model to navigate the Banana environment."""
    # Init environment.
    env, brain_name, state_size, action_size, state = init_env(
        settings.env_file, train_mode=False)

    # Load the last trained model.
    agent: Agent = Agent(state_size, action_size, random.randint(0, 100))
    agent.qnetwork_local.load_state_dict(torch.load(model_file))
    agent.qnetwork_local.eval()

    score: float = 0.0
    while True:
        action: int = agent.act(state)
        env_info: BrainInfo = env.step(action)[brain_name]
        next_state: np.ndarray = env_info.vector_observations[0]
        reward: float = env_info.rewards[0]
        done: bool = env_info.local_done[0]
        state = next_state
        score += reward
        if done:
            break
    print("Score: {}".format(score))
def main():
    # GPU
    print('Is GPU available?', torch.cuda.is_available())
    print()

    # Environment
    env = gym.make(ENV)
    env.seed(0)
    print('Environment:', env)
    print('State shape:', env.observation_space.shape)
    print('Number of actions:', env.action_space.n)
    print()

    # DQN agent
    agent = Agent(state_size=STATE_SIZE, action_size=ACTION_SIZE, seed=SEED)

    # Training
    scores = dqn(n_episodes=N_EPISODES, max_t=MAX_T, eps_start=EPS_START,
                 eps_end=EPS_END, eps_decay=EPS_DECAY, env=env, agent=agent)

    # Save score
    print(scores[-5:])
    pickle.dump(scores, open(PATH_SCORE, 'wb'))
    print('Saved score')

    # Visualize training result
    plot_score(scores)
def initialize_env():
    env = atari_wrappers.make_atari('RiverraidNoFrameskip-v4')
    env = atari_wrappers.wrap_deepmind(env, clip_rewards=False,
                                       frame_stack=True, pytorch_img=True)
    agent = Agent(in_channels=4, action_size=18, seed=0)

    # initialize the networks from saved state dicts
    agent.qnetwork_target.load_model(
        torch.load('./data/dqn_Riverraid_qnetwork_target_state_dict.pth'))
    agent.qnetwork_local.load_model(
        torch.load('./data/dqn_Riverraid_local_model_state_dict.pth'))

    # initialize the replay buffer with random transitions
    while len(agent.memory) < BUFFER_INI:
        observation = env.reset()
        done = False
        while not done:
            action = random.sample(range(env.action_space.n), 1)[0]
            next_observation, reward, done, info = env.step(action)
            agent.memory.add(observation, action, reward, next_observation, done)
            observation = next_observation
    print("Replay Buffer Initialized")
    return env, agent
def main():
    env = World()
    agent = Agent(state_size=env.observation_space,
                  action_size=env.action_space, seed=0)
    scores = dqn(env, agent)

    with open('./DQN/scores.pkl', 'wb') as f:
        pickle.dump(scores, f, protocol=pickle.HIGHEST_PROTOCOL)

    # plot the scores and their 100-episode moving average
    scores_window = deque(maxlen=100)
    avg_scores = []
    for score in scores:
        scores_window.append(score)
        avg_scores.append(np.mean(scores_window))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores, linewidth=0.5, label='score')
    plt.plot(np.arange(len(avg_scores)), avg_scores, linewidth=2, label='MA score')
    plt.legend(['score', 'MA score'])
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    fig.savefig('./DQN/training_curve.png')
def main(unused_argv):
    del unused_argv
    configs = extract_configs(*FLAGS.config)
    training = configs["training"]
    label = configs["agent"]["name"]

    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)

    env = BananaWrapper(file_name="./Banana")
    state_size = env.observation_size
    action_size = env.action_size
    agent = Agent(state_size=state_size, action_size=action_size, configs=configs)
    scores = dqn(env=env, agent=agent, label=label, **training)

    ax.plot(np.arange(len(scores)), scores, label=label)
    plt.ylabel("Score")
    plt.xlabel("Episode #")
    ax.legend(loc="upper center", shadow=True, fontsize="small")
    plt.savefig("results/plots/" + str(label))
    plt.show()
def play_banana(isDoubleDQN=0):
    isDoubleDQN = int(isDoubleDQN)

    # find the path to the environment; this can differ across operating systems
    env = UnityEnvironment(file_name=r"Banana_Windows_x86_64\Banana.exe")

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of actions
    action_size = brain.vector_action_space_size

    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)

    # instantiate agent
    agent = Agent(state_size=state_size, action_size=action_size, seed=0,
                  isDoubleDQN=isDoubleDQN)

    # load the weights from file
    if isDoubleDQN == 1:
        print("Using Double DQN")
        agent.qnetwork_local.load_state_dict(torch.load('checkpoint_double_agent.pth'))
    else:
        print("Not Using Double DQN")
        agent.qnetwork_local.load_state_dict(torch.load('checkpoint_simple_agent.pth'))

    # start the agent
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]             # get the current state
    score = 0                                           # initialize the score
    while True:
        action = agent.act(state, eps=0)              # select an action
        env_info = env.step(action)[brain_name]       # send the action to the environment
        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]                  # get the reward
        done = env_info.local_done[0]                 # see if episode has finished
        score += reward                               # update the score
        state = next_state                            # roll over the state to next time step
        if done:                                      # exit loop if episode finished
            break

    print("Score: {}".format(score))
    env.close()
def main():
    # Parse arguments:
    parser = argparse.ArgumentParser(fromfile_prefix_chars='@')
    parser.add_argument("--env_path", type=str,
                        help='Path to the ml-agents environment file')
    args = parser.parse_args()

    # Instantiate environment:
    # - Make sure you don't have the same environment already opened in a
    #   jupyter notebook or another python process.
    # - Do not try disabling rendering. Visual observations will not work.
    env = UnityEnvironment(file_name=args.env_path)

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    ### instantiate agent
    env_info = env.reset(train_mode=False)[brain_name]

    # get action size
    action_size = brain.vector_action_space_size

    # get input size
    # first, get a frame from the emulator
    frame = env_info.visual_observations[0]

    # set up a deque to store the stack of frames fed to the neural network
    state_buffer = deque(maxlen=NUM_FRAMES)

    # preprocess the frame and stack it to build the state
    frame = preprocess_image(frame)
    state = stack_frames(frame, state_buffer)
    state_size = state.shape

    # finally, instantiate the agent
    agent = Agent(state_size=state_size, action_size=action_size, seed=0)

    # start training
    scores = dqn(env, brain_name, agent)

    # close environment
    env.close()

    # Write scores to a file for later plot editing:
    #np.save("scores", scores)

    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    print("To finish the program, manually close the plot window.")
    plt.show()
    print("Done!")
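# A minimal sketch of the frame-preprocessing helpers called above. These
# bodies are assumptions, not the repository's implementations:
# preprocess_image is assumed to grayscale a Unity visual observation of shape
# (1, H, W, 3), and stack_frames to maintain a rolling stack of the last
# NUM_FRAMES frames (padding with copies of the first frame at episode start).
import numpy as np

def preprocess_image(frame):
    # collapse the (1, H, W, 3) observation to a grayscale (H, W) array
    frame = np.squeeze(frame, axis=0)
    return frame.mean(axis=2).astype(np.float32)

def stack_frames(frame, state_buffer):
    # pad the deque with copies of the first frame until it is full
    while len(state_buffer) < state_buffer.maxlen:
        state_buffer.append(frame)
    # append the newest frame (the oldest rolls off) and build the state
    state_buffer.append(frame)
    return np.stack(state_buffer, axis=0)  # shape: (NUM_FRAMES, H, W)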
def trainFunction(n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01,
                  eps_decay=0.995):
    agent = Agent(state_size=37, action_size=4, seed=0, priority=True)
    epsilons = []
    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]  # reset the environment
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action.astype(np.int32))[brain_name]
            next_state = env_info.vector_observations[0]  # get the next state
            reward = env_info.rewards[0]                  # get the reward
            done = env_info.local_done[0]                 # see if episode has finished
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)          # save most recent score
        scores.append(score)                 # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        epsilons.append(eps)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
        # if np.mean(scores_window) >= 13.0:
    print('\nEnvironment finished in {:d} episodes!\tAverage Score: {:.2f}'.format(
        i_episode, np.mean(scores_window)))
    torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
    return scores, epsilons
def game_start(game_name):
    '''Initialize the environment and the agent.'''
    env = gym.make(game_name)
    env.seed(0)
    print('State shape: ', env.observation_space.shape)
    print('Number of actions: ', env.action_space.n)
    agent = Agent(state_size=8, action_size=4, seed=0)
    return env, agent
def main():
    params = Params(n_episodes=50000, max_t=1000, eps_start=1.0,
                    eps_end=0.01, eps_decay=0.995)
    environment = Environment('/Users/dali/workspace/RL/Reacher.app')
    # environment = Environment('/Users/dali/workspace/RL/Banana.app')
    agent = Agent(environment=environment, seed=0, device=device)
    dqn(agent, params)
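# A minimal sketch, assuming Params is a simple hyperparameter container; the
# field names follow the call above, but the namedtuple choice is an assumption
# about the original code, which may use a dataclass or plain object instead.
from collections import namedtuple

Params = namedtuple(
    'Params', ['n_episodes', 'max_t', 'eps_start', 'eps_end', 'eps_decay'])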
def main(file_name="/Users/joshuaschoenfield/Downloads/Banana.app",
         with_plotting=True):
    with get_environment(file_name=file_name) as env:
        from dqn_agent import Agent

        agent = Agent(state_size=37, action_size=4, seed=0)
        scores, running_average = dqn(env=env, agent=agent,
                                      with_plotting=with_plotting)
        return scores, running_average
def testFunction():
    agent = Agent(state_size=37, action_size=4, seed=0)
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]             # get the current state
    score = 0                                           # initialize the score
    time_steps = 100000
    for t in range(time_steps):
        action = agent.act(state)  # select an action
        env_info = env.step(action.astype(np.int32))[brain_name]  # send the action to the environment
        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]                  # get the reward
        done = env_info.local_done[0]                 # see if episode has finished
        score += reward                               # update the score
        state = next_state                            # roll over the state to next time step
        if done:                                      # exit loop if episode finished
            break

    print("Score: {}".format(score))
def main():
    # Parse arguments:
    parser = argparse.ArgumentParser(
        description="Run Extended Q-Learning with given config")
    parser.add_argument("-c", "--config", type=str, metavar="", required=True,
                        help="Config file name - file must be available as .json in ./configs")
    args = parser.parse_args()

    # load config file
    with open(os.path.join(".", "configs", args.config), "r") as read_file:
        config = json.load(read_file)

    env = UnityEnvironment(file_name=os.path.join(*config["general"]["env_path"]))
    agent = Agent(config=config)

    if config["train"]["run_training"]:
        scores = sessions.train(agent, env, config)
        helper.plot_scores(scores)
        agent.save()
    else:
        agent.load()
        sessions.test(agent, env)

    env.close()
def run(agent_source, location, n_episodes):
    source = AgentSource[agent_source.upper()]
    agent = Agent(state_size=8, action_size=4, seed=0,
                  learning_strategy=LearningStrategy.DQN)
    path_to_agent_checkpoint = retrieve_agent_checkpoint(source, location)
    agent.qnetwork_local.load_state_dict(
        torch.load(path_to_agent_checkpoint,
                   map_location=lambda storage, loc: storage))

    #display = Display(visible=0, size=(1400, 900))
    #display.start()

    env = gym.make('LunarLander-v2')
    env.seed(0)
    print('State shape: ', env.observation_space.shape)
    print('Number of actions: ', env.action_space.n)

    for i in range(n_episodes):
        state = env.reset()
        #img = plt.imshow(
        env.render(mode='rgb_array')
        for j in range(500):
            action = agent.act(state)
            #img.set_data(
            env.render(mode='rgb_array')
            plt.axis('off')
            time.sleep(0.1)
            #display.display(plt.gcf())
            #display.clear_output(wait=True)
            state, reward, done, _ = env.step(action)
            if done:
                break
    env.close()
env = gym.make('LunarLander-v2')
env.seed(0)
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

# Please refer to the instructions in `Deep_Q_Network.ipynb` if you would like
# to write your own DQN agent. Otherwise, run the code cell below to load the
# solution files.

# In[4]:

from dqn_agent import Agent

agent = Agent(state_size=8, action_size=4, seed=0)

# watch an untrained agent
state = env.reset()
for j in range(200):
    action = agent.act(state)
    env.render()
    state, reward, done, _ = env.step(action)
    if done:
        break

env.close()

# ### 3. Train the Agent with DQN
#
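# A minimal sketch of the dqn training loop that section 3 refers to, following
# the pattern of the training functions above; the hyperparameter defaults are
# assumptions, not the notebook's exact solution. Assumes `deque` (collections)
# and `numpy as np` are imported, and uses the `env` and `agent` defined above.
def dqn(n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    scores = []                        # score from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # epsilon for epsilon-greedy exploration
    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)
        scores.append(score)
        eps = max(eps_end, eps_decay * eps)  # decay epsilon once per episode
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(
            i_episode, np.mean(scores_window)), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
    return scores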