def main():
    env_name = "Environments/Banana_Linux/Banana.x86_64"
    train_mode = True  # whether to run the environment in training or inference mode
    env = UnityEnvironment(file_name=env_name, no_graphics=False)
    # env = UnityEnvironment(file_name="/data/Banana_Linux_NoVis/Banana.x86_64")

    # Set the default brain to work with
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=train_mode)[brain_name]

    # Action and observation spaces
    nA = brain.vector_action_space_size
    nS = env_info.vector_observations.shape[1]
    print('Observation Space {}, Action Space {}'.format(nS, nA))

    seed = 7
    agent = Priority_DQN(nS, nA, seed, UPDATE_EVERY, BATCH_SIZE, BUFFER_SIZE,
                         MIN_BUFFER_SIZE, LR, GAMMA, TAU, CLIP_NORM, ALPHA)
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))
    # scores = train(agent, env, brain_name)

    # Watch the trained agent. UnityEnvironment has no gym-style reset()/render()/
    # step(); stepping goes through the brain dictionary, and rendering happens in
    # the Unity window itself (no_graphics=False above). The original frame capture
    # here (plt.imshow of env.render(mode='rgb_array') plus plt.savefig) used the
    # classic Gym render API, which UnityEnvironment does not provide -- see the
    # gym-style sketch below.
    for i in range(1):
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        for j in range(500):
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            state = env_info.vector_observations[0]
            if env_info.local_done[0]:
                break

    # plot the scores (only meaningful when train() above is uncommented)
    # plot(scores)
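# For reference, a minimal sketch of the frame-capture pattern the loop above was
# borrowed from. It assumes a classic Gym (pre-0.26) environment, where
# env.render(mode='rgb_array') returns an RGB frame, and a notebook context for
# the IPython display calls; 'CartPole-v1' and the output file names are
# placeholders, and none of this applies to UnityEnvironment.
import gym
import matplotlib.pyplot as plt
from IPython import display

env = gym.make('CartPole-v1')
state = env.reset()
img = plt.imshow(env.render(mode='rgb_array'))
for j in range(200):
    action = env.action_space.sample()          # random policy, just for the demo
    img.set_data(env.render(mode='rgb_array'))  # update the displayed frame
    plt.axis('off')
    display.display(plt.gcf())
    display.clear_output(wait=True)
    plt.savefig('test' + str(j) + '.png', bbox_inches='tight')  # save each frame
    state, reward, done, _ = env.step(action)
    if done:
        break
env.close()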
def main():
    env = UnityEnvironment(
        file_name="/home/faten/projects/deep-reinforcement-learning/p1_navigation/Banana_Linux/Banana.x86_64"
    )
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    state_size = len(state)

    agent = DQNAgent(state_size, action_size, seed=0)
    scores = train(env, agent)  # train() is sketched just below

    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    plt.show()

    # Watch the trained agent. As above, UnityEnvironment exposes no gym-style
    # reset()/render()/step(); stepping goes through the brain dictionary and the
    # Unity window does the rendering.
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))
    for i in range(3):
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        for j in range(200):
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            state = env_info.vector_observations[0]
            if env_info.local_done[0]:
                break
    env.close()
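# A minimal sketch of the train(env, agent) function called above, assuming the
# Udacity-style agent API (agent.act(state, eps) and
# agent.step(state, action, reward, next_state, done)) and a single-agent brain.
# The hyperparameter defaults are assumptions, not taken from the source.
import numpy as np
import torch

def train(env, agent, n_episodes=2000, max_t=1000,
          eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    brain_name = env.brain_names[0]
    scores = []
    eps = eps_start
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)                       # epsilon-greedy action
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)  # store & learn
            state = next_state
            score += reward
            if done:
                break
        scores.append(score)
        eps = max(eps_end, eps_decay * eps)                      # decay exploration rate
        if i_episode % 100 == 0:
            print('Episode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores[-100:])))
    # save the weights that main() reloads from checkpoint.pth
    torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
    return scores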
class EnvironmentWrapper:
    def __init__(self, fn='Reacher_Linux_20Agents/Reacher.x86_64'):
        self.env = UnityEnvironment(file_name=fn)
        # get the default brain
        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        states = self.reset()
        self.state_size = states.shape[1]
        print('Number of agents:', self.num_agents)
        print('Size of each action:', self.action_size)
        print('Each agent observes a state with length: {}'.format(self.state_size))

    def render(self):
        # UnityEnvironment has no render() method; the Unity window renders on its
        # own. Kept as a no-op for gym-style compatibility.
        pass

    def reset(self):
        # reset the environment
        self.env_info = self.env.reset(train_mode=True)[self.brain_name]
        # number of agents
        self.num_agents = len(self.env_info.agents)
        # size of each action
        self.action_size = self.brain.vector_action_space_size
        # examine the state space
        states = self.env_info.vector_observations
        return states

    def step(self, actions):
        # send all actions to the environment
        env_info = self.env.step(actions)[self.brain_name]
        next_states = env_info.vector_observations  # get next state (for each agent)
        rewards = env_info.rewards                  # get reward (for each agent)
        dones = env_info.local_done                 # see if episode finished
        return next_states, rewards, dones, env_info

    def close(self):
        self.env.close()
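# Quick usage sketch for the wrapper above: drive all 20 Reacher agents with
# random actions for one episode. The clip to [-1, 1] follows the standard
# continuous-control convention; the default file path may need adjusting.
import numpy as np

wrapper = EnvironmentWrapper()
states = wrapper.reset()
scores = np.zeros(wrapper.num_agents)
while True:
    actions = np.clip(np.random.randn(wrapper.num_agents, wrapper.action_size), -1, 1)
    next_states, rewards, dones, _ = wrapper.step(actions)
    scores += rewards
    states = next_states
    if np.any(dones):
        break
print('Mean score over agents: {:.2f}'.format(scores.mean()))
wrapper.close()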
# The enclosing def is missing from the source; the signature below is a
# reconstruction inferred from the body (the name env_step is hypothetical).
def env_step(env, brain_name, action, brain_index=0):
    env_info = env.step(action)[brain_name]
    state = env_info.vector_observations[brain_index]
    reward = env_info.rewards[brain_index]
    done = env_info.local_done[brain_index]
    return state, reward, done, env_info

from dqn_agent import Agent

agent = Agent(state_size=8, action_size=4, seed=0)

# watch an untrained agent. This is a gym-style loop (state_size=8/action_size=4
# match the LunarLander-v2 example it comes from); a UnityEnvironment would need
# the env_step() helper above instead of env.render()/env.step().
state = env.reset()
for j in range(200):
    action = agent.act(state)
    env.render()
    state, reward, done, _ = env.step(action)
    if done:
        break
env.close()

def dqn(
    env,
    brain_index=0,
    train_mode=True,
    n_episodes=2000,
    max_t=1000,
    eps_start=1.0,
    eps_end=0.01,
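    # The source cuts off above. Everything from here down is an assumed
    # continuation modeled on the canonical Udacity DQN loop (eps_decay, the
    # docstring, and the body are not in the source); it reuses the env_step()
    # helper reconstructed above, the module-level agent, and numpy as np.
    eps_decay=0.995,
):
    """Deep Q-Learning over a Unity brain; returns per-episode scores."""
    brain_name = env.brain_names[0]
    scores = []
    eps = eps_start
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=train_mode)[brain_name]
        state = env_info.vector_observations[brain_index]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)               # epsilon-greedy action
            next_state, reward, done, _ = env_step(env, brain_name, action, brain_index)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores.append(score)
        eps = max(eps_end, eps_decay * eps)              # decay exploration rate
        if i_episode % 100 == 0:
            print('Episode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores[-100:])))
    return scores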