Example No. 1
import torch
from unityagents import UnityEnvironment


def main():
    env_name = "Environments/Banana_Linux/Banana.x86_64"
    train_mode = True  # whether to run the environment in training or inference mode
    env = UnityEnvironment(file_name=env_name, no_graphics=False)
    # env = UnityEnvironment(file_name="/data/Banana_Linux_NoVis/Banana.x86_64")
    # Set the default brain to work with
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=train_mode)[brain_name]
    # Action and Observation spaces
    nA = brain.vector_action_space_size
    nS = env_info.vector_observations.shape[1]
    print('Observation Space {}, Action Space {}'.format(nS, nA))
    seed = 7
    agent = Priority_DQN(nS, nA, seed, UPDATE_EVERY, BATCH_SIZE, BUFFER_SIZE,
                         MIN_BUFFER_SIZE, LR, GAMMA, TAU, CLIP_NORM, ALPHA)
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))
    # scores = train(agent,env,brain_name)
    # Watch the trained agent for one episode. UnityEnvironment has no
    # gym-style render(); with no_graphics=False the Unity window displays
    # the episode itself, so the original frame-capture/plt.savefig calls
    # are omitted here.
    for i in range(1):
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        for j in range(500):
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            state = env_info.vector_observations[0]
            done = env_info.local_done[0]
            if done:
                break
    # plot the scores (only available when the train() call above is uncommented)
    # plot(scores)
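The Priority_DQN constructor in Example No. 1 reads several module-level hyperparameters that the excerpt never defines. A minimal sketch of plausible values follows; every number here is an assumption for illustration, not taken from the original repository.

BUFFER_SIZE = int(1e5)      # replay buffer capacity (assumed value)
MIN_BUFFER_SIZE = int(1e3)  # transitions collected before learning starts (assumed)
BATCH_SIZE = 64             # minibatch size sampled from the buffer (assumed)
GAMMA = 0.99                # discount factor (assumed)
TAU = 1e-3                  # soft-update rate for the target network (assumed)
LR = 5e-4                   # Adam learning rate (assumed)
UPDATE_EVERY = 4            # learn every N environment steps (assumed)
CLIP_NORM = 10.0            # gradient-norm clipping threshold (assumed)
ALPHA = 0.6                 # prioritisation exponent for prioritized replay (assumed)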
Example No. 2
import numpy as np
import torch
import matplotlib.pyplot as plt
from unityagents import UnityEnvironment

# DQNAgent and train() are assumed to be defined elsewhere in this project.


def main():

    env = UnityEnvironment(
        file_name="/home/faten/projects/deep-reinforcement-learning/p1_navigation/Banana_Linux/Banana.x86_64"
    )

    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    state_size = len(state)

    agent = DQNAgent(state_size, action_size, seed=0)

    scores = train(env, agent)

    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    plt.show()

    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))

    # Watch the trained agent. UnityEnvironment has no gym-style render();
    # the Unity window displays the episodes itself.
    for i in range(3):
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        for j in range(200):
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            state = env_info.vector_observations[0]
            done = env_info.local_done[0]
            if done:
                break

    env.close()
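The train() helper called in Example No. 2 is not part of the excerpt. Below is a minimal sketch of such a loop for the single-agent Banana brain, assuming the agent exposes act(state, eps) and step(state, action, reward, next_state, done) as in the standard Udacity DQN agent; the episode counts, epsilon schedule, and +13 solve threshold are assumptions.

from collections import deque
import numpy as np
import torch


def train(env, agent, n_episodes=1000, max_t=1000,
          eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    brain_name = env.brain_names[0]
    scores, scores_window, eps = [], deque(maxlen=100), eps_start
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for _ in range(max_t):
            action = agent.act(state, eps)                       # epsilon-greedy action
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)  # store transition and learn
            state, score = next_state, score + reward
            if done:
                break
        scores.append(score)
        scores_window.append(score)
        eps = max(eps_end, eps_decay * eps)                      # decay exploration
        if np.mean(scores_window) >= 13.0:                       # Banana is typically considered solved at +13
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
            break
    return scores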
Example No. 3
from unityagents import UnityEnvironment


class EnvironmentWrapper:
    def __init__(self, fn='Reacher_Linux_20Agents/Reacher.x86_64'):
        self.env = UnityEnvironment(file_name=fn)

        # get the default brain
        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        states = self.reset()
        self.state_size = states.shape[1]
        print('Number of agents:', self.num_agents)
        print('Size of each action:', self.action_size)
        print('Each observes a state with length: {}'.format(self.state_size))

    def render(self):
        # UnityEnvironment does not expose a gym-style render(); the Unity
        # window renders itself, so this is a no-op kept for interface
        # compatibility.
        pass

    def reset(self):
        # reset the environment
        self.env_info = self.env.reset(train_mode=True)[self.brain_name]
        # number of agents
        self.num_agents = len(self.env_info.agents)
        # size of each action
        self.action_size = self.brain.vector_action_space_size
        # examine the state space
        states = self.env_info.vector_observations
        return states

    def step(self, actions):
        env_info = self.env.step(actions)[
            self.brain_name]  # send all actions to the environment
        next_states = env_info.vector_observations  # get next state (for each agent)
        rewards = env_info.rewards  # get reward (for each agent)
        dones = env_info.local_done  # see if episode finished
        return next_states, rewards, dones, env_info

    def close(self):
        self.env.close()
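A minimal usage sketch for the wrapper above; the random-action policy and the step count are assumptions, and it relies on the Reacher binary path baked into __init__.

import numpy as np

env = EnvironmentWrapper()
states = env.reset()
scores = np.zeros(env.num_agents)
for _ in range(100):
    # uniform random actions in [-1, 1] for every agent (continuous control)
    actions = np.clip(np.random.randn(env.num_agents, env.action_size), -1, 1)
    states, rewards, dones, _ = env.step(actions)
    scores += rewards
    if np.any(dones):
        break
print('Mean score across agents:', scores.mean())
env.close()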
Example No. 4
# The snippet below is missing its enclosing function signature in the excerpt;
# a plausible (assumed) one is added here so it reads as a gym-style step helper.
def env_step(env, brain_name, action, brain_index=0):
    env_info = env.step(action)[brain_name]
    state = env_info.vector_observations[brain_index]
    reward = env_info.rewards[brain_index]
    done = env_info.local_done[brain_index]
    return state, reward, done, env_info


from dqn_agent import Agent

agent = Agent(state_size=8, action_size=4, seed=0)

# watch an untrained agent
state = env.reset()
for j in range(200):
    action = agent.act(state)
    env.render()
    state, reward, done, _ = env.step(action)
    if done:
        break

env.close()
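The env used in the watch-an-untrained-agent snippet above is never created in the excerpt. A state size of 8 with 4 discrete actions matches Gym's LunarLander-v2, so a plausible setup (an assumption, using the classic Gym API the snippet expects) is:

import gym

env = gym.make('LunarLander-v2')  # 8-dimensional state, 4 discrete actions
state = env.reset()               # classic Gym API: reset() returns the observation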


def dqn(
    env,
    brain_index=0,
    train_mode=True,
    n_episodes=2000,
    max_t=1000,
    eps_start=1.0,
    eps_end=0.01,