import torch
import streamlit as st  # `st` is assumed to be Streamlit, given the st.write call below

# GameEnv, Agent and runner are project-local imports not shown in this excerpt.


def trainer():
    env = GameEnv(N_agents=8, enemies_stationary=True)
    # env.seed(0)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    st.write("training using device {}".format(device))
    agentArgs = {
        "device": "cuda:0",
        "state_size": env.getStateDimension(),
        "action_size": 2,
        "seed": 0,
    }
    agent = Agent(**agentArgs)

    runner(env, agent, "ddqn")
Example #2
env_info = env.reset(train_mode=True)[brain_name]

# number of agents in the environment
print('Number of agents:', len(env_info.agents))

# number of actions
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# examine the state space
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)

agent = Agent(state_size=state_size, action_size=action_size)
# agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))


def dqn(n_episodes=2000,
        max_t=1000,
        eps_start=0.9,
        eps_end=0.01,
        eps_decay=0.95):
    """Deep Q-Learning.

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """
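As a quick, illustrative check (not part of the original snippet): since eps_start * eps_decay**n = eps_end, the defaults above reach the epsilon floor after roughly n = log(eps_end / eps_start) / log(eps_decay) episodes.

import math

n = math.log(0.01 / 0.9) / math.log(0.95)
print(round(n))  # ~88 episodes from eps_start=0.9 down to eps_end=0.01 at eps_decay=0.95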
Example #3
env_info = env.reset(train_mode=False)[brain_name]

# number of agents in the environment
print('Number of agents:', len(env_info.agents))

# number of actions
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# examine the state space
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)

agent = Agent(state_size=state_size, action_size=action_size)
agent.qnetwork_local.load_state_dict(
    torch.load(dirpath + "/checkpoint.pth"))
max_t = 1000
for i in range(10):
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]
    score = 0
    for t in range(max_t):
        action = agent.act(state, 0.01)
        # send the action to the environment
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations[0]   # get the next state
        reward = env_info.rewards[0]                   # get the reward
        done = env_info.local_done[0]                  # see if episode has finished
        score += reward
        state = next_state                             # roll the state forward
        if done:
            break
    print('Score:', score)
Example #4
import gym
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt

env = gym.make('LunarLander-v2')
env.seed(0)
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

from ddqn_agent import Agent

agent = Agent(state_size=8, action_size=4, seed=0)

# watch an untrained agent
state = env.reset()
score = 0
for j in range(200):
    action = agent.act(state)
    env.render()
    state, reward, done, _ = env.step(action)
    score += reward
    if done:
        break
print('Score: %d' % score)
env.close()
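To watch a trained agent with the same loop, the saved weights can first be loaded into the local Q-network, as the commented-out line in Example #2 suggests. A minimal sketch, assuming a checkpoint saved as 'checkpoint.pth' (the path is illustrative):

agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))  # illustrative path

env = gym.make('LunarLander-v2')
state = env.reset()
score = 0
for j in range(1000):
    action = agent.act(state)                   # act from the loaded network
    env.render()
    state, reward, done, _ = env.step(action)
    score += reward
    if done:
        break
print('Score: %d' % score)
env.close()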
Example #5
import gym
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt


env = gym.make('LunarLander-v2')
env.seed(0)
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space.n)

from ddqn_agent import Agent

agent = Agent(state_size=8, action_size=4, seed=0)

# watch an untrained agent
state = env.reset()
score = 0

def dqn(n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    """Deep Q-Learning.
    
    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """
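The body of dqn() is cut off in this excerpt. A minimal sketch of a typical epsilon-greedy training loop consistent with these parameters and with the env/agent defined above; the details (solve threshold, checkpoint name) are assumptions, not the original code:

def dqn_sketch(n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
    scores = []                            # score per episode
    scores_window = deque(maxlen=100)      # rolling window of the last 100 scores
    eps = eps_start
    for i_episode in range(1, n_episodes + 1):
        state = env.reset()
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)                  # epsilon-greedy action
            next_state, reward, done, _ = env.step(action)  # old gym step API, as above
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)
        scores.append(score)
        eps = max(eps_end, eps_decay * eps)                 # decay epsilon once per episode
        if np.mean(scores_window) >= 200.0:                 # LunarLander-v2 is considered solved at 200
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')  # illustrative name
            break
    return scores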
Example #6
env = UnityEnvironment(file_name="./Banana.app")

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# get the state and action size from env
env_info = env.reset(train_mode=False)[brain_name]
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]
state_size = len(state)

# initialize agent from trained model
algo = 'ddqn'
agent = Agent(state_size, action_size, seed=10, algo=algo)
print(f'Create agent from trained {algo} model')
agent.qnetwork_local.load_state_dict(torch.load(f'{algo}_model.pt'))

# track game plays
scores = []
total_tries = 0

while total_tries < TOTAL_PLAYS:
    score = 0
    done = False
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations[0]
    while not done:
        action = agent.act(state)
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations[0]   # get the next state
        reward = env_info.rewards[0]                   # get the reward
        done = env_info.local_done[0]                  # see if episode has finished
        state = next_state
        score += reward
    scores.append(score)
    total_tries += 1
Example #7
# examine the state space
states = env_info.vector_observations
state_size = states.shape[1]
num_agents = states.shape[0]   # one row of observations per agent
print('There are {} agents. Each observes a state with length: {}'.format(
    states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

# agent = MultiAgent(state_size=33, action_size=4, num_agents=20, random_seed=2)
# agent = MultiAgent(state_size=33, action_size=4, num_agents=1, random_seed=2)
agent = Agent(state_size=33,
              action_size=4,
              random_seed=args.seed,
              num_agents=num_agents,
              buffer_size=args.buffer_size,
              batch_size=args.batch_size,
              lr_actor=args.lr_actor,
              lr_critic=args.lr_critic,
              weight_decay=args.weight_decay,
              update_every=args.update_every,
              update_times=args.update_times,
              gamma=args.gamma)

import matplotlib.pyplot as plt
# %matplotlib inline


def ddpg(n_episodes=1000, max_t=1200, print_every=100):
    scores_deque = deque(maxlen=print_every)
    scores = []
    times = []
    for i_episode in range(1, n_episodes + 1):
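The rest of ddpg() is cut off in this excerpt. A rough sketch of the multi-agent episode loop it presumably runs, reusing the env, brain_name, agent and num_agents handles set up earlier in the source (as in the other snippets here); the Agent methods (reset, act, step) and their signatures are assumptions, not the original code:

import time
import numpy as np
from collections import deque


def ddpg_sketch(n_episodes=1000, max_t=1200, print_every=100):
    scores_deque = deque(maxlen=print_every)
    scores = []
    times = []
    for i_episode in range(1, n_episodes + 1):
        start = time.time()
        env_info = env.reset(train_mode=True)[brain_name]   # reset all agents
        states = env_info.vector_observations
        agent.reset()                                       # reset exploration noise (assumed API)
        episode_scores = np.zeros(num_agents)
        for t in range(max_t):
            actions = agent.act(states)                     # one action per agent (assumed API)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            agent.step(states, actions, rewards, next_states, dones)
            states = next_states
            episode_scores += rewards
            if np.any(dones):
                break
        scores_deque.append(np.mean(episode_scores))        # average score across agents
        scores.append(np.mean(episode_scores))
        times.append(time.time() - start)
        if i_episode % print_every == 0:
            print('Episode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
    return scores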
Example #8
def run_training_loop(checkpoint_file: str, tp: TrainingParams, algo: str):
    """
    Set up the environment for the agent to interact with and learn from.

    Params
    ======
    checkpoint_file: file name used for checkpointing
    tp: the set of training parameters
    algo: algorithm to use ('dqn' or 'ddqn')
    """
    # setup environment
    env = UnityEnvironment(file_name="./Banana.app")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # get the state_size and action_size of the env
    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    state_size = len(state)

    # create the agent
    agent = Agent(state_size, action_size, seed=10, algo=algo)

    scores = []
    scores_window = deque(maxlen=100)
    best_avg_score = 0.0
    eps = tp.eps_start

    print(f'Training parameters:\n{tp}')

    for i_episode in range(1, tp.n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]

        score = 0.0
        for t in range(tp.max_t):
            action = agent.act(state, eps)
            env_info = env.step(action)[
                brain_name]  # send the action to the environment
            next_state = env_info.vector_observations[0]  # get the next state
            reward = env_info.rewards[0]  # get the reward
            done = env_info.local_done[0]  # see if episode has finished
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)
        scores.append(score)
        eps = max(tp.eps_end, eps * tp.eps_decay)
        print(f'\rEpisode {i_episode}\tScore: {score}', end='')
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                i_episode, np.mean(scores_window)))
        if i_episode > 100 and np.mean(scores_window) > best_avg_score:
            print(
                f'\nNew best average over the last 100 episodes at episode {i_episode}: {np.mean(scores_window):.2f}. Checkpointing.'
            )
            torch.save(agent.qnetwork_local.state_dict(), checkpoint_file)
            best_avg_score = np.mean(scores_window)

    env.close()
    return scores
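A possible way to drive this loop end to end and plot the result; the TrainingParams field names mirror those referenced above, but treating its constructor as keyword-based (and the concrete values) is an assumption:

import numpy as np
import matplotlib.pyplot as plt

tp = TrainingParams(n_episodes=1000, max_t=1000,
                    eps_start=1.0, eps_end=0.01, eps_decay=0.995)  # illustrative values
scores = run_training_loop('ddqn_checkpoint.pth', tp, algo='ddqn')

plt.plot(np.arange(len(scores)), scores)   # score per episode
plt.xlabel('Episode')
plt.ylabel('Score')
plt.show()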