Exemplo n.º 1
0
import gym
import numpy as np
import time
import agents
from copy import deepcopy

if __name__ == '__main__':
    # Driver script: creates the LunarLander-v2 environment and a deep-Q agent,
    # then runs `numTrials` episodes against it.

    numTrials = 400
    # Single source of truth for the RNG seed: both the environment and NumPy
    # are seeded from this value, so changing it here actually takes effect.
    seed = 1
    environ = gym.make("LunarLander-v2")
    environ.seed(seed)
    np.random.seed(seed)
    lander = agents.deepQAgent(environ.observation_space, environ.action_space, environ.reward_range)

    for trial in range(numTrials):

        # Upper bound on steps for this episode, read from the environment spec.
        numTimeSteps = environ.spec.timestep_limit
        totalReward = 0

        # Reset the environment and pass the initial observation through the
        # agent's own scaleStates() before using it.
        state = environ.reset()
        state = lander.scaleStates(state)
        # Work on an independent copy of the scaled state.
        currentState = np.copy(state)

        # Per-episode history; the state list starts with the initial state.
        listOfStates = [currentState]
        listOfActions = []

        for timeStep in range(numTimeSteps):
            # The agent receives the environment, the current state and the
            # trial index when choosing an action.
            action = lander.getAction(environ, currentState, trial)
            # step() returns a 4-tuple; the last element is bound to
            # `notNeeded`. NOTE(review): `checkFinished` is presumably the
            # episode-done flag - confirm against the gym version in use.
            (newState, reward, checkFinished, notNeeded) = environ.step(action)
Exemplo n.º 2
0
            "probupdate": .25,
            "lambda": 0.15,
            "past": 0,
            "eps": 0.45,  # Epsilon in epsilon greedy policies
            "decay": 0.993,  # Epsilon decay in epsilon greedy policies
            "initial_learnrate": 0.012,
            "decay_learnrate": 0.997,
            "discount": 0.99,
            "batch_size": 75,
            "hiddenlayers": [300],
            "regularization": [0.00001, 0.00000001],
            "momentum": 0.05,
            "file": None,
            "seed": seed
        }
    # Build the deep-Q agent from the environment's observation space, action
    # space and reward range, forwarding every entry of `params` as a keyword
    # argument.
    agent = agents.deepQAgent(env.observation_space, env.action_space,
                              env.reward_range, **params)
    # Step limit read from the environment spec.
    num_steps = env.spec.timestep_limit
    # NOTE(review): presumably the current and previous running averages of
    # the episode reward - their use is not visible in this excerpt.
    avg = 0.
    oldavg = 0.

    # Assumes `plt` is matplotlib.pyplot (import not visible here): switch to
    # interactive mode and create a figure with two axes in one row.
    plt.ion()
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    # Pin the second axes to the square [-1, 1] x [-1, 1] and turn autoscaling
    # off so those limits stay fixed.
    ax[1].set_xlim(-1, 1)
    ax[1].set_ylim(-1, 1)
    ax[1].autoscale(False)

    # Accumulators that start empty. NOTE(review): judging by the names these
    # hold per-episode total reward, its average, and the training cost -
    # confirm against the loop body.
    totrewlist = []
    totrewavglist = []
    costlist = []
    # NOTE(review): presumably a display/refresh interval in episodes - confirm.
    showevery = 10
    for episode in range(numepisodes):
Exemplo n.º 3
0
            "scalereward": 1.,
            "probupdate": .25,
            "lambda": 0.,
            "past": 0,
            "eps": 0.45,  # Epsilon in epsilon greedy policies
            "decay": 0.993,  # Epsilon decay in epsilon greedy policies
            "initial_learnrate": 0.005,
            "decay_learnrate": 0.997,
            "discount": 0.99,
            "batch_size": 75,
            "hiddenlayers": [300],
            "regularization": [0.0000, 0.0000000],
            "momentum": 0.0,
            "file": None,
            "seed": seed}
    # Build the deep-Q agent from the environment's observation space, action
    # space and reward range, forwarding every entry of `params` as a keyword
    # argument.
    agent = agents.deepQAgent(env.observation_space, env.action_space, env.reward_range, **params)
    # Step limit read from the environment spec.
    num_steps = env.spec.timestep_limit
    # NOTE(review): presumably the current and previous running averages of
    # the episode reward - their use is not visible in this excerpt.
    avg = 0.
    oldavg = 0.

    # Assumes `plt` is matplotlib.pyplot (import not visible here): switch to
    # interactive mode and create a figure with two axes in one row.
    plt.ion()
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))
    # Pin the second axes to the square [-1, 1] x [-1, 1] and turn autoscaling
    # off so those limits stay fixed.
    ax[1].set_xlim(-1, 1)
    ax[1].set_ylim(-1, 1)
    ax[1].autoscale(False)

    # Accumulators that start empty. NOTE(review): judging by the names these
    # hold per-episode total reward, its average, and the training cost -
    # confirm against the loop body.
    totrewlist = []
    totrewavglist = []
    costlist = []
    # NOTE(review): presumably a display/refresh interval in episodes - confirm.
    showevery = 10
    for episode in range(numepisodes):