"""Run a deep-Q agent on the ``LunarLander-v2`` Gym environment."""
import gym
import numpy as np
import time
import agents
from copy import deepcopy

if __name__ == '__main__':
    numTrials = 400
    # Single source of truth for reproducibility: both the environment and
    # NumPy's global RNG are seeded from this value (it used to be declared
    # but ignored in favour of hard-coded literals).
    seed = 1

    environ = gym.make("LunarLander-v2")
    environ.seed(seed)
    np.random.seed(seed)

    lander = agents.deepQAgent(
        environ.observation_space,
        environ.action_space,
        environ.reward_range,
    )

    for trial in range(numTrials):
        # NOTE(review): `spec.timestep_limit` is the legacy spelling of the
        # per-episode step cap; newer gym releases expose it as
        # `spec.max_episode_steps` -- confirm against the pinned gym version.
        numTimeSteps = environ.spec.timestep_limit
        totalReward = 0

        # Start a fresh episode and pass the first observation through the
        # agent's own state scaling.
        state = environ.reset()
        state = lander.scaleStates(state)

        # Work on an independent copy so the per-trial history below does not
        # alias `state`.
        currentState = np.copy(state)
        listOfStates = [currentState]
        listOfActions = []

        for timeStep in range(numTimeSteps):
            # The agent also receives the trial index along with the
            # environment and the current (scaled) state.
            action = lander.getAction(environ, currentState, trial)
            # step() returns a 4-tuple here; the last element is unpacked
            # into `notNeeded`.
            newState, reward, checkFinished, notNeeded = environ.step(action)
# Fragment overview (the start of this dict literal and the body of the final
# loop are outside this view):
#   * remaining hyper-parameter entries, "probupdate" through "seed", of a
#     dict -- presumably the `params` that is unpacked into
#     agents.deepQAgent(...) below; confirm against the missing opening line;
#   * construction of the agent from env's observation space, action space
#     and reward range plus those keyword arguments;
#   * num_steps read from env.spec.timestep_limit, avg/oldavg reset to 0.;
#   * matplotlib interactive mode and a 1x2 figure whose second axes are
#     fixed to [-1, 1] x [-1, 1] with autoscaling switched off;
#   * empty totrewlist / totrewavglist / costlist and showevery = 10;
#   * header of the loop over `numepisodes` episodes.
# `env`, `seed`, `plt` and `numepisodes` are not defined in this fragment.
# NOTE(review): as it appears here, everything is fused onto one physical
# line, so the first inline '#' turns the remainder of the line (including the
# closing brace and every following statement) into a comment. The original
# line breaks need to be restored before this can run.
"probupdate": .25, "lambda": 0.15, "past": 0, "eps": 0.45, # Epsilon in epsilon greedy policies "decay": 0.993, # Epsilon decay in epsilon greedy policies "initial_learnrate": 0.012, "decay_learnrate": 0.997, "discount": 0.99, "batch_size": 75, "hiddenlayers": [300], "regularization": [0.00001, 0.00000001], "momentum": 0.05, "file": None, "seed": seed } agent = agents.deepQAgent(env.observation_space, env.action_space, env.reward_range, **params) num_steps = env.spec.timestep_limit avg = 0. oldavg = 0. plt.ion() fig, ax = plt.subplots(1, 2, figsize=(20, 10)) ax[1].set_xlim(-1, 1) ax[1].set_ylim(-1, 1) ax[1].autoscale(False) totrewlist = [] totrewavglist = [] costlist = [] showevery = 10 for episode in range(numepisodes):
# Fragment overview (the start of this dict literal and the body of the final
# loop are outside this view):
#   * hyper-parameter entries, "scalereward" through "seed", of a dict --
#     presumably the `params` that is unpacked into agents.deepQAgent(...)
#     below; confirm against the missing opening line. In this variant
#     "lambda", "momentum" and both "regularization" values are zero;
#   * construction of the agent from env's observation space, action space
#     and reward range plus those keyword arguments;
#   * num_steps read from env.spec.timestep_limit, avg/oldavg reset to 0.;
#   * matplotlib interactive mode and a 1x2 figure whose second axes are
#     fixed to [-1, 1] x [-1, 1] with autoscaling switched off;
#   * empty totrewlist / totrewavglist / costlist and showevery = 10;
#   * header of the loop over `numepisodes` episodes.
# `env`, `seed`, `plt` and `numepisodes` are not defined in this fragment.
# NOTE(review): as it appears here, everything is fused onto one physical
# line, so the first inline '#' turns the remainder of the line (including the
# closing brace and every following statement) into a comment. The original
# line breaks need to be restored before this can run.
"scalereward": 1., "probupdate": .25, "lambda": 0., "past": 0, "eps": 0.45, # Epsilon in epsilon greedy policies "decay": 0.993, # Epsilon decay in epsilon greedy policies "initial_learnrate": 0.005, "decay_learnrate": 0.997, "discount": 0.99, "batch_size": 75, "hiddenlayers": [300], "regularization": [0.0000, 0.0000000], "momentum": 0.0, "file": None, "seed": seed} agent = agents.deepQAgent(env.observation_space, env.action_space, env.reward_range, **params) num_steps = env.spec.timestep_limit avg = 0. oldavg = 0. plt.ion() fig, ax = plt.subplots(1, 2, figsize=(20, 10)) ax[1].set_xlim(-1, 1) ax[1].set_ylim(-1, 1) ax[1].autoscale(False) totrewlist = [] totrewavglist = [] costlist = [] showevery = 10 for episode in range(numepisodes):