import gym
import numpy as np
from ac import Agent
from monitor import Monitor
from trainer import Trainer
from smoothie import Smoothie

agent = Agent(0.00001, 0.00005)
env = gym.make("LunarLander-v2")
score_history = []
num_episodes = 10000

title = "Single agent training"
monitor = Monitor(
    "monitor.csv",
    title=title,
    metadata=dict(
        gamma=0.99,
        agent_class=Agent.__name__,
        pretrain=0,
        copies=1,
        report_interval=1,
        comment="Single agent training",
        environment=env.__class__.__name__,
    ),
    plots=[
        [
            {"label": "min train score", "line_width": 1.0},
            {"label": "train score"},
        ]
    ],
)
import gym
import numpy as np
from ac import Agent
from monitor import Monitor

agent = Agent(0.00001, 0.00005)
env = gym.make("LunarLander-v2")
score_history = []
num_episodes = 2000

monitor = Monitor("monitor.csv")
monitor.start_server(8080)

for t in range(num_episodes):
    done = False
    observation = env.reset()
    score = 0.0
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        agent.learn(observation, action, reward, observation_, done)
        observation = observation_
        score += reward
    score_history.append(score)
    avg_score = np.mean(score_history[-100:])
    print("Episode:", t, " score:", score, " average score:", avg_score)
    monitor.add(t, average_score=avg_score)
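# The Monitor class comes from the local monitor.py module, which is not
# shown here. As a rough sketch of the interface these scripts rely on
# (the constructor, start_server(), and add() calls above), a minimal
# CSV-only stand-in might look like the following; the real module
# presumably also serves live plots over HTTP, which this stub omits.
# The name MinimalMonitor is hypothetical.

import csv

class MinimalMonitor:
    """CSV-only stand-in for monitor.Monitor (hypothetical)."""

    def __init__(self, path, title=None, metadata=None, plots=None):
        self.path = path
        self.fields = None          # column names, fixed on the first add()

    def start_server(self, port):
        pass                        # the real Monitor serves live plots here

    def add(self, t, **series):
        # Append one row per call: the step index plus named series values.
        if self.fields is None:
            self.fields = ["t"] + sorted(series)
            with open(self.path, "w", newline="") as f:
                csv.writer(f).writerow(self.fields)
        with open(self.path, "a", newline="") as f:
            csv.writer(f).writerow([t] + [series.get(k) for k in self.fields[1:]])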
import getopt
import sys

import numpy as np

from envs import make_env
from ac import ACAgent as Agent

np.set_printoptions(precision=3)

opts, args = getopt.getopt(sys.argv[1:], "vn:l:")
opts = dict(opts)
num_tests = int(opts.get("-n", 100))
do_render = "-v" in opts
load_from = opts.get("-l")

env = make_env(args[0])
num_actions = env.action_space.n
observation_shape = env.observation_space.shape
assert len(observation_shape) == 1
observation_dim = observation_shape[0]

agent = Agent(observation_dim, num_actions, 0.00001, 0.00005)
if load_from:
    agent.load(load_from)
    print("\n\nAgent weights loaded from:", load_from, "\n\n")

for t in range(num_tests):
    score, _ = agent.run_episode(env, learn=True, render=do_render)
    print(score)
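# run_episode() is defined in ac.py, which is not shown. Judging from the
# inline loop in the earlier LunarLander script and the `score, _`
# unpacking above, it presumably wraps the same choose_action/step/learn
# cycle and returns the episode score plus a second value (here assumed
# to be the step count). A sketch of such an ACAgent method, under those
# assumptions:

def run_episode(self, env, learn=True, render=False):
    observation = env.reset()
    done = False
    score = 0.0
    steps = 0
    while not done:
        if render:
            env.render()
        action = self.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        if learn:
            self.learn(observation, action, reward, observation_, done)
        observation = observation_
        score += reward
        steps += 1
    return score, steps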
import getopt
import sys

import numpy as np

from envs import make_env
from ac import ACAgent as Agent
from monitor import Monitor

# Options: -r report interval, -t test interval, -v render,
# -g gamma, -c comment, -T title
opts, args = getopt.getopt(sys.argv[1:], "vr:t:g:c:T:")
env_name = args[0]
opts = dict(opts)
report_interval = int(opts.get("-r", 1))
test_interval = int(opts.get("-t", 100))
render = "-v" in opts
gamma = float(opts.get("-g", 0.99))
comment = opts.get("-c", "")

env = make_env(env_name)
num_actions = env.action_space.n
observation_shape = env.observation_space.shape
assert len(observation_shape) == 1
observation_dim = observation_shape[0]

agent = Agent(observation_dim, num_actions, 0.00001, 0.00005, gamma=gamma)
title = opts.get(
    "-T",
    "Training agent %s in %s" % (agent.__class__.__name__, env_name))

score_history = []
num_episodes = 10000

monitor = Monitor(
    "monitor.csv",
    title=title,
    metadata=dict(
        gamma=gamma,
        agent_class=Agent.__name__,
        pretrain=0,
        copies=1,
        report_interval=report_interval,
        comment=comment,
        environment=env.__class__.__name__,
    ),
)
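# The training loop itself is not part of this fragment. Based on the loop
# in the earlier LunarLander script and the options parsed above, it would
# plausibly continue along these lines (a sketch, not the original code;
# the train_score keyword passed to monitor.add() is an assumption):

for t in range(num_episodes):
    score, _ = agent.run_episode(env, learn=True, render=render)
    score_history.append(score)
    if t % report_interval == 0:
        avg_score = np.mean(score_history[-100:])
        print("Episode:", t, " score:", score, " average score:", avg_score)
        monitor.add(t, train_score=avg_score)
    if t % test_interval == 0:
        # Evaluate without learning to track performance separately.
        test_score, _ = agent.run_episode(env, learn=False, render=render)
        print("Test episode at", t, " score:", test_score)

# Example invocation, assuming this file is saved as train.py:
#   python train.py LunarLander-v2 -g 0.99 -r 1 -t 100 -c "baseline run"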