Example #1
File: train.py  Project: imandr/KeRLas
import gym
from ac import Agent
import numpy as np
from monitor import Monitor
from trainer import Trainer
from smoothie import Smoothie

agent = Agent(0.00001, 0.00005)   # two learning rates, presumably actor and critic
env = gym.make("LunarLander-v2")
score_history = []
num_episodes = 10000
title = "Training agent %s in %s" % (agent.__class__.__name__, "LunarLander-v2")   # title was undefined in the original snippet; format follows Example #4
monitor = Monitor("monitor.csv",
    title = title,
    metadata = dict(
        gamma=0.99,
        agent_class=Agent.__name__,
        pretrain=0,
        copies=1,
        report_interval=1,
        comment = "Single agent training",
        environment = env.__class__.__name__
    ),
    plots=[
    [
        {
            "label":        "min train score",
            "line_width":   1.0
        },
        {
            "label":        "train score"
        }
    ]
])
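The snippet imports Trainer and Smoothie but the visible portion never reaches them; Smoothie presumably smooths the raw episode score into the "train score" / "min train score" series declared in the plots list above. A minimal sketch of such a smoother, assuming a simple exponential moving average with a running minimum (the class body here is hypothetical, not the project's):

class EMASmoothie:
    # Hypothetical stand-in for smoothie.Smoothie: exponential moving
    # average of scores, plus the running minimum of the smoothed value.
    def __init__(self, alpha=0.1):
        self.alpha = alpha
        self.value = None
        self.min_value = None

    def update(self, x):
        # Blend the new sample into the running average.
        if self.value is None:
            self.value = x
        else:
            self.value = self.alpha * x + (1.0 - self.alpha) * self.value
        self.min_value = self.value if self.min_value is None else min(self.min_value, self.value)
        return self.value, self.min_value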
Example #2
import gym
from ac import Agent
import numpy as np
from monitor import Monitor

agent = Agent(0.00001, 0.00005)
env = gym.make("LunarLander-v2")
score_history = []
num_episodes = 2000
monitor = Monitor("monitor.csv")
monitor.start_server(8080)

for t in range(num_episodes):
    done = False
    observation = env.reset()
    score = 0.0
    
    while not done:
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)   # gym pre-0.26 API: step returns a 4-tuple
        agent.learn(observation, action, reward, observation_, done)
        observation = observation_
        score += reward
    
    score_history.append(score)
    avg_score = np.mean(score_history[-100:])
    print("Episode:", t, "  score:", score, "  average score:", avg_score)
    monitor.add(t, average_score = avg_score)
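The Monitor class itself does not appear in these examples; the calls above imply a constructor taking a CSV path plus optional title/metadata/plots keywords, an add(t, **series) method, and start_server(port) for live viewing. A minimal file-writing stand-in under those assumptions (start_server omitted; the real monitor.Monitor may differ):

import csv

class CSVMonitor:
    # Hypothetical stand-in for monitor.Monitor: appends one CSV row per add() call.
    def __init__(self, path, title=None, metadata=None, plots=None):
        self.path = path
        self.fields = None

    def add(self, t, **series):
        # Write a header row on first use, then the named series values.
        first = self.fields is None
        if first:
            self.fields = ["t"] + sorted(series)
        with open(self.path, "a", newline="") as f:
            w = csv.writer(f)
            if first:
                w.writerow(self.fields)
            w.writerow([t] + [series.get(k) for k in self.fields[1:]])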
Example #3
import numpy as np
import getopt, sys
from envs import make_env
from ac import ACAgent as Agent

np.set_printoptions(precision=3)

opts, args = getopt.getopt(sys.argv[1:], "vn:l:")
opts = dict(opts)
num_tests = int(opts.get("-n", 100))
do_render = "-v" in opts
load_from = opts.get("-l")

env = make_env(args[0])
num_actions = env.action_space.n
observation_shape = env.observation_space.shape
assert len(observation_shape) == 1   # flat observation vectors only
observation_dim = observation_shape[0]

agent = Agent(observation_dim, num_actions, 0.00001, 0.00005)
if load_from:
    agent.load(load_from)
    print("\n\nAgent weights loaded from:", load_from, "\n\n")

for t in range(num_tests):
    score, _ = agent.run_episode(env, learn=True, render=do_render)
    print(score)
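run_episode is not shown in the snippet, but given the Agent interface used in Example #2 (choose_action and learn) it presumably wraps the same per-step loop and returns the score plus a second value. A minimal sketch under that assumption, written here as a free function for brevity:

def run_episode(agent, env, learn=True, render=False):
    # One full episode using the loop from Example #2; returns (score, steps).
    observation = env.reset()
    done, score, steps = False, 0.0, 0
    while not done:
        if render:
            env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        if learn:
            agent.learn(observation, action, reward, observation_, done)
        observation = observation_
        score += reward
        steps += 1
    return score, steps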
Example #4
File: train.py  Project: imandr/KeRLas
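This snippet begins mid-file: opts and args are used before they are assigned, so the elided top presumably parses the command line with getopt as Example #3 does. A minimal reconstruction covering the flags read below (-r, -t, -v, -g, -c, -T); the exact imports are an assumption:

import getopt, sys
import numpy as np
from envs import make_env
from ac import ACAgent as Agent
from monitor import Monitor

opts, args = getopt.getopt(sys.argv[1:], "r:t:vg:c:T:")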
env_name = args[0]

opts = dict(opts)
report_interval = int(opts.get("-r", 1))
test_interval = int(opts.get("-t", 100))
render = "-v" in opts
gamma = float(opts.get("-g", 0.99))
comment = opts.get("-c", "")

env = make_env(env_name)
num_actions = env.action_space.n
observation_shape = env.observation_space.shape
assert len(observation_shape) == 1
observation_dim = observation_shape[0]

agent = Agent(observation_dim, num_actions, 0.00001, 0.00005, gamma=gamma)

title = opts.get(
    "-T", "Training agent %s in %s" % (agent.__class__.__name__, env_name))

score_history = []
num_episodes = 10000
monitor = Monitor("monitor.csv",
                  title=title,
                  metadata=dict(gamma=gamma,                  # record the values actually used for this run
                                agent_class=Agent.__name__,
                                pretrain=0,
                                copies=1,
                                report_interval=report_interval,
                                comment=comment,
                                environment=env.__class__.__name__))