Exemplo n.º 1
0
 def testEvaluationOption(self):
     """Check how the "evaluation" entry of train results changes over time.

     A DQN agent is created with ``evaluation_interval`` set to 2 and
     trained five times. The test asserts that the "evaluation" entry is
     present in the first result, that it is identical between results
     0/1 and 2/3, and that it differs between results 1/2 and 3/4.
     """
     ray.init()
     agent = DQNAgent(env="CartPole-v0", config={"evaluation_interval": 2})
     # Five consecutive training iterations, collected in call order.
     r0, r1, r2, r3, r4 = [agent.train() for _ in range(5)]
     # assertIn reports the missing key and the container on failure,
     # unlike assertTrue(... in ...), which only says "False is not true".
     self.assertIn("evaluation", r0)
     self.assertIn("episode_reward_mean", r0["evaluation"])
     self.assertEqual(r0["evaluation"], r1["evaluation"])
     self.assertNotEqual(r1["evaluation"], r2["evaluation"])
     self.assertEqual(r2["evaluation"], r3["evaluation"])
     self.assertNotEqual(r3["evaluation"], r4["evaluation"])
Exemplo n.º 2
0
 def testTrainCartpoleOffPolicy(self):
     """Train DQN on the "test3" env until mean episode reward is >= 100.

     "test3" is registered as CartPole-v0 wrapped in PartOffPolicyServing
     with ``off_pol_frac=0.2``. Up to 100 training iterations are run and
     progress is printed after each one.

     Raises:
         Exception: if no iteration reports a mean reward of at least 100.
     """
     def make_env(_):
         # The env-config argument passed by the registry is ignored.
         return PartOffPolicyServing(
             gym.make("CartPole-v0"), off_pol_frac=0.2)

     register_env("test3", make_env)
     agent = DQNAgent(env="test3", config={"exploration_fraction": 0.001})
     for iteration in range(100):
         stats = agent.train()
         mean_reward = stats["episode_reward_mean"]
         print("Iteration {}, reward {}, timesteps {}".format(
             iteration, mean_reward, stats["timesteps_total"]))
         if mean_reward >= 100:
             # Target reached; the test passes.
             return
     raise Exception("failed to improve reward")
Exemplo n.º 3
0
 def testTrainCartpoleOffPolicy(self):
     """Run DQN on a PartOffPolicyServing-wrapped CartPole-v0 env.

     The env is registered under the name "test3" with
     ``off_pol_frac=0.2``. Training stops successfully as soon as an
     iteration reports ``episode_reward_mean`` of at least 100.

     Raises:
         Exception: if 100 iterations complete without reaching the
             reward threshold.
     """
     reward_threshold = 100
     iteration_limit = 100

     def build_env(_):
         base_env = gym.make("CartPole-v0")
         return PartOffPolicyServing(base_env, off_pol_frac=0.2)

     register_env("test3", build_env)
     trainer = DQNAgent(
         env="test3", config={"exploration_fraction": 0.001})
     for step in range(iteration_limit):
         metrics = trainer.train()
         reward = metrics["episode_reward_mean"]
         progress = "Iteration {}, reward {}, timesteps {}".format(
             step, reward, metrics["timesteps_total"])
         print(progress)
         if reward >= reward_threshold:
             break
     else:
         # Loop ran to completion without hitting the threshold.
         raise Exception("failed to improve reward")
Exemplo n.º 4
0
        "num_gpus": 1,
        "hiddens": [],
        "schedule_max_timesteps": 7500000,
        "timesteps_per_iteration": 4000,
        "exploration_fraction": 0.8,
        "exploration_final_eps": 0.02,
        "lr": 1e-3,
        "model": {
            "custom_model": "parametric",
            "custom_options": {},  # extra options to pass to your model
        }
    })

# Attempt to restore from checkpoint if possible. CHECKPOINT_FILE holds the
# path of the most recent checkpoint, as written by the loop below.
if os.path.exists(CHECKPOINT_FILE):
    # Read inside a context manager so the file handle is closed right away
    # instead of being left open until garbage collection.
    with open(CHECKPOINT_FILE) as f:
        checkpoint_path = f.read()
    print("Restoring from checkpoint path", checkpoint_path)
    dqn.restore(checkpoint_path)

# Serving and training loop: run one training iteration, save a checkpoint,
# and record its path in CHECKPOINT_FILE so a later run can restore from it.
# Runs until the process is interrupted.
while True:
    print(pretty_print(dqn.train()))
    checkpoint_path = dqn.save()
    print("Last checkpoint", checkpoint_path)
    with open(CHECKPOINT_FILE, "w") as f:
        f.write(checkpoint_path)
Exemplo n.º 5
0
                "timesteps_per_iteration": 200,
                "env_config": {
                    "observation_size": args.observation_size,
                    "action_size": args.action_size,
                },
            })
    elif args.run == "PG":
        agent = PGAgent(
            env="srv",
            config={
                "num_workers": 0,
                "env_config": {
                    "observation_size": args.observation_size,
                    "action_size": args.action_size,
                },
            })

    # Attempt to restore from checkpoint if possible. The file named by
    # args.checkpoint_file holds the path of the most recent checkpoint, as
    # written by the loop below.
    if os.path.exists(args.checkpoint_file):
        # Read inside a context manager so the file handle is closed right
        # away instead of being left open until garbage collection.
        with open(args.checkpoint_file) as f:
            checkpoint_file = f.read()
        print("Restoring from checkpoint path", checkpoint_file)
        agent.restore(checkpoint_file)

    # Serving and training loop: run one training iteration, save a
    # checkpoint, and record its path so a later run can restore from it.
    # Runs until the process is interrupted.
    while True:
        print(pretty_print(agent.train()))
        checkpoint_file = agent.save()
        print("Last checkpoint", checkpoint_file)
        with open(args.checkpoint_file, "w") as f:
            f.write(checkpoint_file)