Пример #1
0
                    type=int,
                    default=100000,
                    help="Number of timesteps to train.")
# CLI option: reward threshold at which training is stopped.
parser.add_argument(
    "--stop-reward",
    default=35.0,
    type=float,
    help="Reward at which we stop training.",
)

if __name__ == "__main__":
    # Read the command-line options, then bring up Ray before touching Tune.
    args = parser.parse_args()
    ray.init()

    # Make the mock vectorized env available to Tune by name.
    # MockVectorEnv(100, 4) means: episode-len=100, num-envs=4. The 4 envs
    # are fake-envs, as the MockVectorEnv only carries a single CartPole
    # sub-env in it.
    tune.register_env("custom_vec_env", lambda env_ctx: MockVectorEnv(100, 4))

    # GPUs are requested iff the `RLLIB_NUM_GPUS` env var is set to > 0;
    # when the variable is absent the count falls back to 0.
    gpu_count = int(os.getenv("RLLIB_NUM_GPUS", "0"))

    # Trainer configuration referencing the env registered above.
    config = dict(
        env="custom_vec_env",
        num_gpus=gpu_count,
        num_workers=2,  # parallelism
        framework=args.framework,
    )

    # Stopping criteria, all taken from the command-line options.
    stop = dict(
        training_iteration=args.stop_iters,
        timesteps_total=args.stop_timesteps,
        episode_reward_mean=args.stop_reward,
    )