def train_model(args):
    # We are using a custom model and environment, which need to be registered with Ray/RLlib.
    # The names can be anything.
    register_env("DuckieTown-MultiMap",
                 lambda _: DiscreteWrapper(MultiMapEnv()))

    # Define the trainer. Apart from env, config/framework and config/model, which are common
    # among trainers, the remaining keys are DQN-specific.
    # Here is a list of default config keys/values:
    # https://docs.ray.io/en/master/rllib-training.html#common-parameters
    # For DQN specifically there are additionally these keys:
    # https://docs.ray.io/en/master/rllib-algorithms.html#dqn
    trainer = DQNTrainer(
        env="DuckieTown-MultiMap",
        config={
            "framework": "torch",
            "model": {
                "custom_model": "image-dqn",
            },
            "learning_starts": 500,
            # Doing this allows us to record images from the DuckieTown Gym! Might be useful for the report.
            # "record_env": True,
            "train_batch_size": 16,
            # Use a very small buffer to reduce memory usage, default: 50_000.
            "buffer_size": 1000,
            # Dueling off
            "dueling": False,
            # No hidden layers
            "hiddens": [],
            # Don't save experiences.
            # "output": None,
            # "compress_observations": True,
            "num_workers": 0,
            "num_gpus": 0.5,
            "rollout_fragment_length": 50,
        })

    # Start training from a checkpoint, if available.
    if args.model_path:
        trainer.restore(args.model_path)

    plot = plotter.Plotter('dqn_agent')
    for i in range(args.epochs):  # Number of training iterations (we call them epochs here)
        print(f'----------------------- Starting epoch {i} ----------------------- ')
        # train() runs only a single training iteration
        result = trainer.train()
        print(result)
        plot.add_results(result)

        # Save model so far.
        checkpoint_path = trainer.save()
        print(f'Epoch {i}, checkpoint saved at: {checkpoint_path}')

        # Cleanup CUDA memory to reduce memory usage.
        torch.cuda.empty_cache()
        # Debug log to monitor memory.
        print(torch.cuda.memory_summary(device=None, abbreviated=False))

    plot.plot('DQN DuckieTown-MultiMap')
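# Illustrative only (not part of the original snippet): train_model() above assumes
# Ray has been initialised, the "image-dqn" custom model has been registered (see the
# __main__ block further below), and that `args` carries `epochs` and `model_path`.
# A minimal hypothetical driver could look like this:
import argparse

import ray
from ray.rllib.models import ModelCatalog


def run_dqn_training_sketch():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=100,
                        help='Number of training iterations to run')
    parser.add_argument('--model_path', default=None,
                        help='Checkpoint to resume training from')
    args = parser.parse_args()

    ray.init()
    # RLLibDQNCritic is the project-specific model class used elsewhere in this file.
    ModelCatalog.register_custom_model("image-dqn", RLLibDQNCritic)
    train_model(args)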
Example #2
def main():

    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    environment = DiscreteWrapper(environment)
    np.random.seed(666)
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model((WINDOW_LENGTH, ) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Finally, we configure and compile our agent. You can use any built-in Keras optimizer
    # and even the built-in metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = DuckieTownProcessor()

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the first 400k steps (the commented-out
    # value below is the original 1M-step schedule). This is done so that the agent initially explores
    # the environment (high eps) and then gradually sticks to what it knows (low eps). We also set a
    # dedicated eps value that is used during testing. Note that we set it to 0.05 so that the agent
    # still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        #nb_steps=1000000
        nb_steps=400000)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=50000,
                   gamma=.99,
                   target_model_update=10000,
                   train_interval=4,
                   delta_clip=1.)
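    # For reference: nb_steps_warmup fills the replay memory before learning starts,
    # target_model_update=10000 performs a hard update of the target network every
    # 10k steps, and delta_clip=1. applies Huber-style clipping of the TD error.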
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    callbacks = []
    dqn.fit(
        environment,
        callbacks=callbacks,
        #nb_steps=1750000,
        nb_steps=500000,
        log_interval=10000,
        visualize=True)
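# Illustrative only (not part of the original snippet): build_model() and
# DuckieTownProcessor are defined elsewhere in the project. As a rough sketch of
# what such a processor typically looks like (following keras-rl's Atari example;
# the 84x84 size below is an assumption, not the project's actual INPUT_SHAPE):
import numpy as np
from PIL import Image
from rl.core import Processor


class DuckieTownProcessorSketch(Processor):
    def process_observation(self, observation):
        # Camera frame -> resized grayscale uint8, to keep the replay memory small.
        img = Image.fromarray(observation).resize((84, 84)).convert('L')
        return np.array(img, dtype='uint8')

    def process_state_batch(self, batch):
        # Convert the stored uint8 frames to floats in [0, 1] only at train time.
        return batch.astype('float32') / 255.0

    def process_reward(self, reward):
        # Clip rewards as in the original DQN setup.
        return np.clip(reward, -1.0, 1.0)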
Example #3
def get_env():
    # The simulator env uses a single map, so it is better suited for evaluation/testing.
    # DiscreteWrapper just maps high-level discrete actions onto wheel velocities.
    return DiscreteWrapper(simulator.Simulator(
        map_name=args.map,
        max_steps=2000,
    ))
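# Illustrative only (not part of the original snippet): DiscreteWrapper is defined
# elsewhere in the project. A wrapper like it is usually a gym.ActionWrapper that
# maps a small discrete action set onto (left, right) wheel velocities; the exact
# velocity values below are made up for illustration:
import gym
import numpy as np
from gym import spaces


class DiscreteWrapperSketch(gym.ActionWrapper):
    """Maps {0: turn left, 1: turn right, 2: go forward} to wheel velocities."""

    def __init__(self, env):
        super().__init__(env)
        self.action_space = spaces.Discrete(3)

    def action(self, action):
        if action == 0:    # turn left
            return np.array([0.35, 0.65])
        if action == 1:    # turn right
            return np.array([0.65, 0.35])
        return np.array([0.75, 0.75])  # go forward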
Example #4
    def creator():
        if args.env_name is None:
            env = DuckietownEnv(
                map_name=args.map_name,
                draw_curve=args.draw_curve,
                draw_bbox=args.draw_bbox,
                domain_rand=args.domain_rand,
                frame_skip=args.frame_skip,
                distortion=args.distortion,
            )
        else:
            env = gym.make(args.env_name)

        return DiscreteWrapper(env) if discrete else env
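# Illustrative only (not part of the original snippet): a factory like creator()
# is typically registered with RLlib or simply called directly. Hypothetical usage,
# assuming `creator` is in scope:
from ray.tune.registry import register_env

register_env("DuckieTown-Custom", lambda _: creator())  # the name is arbitrary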
Example #5
def train_model(args):
    # We are using a custom model and environment, which need to be registered with Ray/RLlib.
    # The names can be anything.
    register_env("DuckieTown-MultiMap", lambda _: DiscreteWrapper(MultiMapEnv()))

    # Define the trainer. Apart from env, config/framework and config/model, which are common
    # among trainers, the remaining keys are PPO-specific.
    trainer = PPOTrainer(
        env="DuckieTown-MultiMap",
        config={
            "framework": "torch",
            "model": {
                "custom_model": "image-ppo",
            },
            "sgd_minibatch_size": 64,
            "output": None,
            "compress_observations": True,
            "num_workers": 0,
        }
    )

    # Start training from a checkpoint, if available.
    if args.model_path:
        trainer.restore(args.model_path)

    plot = plotter.Plotter('ppo_agent')
    for i in range(args.epochs):  # Number of training iterations (we call them epochs here)
        print(f'----------------------- Starting epoch {i} ----------------------- ')
        # train() runs only a single training iteration
        result = trainer.train()
        print(result)
        plot.add_results(result)

        # Save model so far.
        checkpoint_path = trainer.save()
        print(f'Epoch {i}, checkpoint saved at: {checkpoint_path}')

        # Cleanup CUDA memory to reduce memory usage.
        torch.cuda.empty_cache()
        # Debug log to monitor memory.
        print(torch.cuda.memory_summary(device=None, abbreviated=False))

    plot.plot('PPO DuckieTown-MultiMap')
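# Illustrative only (not part of the original snippet): plotter.Plotter is a
# project-local helper used by both training functions. A minimal sketch of what
# it plausibly does: collect RLlib's standard "episode_reward_mean" metric each
# iteration and plot it with matplotlib.
import matplotlib.pyplot as plt


class PlotterSketch:
    def __init__(self, name):
        self.name = name
        self.rewards = []

    def add_results(self, result):
        # "episode_reward_mean" is a standard key in RLlib result dicts.
        self.rewards.append(result["episode_reward_mean"])

    def plot(self, title):
        plt.plot(self.rewards)
        plt.title(title)
        plt.xlabel('training iteration')
        plt.ylabel('mean episode reward')
        plt.savefig(f'{self.name}.png')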
def main():

    weights_filename = "dqn_Duckietown-4way-v0_weights.h5f"

    # Get the environment and extract the number of actions.
    environment_name = weights_filename.split("_")[1]  # "Duckietown-4way-v0" for the filename above
    environment = gym.make(environment_name)
    environment = DiscreteWrapper(environment)
    np.random.seed(666)
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model((WINDOW_LENGTH, ) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Create memory.
    memory = SequentialMemory(
        limit=1000000,
        window_length=WINDOW_LENGTH)  # TODO Why is this necessary?

    # Create the processor.
    processor = DuckieTownProcessor()

    # Create the DQN-Agent.
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        processor=processor,
    )
    dqn.target_model = dqn.model  # TODO Why is this necessary?
    dqn.compile(optimizers.Adam(lr=.00025),
                metrics=['mae'])  # TODO Why is this necessary?

    # Load the weights.
    dqn.load_weights(weights_filename)

    # Test the agent.
    dqn.test(environment, nb_episodes=10, visualize=True)
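    # For reference: the weights file loaded above is produced by a training run;
    # with keras-rl that is typically done after dqn.fit() via
    #     dqn.save_weights(weights_filename, overwrite=True)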
def get_parser():
    # NOTE: the top of this snippet is truncated in the source; the argparse setup
    # below is a minimal reconstruction (flags inferred from how `args` is used), not the original code.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', default=None,
                        help='Location to pre-trained model')
    parser.add_argument('--n_searches', type=int, default=10)
    return parser.parse_args()


if __name__ == '__main__':
    args = get_parser()

    # Start ray
    ray.init()
    ModelCatalog.register_custom_model("image-dqn", RLLibDQNCritic)

    # We are using a custom model and environment, which need to be registered with Ray/RLlib.
    # The names can be anything.
    register_env("DuckieTown-MultiMap",
                 lambda _: DiscreteWrapper(MultiMapEnv()))

    csv_path = "searches/dqn_results.csv"
    starting_idx = 0
    if os.path.exists(csv_path):
        with open(csv_path, mode="r") as f:
            starting_idx = len(f.readlines())
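    # Counting the rows already present in the CSV lets the script resume the search
    # index after a restart (each completed search presumably appends one result row).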

    for search_idx in trange(args.n_searches, desc="Searches"):
        config = {
            "framework": "torch",
            "model": {
                "custom_model": "image-dqn",
            },
            "learning_starts": 500,
            # "record_env": True,  # Doing this allows us to record images from the DuckieTown Gym! Might be useful for report.  # noqa: E501