Exemplo n.º 1
0
# Teleoperation: wrap the environment in TeleopEnv and pair it with the model.
# Only the "sac" algorithm is accepted in this mode.
if args.teleop:
    # Raise explicitly rather than `assert`: assertions are removed when
    # Python runs with -O, which would let an unsupported algorithm fall
    # through to the TeleopEnv setup below.
    if args.algo != "sac":
        raise NotImplementedError(
            "Teleoperation mode is not yet implemented for {}".format(args.algo))
    env = TeleopEnv(env, is_training=True)
    model.set_env(env)
    # Give the wrapper a reference to the model it is paired with.
    env.model = model

# Optional keyword arguments forwarded to model.learn().
# A log interval of -1 (or lower) means "do not pass log_interval at all".
kwargs = {'log_interval': args.log_interval} if args.log_interval > -1 else {}

# SAC additionally gets a callback built for the "<ENV_ID>_best" path
# under save_path.
if args.algo == 'sac':
    kwargs['callback'] = create_callback(
        args.algo,
        os.path.join(save_path, ENV_ID + "_best"),
        verbose=1,
    )

model.learn(n_timesteps, **kwargs)

# Tear down after training; the path taken depends on whether the
# teleoperation wrapper is in use.
if args.teleop:
    # NOTE(review): wait() and exit() are provided by the teleop wrapper,
    # which is not visible here; presumably they block until the session
    # ends and then shut it down. Confirm against TeleopEnv.
    env.wait()
    env.exit()
    # Pause for half a second after exit() before the script continues.
    time.sleep(0.5)
else:
    # Close the connection properly
    env.reset()
    # If env is a VecFrameStack, replace it with the inner `venv` it holds.
    if isinstance(env, VecFrameStack):
        env = env.venv
    # HACK to bypass Monitor wrapper
Exemplo n.º 2
0
    # Experiment folder: the trained-agent path with everything from the
    # first '.pkl' onward removed.
    exp_folder = args.trained_agent.split('.pkl')[0]
    if normalize:
        # NOTE(review): presumably env is a normalizing wrapper whose running
        # statistics were saved in exp_folder; confirm against the env setup.
        print("Loading saved running average")
        env.load_running_average(exp_folder)
else:
    # Train an agent from scratch
    # The algorithm class is looked up in ALGOS by name; hyperparams are
    # passed through as keyword arguments.
    model = ALGOS[args.algo](env=env, tensorboard_log=tensorboard_log, verbose=1, **hyperparams)

# Keyword arguments for the training call, added in a fixed order:
# log_interval (optional), callback (SAC only), save_path (always).
kwargs = {}

# A log interval of -1 (or lower) means the key is omitted.
if args.log_interval > -1:
    kwargs['log_interval'] = args.log_interval

# SAC gets a callback built for the "<ENV_ID>_best" path under save_path.
if args.algo == 'sac':
    kwargs['callback'] = create_callback(
        args.algo,
        os.path.join(save_path, ENV_ID + "_best"),
        verbose=1,
    )

kwargs['save_path'] = save_path

# Base policy: loaded only when a path was supplied on the command line.
agent = None
if args.base_policy_path != '':
    print("Loading Base Policy for JIRL ...")
    agent = keras.models.load_model(args.base_policy_path)

    # Extra JIRL arguments: the loaded base policy, the number of expert
    # guidance steps and the joystick object.
    kwargs.update(
        base_policy=agent,
        expert_guidance_steps=args.expert_guidance_steps,
        joystick=js,
    )

    # Train agent using JIRL
    model.learn_jirl(n_timesteps, **kwargs)