if args.teleop:
    assert args.algo == "sac", "Teleoperation mode is not yet implemented for {}".format(args.algo)
    env = TeleopEnv(env, is_training=True)
    model.set_env(env)
    env.model = model

kwargs = {}
if args.log_interval > -1:
    kwargs = {'log_interval': args.log_interval}

if args.algo == 'sac':
    kwargs.update({
        'callback': create_callback(args.algo,
                                    os.path.join(save_path, ENV_ID + "_best"),
                                    verbose=1)
    })

model.learn(n_timesteps, **kwargs)

if args.teleop:
    env.wait()
    env.exit()
    time.sleep(0.5)
else:
    # Close the connection properly
    env.reset()
    if isinstance(env, VecFrameStack):
        env = env.venv
    # HACK to bypass Monitor wrapper
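# create_callback is defined elsewhere in the repo; its body is not shown in
# this excerpt. As a rough, hypothetical sketch of what such a factory can
# look like with the stable-baselines (v2) functional callback API
# (callback(locals_, globals_) -> bool, invoked from inside model.learn()),
# one might write something like the following. The name
# create_best_model_callback_sketch and the reward bookkeeping are
# illustrative assumptions, not the repo's actual code.
import numpy as np


def create_best_model_callback_sketch(algo, save_path, verbose=1):
    best_mean_reward = [-np.inf]  # mutable closure state shared across calls

    def callback(locals_, globals_):
        # SAC's learn() in stable-baselines v2 keeps a list of episode returns
        episode_rewards = locals_.get('episode_rewards', [])
        if len(episode_rewards) > 1:
            mean_reward = float(np.mean(episode_rewards[-100:]))
            if mean_reward > best_mean_reward[0]:
                best_mean_reward[0] = mean_reward
                if verbose > 0:
                    print("New best mean reward {:.2f}, saving model to {}".format(
                        mean_reward, save_path))
                locals_['self'].save(save_path)  # 'self' is the learning model
        return True  # returning True continues training

    return callback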
# Continue training if a saved agent was given, otherwise start from scratch
if args.trained_agent.endswith('.pkl') and os.path.isfile(args.trained_agent):
    # The policy class cannot be changed when loading a saved model
    del hyperparams['policy']
    model = ALGOS[args.algo].load(args.trained_agent, env=env,
                                  tensorboard_log=tensorboard_log, verbose=1, **hyperparams)
    exp_folder = args.trained_agent.split('.pkl')[0]
    if normalize:
        print("Loading saved running average")
        env.load_running_average(exp_folder)
else:
    # Train an agent from scratch
    model = ALGOS[args.algo](env=env, tensorboard_log=tensorboard_log, verbose=1, **hyperparams)

kwargs = {}
if args.log_interval > -1:
    kwargs.update({'log_interval': args.log_interval})

if args.algo == 'sac':
    kwargs.update({
        'callback': create_callback(args.algo,
                                    os.path.join(save_path, ENV_ID + "_best"),
                                    verbose=1)
    })
kwargs.update({'save_path': save_path})

# Base policy
agent = None
if args.base_policy_path != '':
    print("Loading Base Policy for JIRL ...")
    agent = keras.models.load_model(args.base_policy_path)
kwargs.update({
    'base_policy': agent,
    'expert_guidance_steps': args.expert_guidance_steps,
    'joystick': js,
})

# Train agent using JIRL
model.learn_jirl(n_timesteps, **kwargs)
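# learn_jirl is a custom method layered on top of the stable-baselines SAC
# model; its body is not part of this excerpt. The arguments assembled above
# (base_policy, expert_guidance_steps, joystick) suggest the rough control
# flow sketched below: for the first expert_guidance_steps the loaded Keras
# base policy drives the rollout, with the joystick available for human
# intervention, after which the learned SAC policy takes over. Everything
# below is an illustrative assumption about that flow, not the actual
# implementation; env_step and sac_action are hypothetical helpers.
def learn_jirl_flow_sketch(n_timesteps, base_policy, expert_guidance_steps,
                           env_step, sac_action, obs):
    for step in range(n_timesteps):
        if base_policy is not None and step < expert_guidance_steps:
            # Base (imitation) policy proposes actions early in training
            action = base_policy.predict(obs[None, ...])[0]
        else:
            # Afterwards the SAC policy acts on its own
            action = sac_action(obs)
        # env_step would store the transition and update the SAC learner
        obs = env_step(action)
    return obs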