def _init():
    """Build a single monitored, seeded Gym-style environment.

    ``env_id``, ``options``, ``seed`` and ``rank`` are free names that are
    resolved outside this function.

    Returns:
        The environment produced by ``suite.make(env_id, **options)``,
        wrapped first in ``GymWrapper`` and then in ``Monitor``, after
        ``seed(seed + rank)`` has been called on it.
    """
    # The gripper class is registered before suite.make() is invoked.
    register_gripper(UltrasoundProbeGripper)
    monitored_env = Monitor(GymWrapper(suite.make(env_id, **options)))
    monitored_env.seed(seed + rank)
    return monitored_env
# NOTE(review): this chunk is a fragment -- the `if` that pairs with the `else:` below is not visible here, so the branch structure cannot be confirmed from this view.
model = PPO.load(continue_training_model_path, env=env) # Training model.learn(total_timesteps=training_timesteps, tb_log_name=tb_log_name, callback=checkpoint_callback, reset_num_timesteps=True) # Save trained model model.save(save_model_path) env.save(save_vecnormalize_path) else: # Create evaluation environment env_options['has_renderer'] = True register_gripper(UltrasoundProbeGripper) env_gym = GymWrapper(suite.make(env_id, **env_options)) env = DummyVecEnv([lambda: env_gym]) # Load normalized env env = VecNormalize.load(load_vecnormalize_path, env) # Turn off updates and reward normalization env.training = False env.norm_reward = False # Load model model = PPO.load(load_model_path, env) # Simulate environment obs = env.reset()