예제 #1
0
 def _init():
     """Create one monitored, seeded Gym-style environment.

     Registers ``UltrasoundProbeGripper`` first, then builds the environment
     with ``suite.make(env_id, **options)``, wraps it in ``GymWrapper`` and
     ``Monitor``, and seeds it with ``seed + rank``.

     ``env_id``, ``options``, ``seed`` and ``rank`` are free variables taken
     from the enclosing scope (not visible in this snippet).

     Returns:
         The ``Monitor``-wrapped environment.
     """
     # The gripper is registered before the environment is constructed.
     register_gripper(UltrasoundProbeGripper)

     raw_env = suite.make(env_id, **options)
     monitored_env = Monitor(GymWrapper(raw_env))

     # Offset the base seed by the rank so each factory gets its own seed.
     monitored_env.seed(seed + rank)
     return monitored_env
예제 #2
0
            model = PPO.load(continue_training_model_path, env=env)

        # Training
        model.learn(total_timesteps=training_timesteps,
                    tb_log_name=tb_log_name,
                    callback=checkpoint_callback,
                    reset_num_timesteps=True)

        # Save trained model
        model.save(save_model_path)
        env.save(save_vecnormalize_path)

    else:
        # Create evaluation environment
        env_options['has_renderer'] = True
        register_gripper(UltrasoundProbeGripper)
        env_gym = GymWrapper(suite.make(env_id, **env_options))
        env = DummyVecEnv([lambda: env_gym])

        # Load normalized env
        env = VecNormalize.load(load_vecnormalize_path, env)

        # Turn off updates and reward normalization
        env.training = False
        env.norm_reward = False

        # Load model
        model = PPO.load(load_model_path, env)

        # Simulate environment
        obs = env.reset()