# --- Hierarchical TRPO setup on AntMaze ---------------------------------
# NOTE(review): this fragment is the interior of a larger experiment scope
# (`par`, `time_step_agg`, and the env/algo classes are defined outside the
# visible chunk — the mixed indentation below reflects that), and the final
# TRPO(...) call is truncated in this view.  Code left byte-identical.
# Build the low-level AntMaze environment, wrapped so observations are
# normalized; all task knobs come from the `par` config object.
inner_env = normalize(
            AntMazeEnv(
                maze_id=par.maze_id,
                death_reward=par.death_reward,
                sensor_range=par.sensor_range,
                sensor_span=math.pi * 2,  # full 360-degree sensor coverage
                ego_obs=True,
                fence=par.fence,
                goal_rew=par.success_reward,
                random_start=par.random_start,
                direct_goal=par.direct_goal,
                velocity_field=par.velocity_field,
            ))
        # Wrap the low-level env into a hierarchical one: each high-level
        # action executes a pretrained skill (loaded from pkl_path) for
        # `time_steps_agg` low-level steps.
        env = hierarchize_snn(
            inner_env,
            time_steps_agg=time_step_agg,
            pkl_path=par.pkl_path,
            animate=par.animate,
        )

        # High-level policy chooses among discrete skills, hence categorical.
        policy = CategoricalMLPPolicy(env_spec=env.spec, )
        print("env_hier", env.spec)
        # NOTE(review): no `else` branch — if par.baseline_name is neither
        # 'linear' nor 'mlp', `baseline` stays unbound and the later TRPO
        # construction will raise NameError.  Consider raising ValueError here.
        if par.baseline_name == 'linear':
            baseline = LinearFeatureBaseline(env_spec=env.spec)
        elif par.baseline_name == 'mlp':
            baseline = GaussianMLPBaseline(env_spec=env.spec)

        # Convert low-level step budgets into high-level counts: one
        # high-level step aggregates `time_step_agg` low-level steps.
        batch_size = int(par.low_step_num / time_step_agg)
        max_path_length = int(par.max_low_step / time_step_agg)

        # NOTE(review): call truncated — remaining TRPO kwargs (policy,
        # baseline, batch_size, ...) are outside this chunk.
        algo = TRPO(
            env=env,
# Launcher configuration for the SwimmerGather hierarchical runs below.
mode = "local"   # presumably selects local vs. cluster (ec2) execution — confirm against launcher
n_parallel = 4   # number of parallel sampler workers

# Directory holding pretrained ego-Swimmer SNN snapshots to build on.
exp_dir = 'data/local/egoSwimmer-snn/'
# For every pretrained-skill snapshot found under exp_dir, launch a grid of
# hierarchical SwimmerGather experiments over aggregation length and arena size.
# NOTE(review): `dir` shadows the builtin of the same name — rename if this
# code is revisited.  The trailing TRPO_snn(...) call is truncated in this view.
for dir in os.listdir(exp_dir):
    # Skip figure folders; only descend into runs that saved a params.pkl.
    if 'Figure' not in dir and os.path.isfile(os.path.join(exp_dir, dir, 'params.pkl')):
        pkl_path = os.path.join(exp_dir, dir, 'params.pkl')
        print("hier for : ", pkl_path)

        # How many low-level steps each high-level action aggregates.
        for time_step_agg in [10, 50, 100]:

            # Arena size; sensor_range is tied to it so sensors cover the arena.
            for activity_range in [6, 10, 15]:
                inner_env = normalize(SwimmerGatherEnv(activity_range=activity_range, sensor_range=activity_range,
                                                       sensor_span=math.pi * 2, ego_obs=True))
                # Wrap with the pretrained SNN skills from this snapshot.
                env = hierarchize_snn(inner_env, time_steps_agg=time_step_agg, pkl_path=pkl_path,
                                      # animate=True,
                                      )

                # High-level policy picks among discrete skills.
                policy = CategoricalMLPPolicy(
                    env_spec=env.spec,
                )

                baseline = LinearFeatureBaseline(env_spec=env.spec)

                # bonus_evaluators = [GridBonusEvaluator(mesh_density=mesh_density, visitation_bonus=1, snn_H_bonus=0)]
                # reward_coef_bonus = [reward_coef]

                # NOTE(review): call truncated — remaining TRPO_snn kwargs are
                # outside this chunk.
                algo = TRPO_snn(
                    env=env,
                    policy=policy,
                    baseline=baseline,