# Fail fast: every model path below is built relative to zoo_dir.
if not os.path.exists(zoo_dir):
    raise Exception('zoo directory does not exist')

  # The zoo directory must contain the "pretrained" and "extended" model sub-directories.
  # NOTE(review): the `/` joins below presumably rely on zoo_dir being a pathlib.Path — confirm.
  pretrained_dir = zoo_dir / "pretrained"
  extended_dir = zoo_dir / "extended"
  # Example of registering an additional model directory:
  # reward_wrapper_dir = zoo_dir / "reward_wrapper"

  # Add the agents you would like to evaluate to the test_agents dictionary: { "model_name": policy_object }.
  # Each value is the object returned by a policy constructor/factory call (not a path);
  # file-backed policies receive the path to their model file as an argument.
  # You may delete the existing entries in the test_agents dictionary (they are just examples).
  test_agents = {
      "baseline": BaselinePolicy(),
      "ppo": PPOPolicy(pretrained_dir / "ppo.zip"),
      "ppo_sp": PPOPolicy(pretrained_dir / "ppo_sp.zip"),
      "ga_sp": makeSlimePolicyLite(pretrained_dir / "ga_sp.json"),
      "random": RandomPolicy(),
  }

  if args.benchmark and not args.evaltest:
    pretrained_agents = {
      "baseline": BaselinePolicy(),
      "ppo": PPOPolicy(pretrained_dir / "ppo.zip"),
      "ppo_sp": PPOPolicy(pretrained_dir / "ppo_sp.zip"),
      "ga_sp": makeSlimePolicyLite(pretrained_dir / "ga_sp.json"),
      "random": RandomPolicy(),
    }

    extended_agents = {
      "ppo_extended": PPOPolicy(extended_dir / "ppo.zip"),
      "ppo_sp_extended": PPOPolicy(extended_dir / "ppo_sp.zip"),
    # Fail fast: every model path below is built relative to zoo_dir.
    if not os.path.exists(zoo_dir):
        raise Exception('zoo directory does not exist')

    # The zoo directory must contain the "pretrained" and "extended" model sub-directories.
    # NOTE(review): the `/` joins below presumably rely on zoo_dir being a pathlib.Path — confirm.
    pretrained_dir = zoo_dir / "pretrained"
    extended_dir = zoo_dir / "extended"
    # Example of registering an additional model directory:
    # reward_wrapper_dir = zoo_dir / "reward_wrapper"
    # Directory holding the model files loaded by the test_agents entries.
    obs_small_always_dir = zoo_dir / "obs_small_always"

    # Add the agents you would like to evaluate to the test_agents dictionary: { "model_name": policy_object }.
    # Each value is the object returned by a policy constructor/factory call (not a path);
    # the model file path is passed to that call as an argument.
    # You may delete the existing entries in the test_agents dictionary (they are just examples).
    test_agents = {
        "ppo": PPOPolicy(obs_small_always_dir / "ppo.zip"),
        "ppo_sp": PPOPolicy(obs_small_always_dir / "ppo_sp.zip"),
        "ga_sp": makeSlimePolicyLite(obs_small_always_dir / "ga_sp.json"),
    }

    if args.benchmark and not args.evaltest:
        pretrained_agents = {
            "baseline": BaselinePolicy(),
            "ppo": PPOPolicy(pretrained_dir / "ppo.zip"),
            "ppo_sp": PPOPolicy(pretrained_dir / "ppo_sp.zip"),
            "ga_sp": makeSlimePolicyLite(pretrained_dir / "ga_sp.json"),
            "random": RandomPolicy(),
        }

        extended_agents = {
            "ppo_extended": PPOPolicy(extended_dir / "ppo.zip"),
            "ppo_sp_extended": PPOPolicy(extended_dir / "ppo_sp.zip"),
            "ga_sp_extended": makeSlimePolicyLite(extended_dir / "ga_sp.json"),
  # Model sub-directories of the zoo directory.
  # NOTE(review): the `/` joins below presumably rely on zoo_dir being a pathlib.Path — confirm.
  pretrained_dir = zoo_dir / "pretrained"
  extended_dir = zoo_dir / "extended"
  # Example of registering an additional model directory:
  # reward_wrapper_dir = zoo_dir / "reward_wrapper"
  # Directories holding the model files loaded by test_agents_1 (spike) and test_agents_2 (arc).
  spike_dir = zoo_dir / "spike"
  arc_dir = zoo_dir / "arc"


  # Add the agents you would like to evaluate to the test_agents_1 / test_agents_2 dictionaries:
  # { "model_name": policy_object }. Each value is the object returned by a policy
  # constructor/factory call (not a path); the model file path is passed to that call.
  # You may delete the existing entries in these dictionaries (they are just examples).
  # Agents whose model files are loaded from spike_dir.
  test_agents_1 = {
      "ppo_spike_best": PPOPolicy(spike_dir / "ppo_best.zip"),
      "ppo_sp_spike_best": PPOPolicy(spike_dir / "ppo_sp_best.zip"),
      "ppo_spike_final": PPOPolicy(spike_dir / "ppo_final.zip"),
      "ppo_sp_spike_final": PPOPolicy(spike_dir / "ppo_sp_final.zip"),
      "ga_sp_spike": makeSlimePolicyLite(spike_dir / "ga_sp.json"),
  }

  # Agents whose model files are loaded from arc_dir.
  test_agents_2 = {
      "ppo_arc_best": PPOPolicy(arc_dir / "ppo_best.zip"),
      "ppo_sp_arc_best": PPOPolicy(arc_dir / "ppo_sp_best.zip"),
      "ppo_arc_final": PPOPolicy(arc_dir / "ppo_final.zip"),
      "ppo_sp_arc_final": PPOPolicy(arc_dir / "ppo_sp_final.zip"),
      "ga_sp_arc": makeSlimePolicyLite(arc_dir / "ga_sp.json"),
  }

  if args.benchmark and not args.evaltest:
    pretrained_agents = {
      "baseline": BaselinePolicy(),
      "ppo": PPOPolicy(pretrained_dir / "ppo.zip"),
      "ppo_sp": PPOPolicy(pretrained_dir / "ppo_sp.zip"),