# Example 1
def _env_victim(envs=None):
    """Enumerate every (env, victim_id) pair across the given environments.

    :param envs: A list of envs; if set to None, uses all BANSAL_GOOD_ENVS.
    :return: a flat list of (env, victim_id) tuples, one per Zoo policy,
        with victim ids starting at 1.
    """
    envs = BANSAL_GOOD_ENVS if envs is None else envs
    pairs = []
    for env in envs:
        num_zoo = gym_compete.num_zoo_policies(env)
        pairs.extend((env, victim_id) for victim_id in range(1, num_zoo + 1))
    return pairs
# Example 2
def _finetune_configs(envs=None, dual_defense=False):
    """Generates configs for finetuning a Zoo model.

    Note in this setup, the adversary is the embedded agent, whereas usually the victim is.
    :param envs: A list of envs; if set to None, uses all BANSAL_GOOD_ENVS
    :param dual_defense: If True, fine-tune against both an adversary and Zoo agent (randomly
        selected per episode); if False, fine-tune against just the adversary.
    :return: a list of (env, embed_paths, embed_types, embed_index, load_policy) tuples.
    """
    if envs is None:
        envs = BANSAL_GOOD_ENVS
    configs = []
    adversary_paths = get_adversary_paths()
    for env in envs:
        original_embed_index = VICTIM_INDEX[env]
        num_zoo = gym_compete.num_zoo_policies(env)
        for original_victim in range(1, num_zoo + 1):
            original_victim = str(original_victim)
            # Finetuning starts from the original Zoo victim's weights.
            load_policy = {"type": "zoo", "path": original_victim}

            adversary = _get_policy_path(adversary_paths, env,
                                         str(original_embed_index),
                                         original_victim)
            # Fail fast if no adversary was trained for this (env, victim) —
            # consistent with the equivalent check in
            # _train_against_finetuned_configs; previously a None path would
            # be silently embedded in the config.
            assert adversary is not None

            if dual_defense:
                # If training both best adversary and Zoo, try each possible Zoo agent
                for finetuning_zoo in range(1, num_zoo + 1):
                    finetuning_zoo = str(finetuning_zoo)
                    embed_paths = [adversary, finetuning_zoo]
                    embed_types = ["ppo2", "zoo"]
                    configs.append((env, embed_paths, embed_types,
                                    1 - original_embed_index, load_policy))
            else:
                configs.append((env, [adversary], ["ppo2"],
                                1 - original_embed_index, load_policy))
    return configs
# Example 3
def _train_against_finetuned_configs(finetune_run,
                                     envs=None,
                                     from_scratch=True):
    """Train an adversary against an adversarially-finetuned Zoo agent.

    :param finetune_run: An experiment name (or <experiment_name/experiment_timestamp>)
        naming the finetuned Zoo agent to train against. Assumes
        highest_win_rate.py has already been run, so the best-performing
        finetuned agent per (env, zoo_id) combination is available.
    :param envs: A list of envs; if set to None, uses all BANSAL_GOOD_ENVS
    :param from_scratch: If True, trains an adversary from random initialization; if False,
        finetunes an adversary starting with the already-existing adversary.
    :return: a list of (env, finetuned_victim, embed_index, load_policy) tuples.
    """
    envs = BANSAL_GOOD_ENVS if envs is None else envs
    finetuned_paths = _get_path_from_exp_name(finetune_run)
    adversary_paths = get_adversary_paths()

    configs = []
    for env in envs:
        embed_index = VICTIM_INDEX[env]
        # The finetuned agent sits on the opposite side from the usual victim.
        finetuned_embed_index = 1 - embed_index
        num_zoo = gym_compete.num_zoo_policies(env)
        for zoo_id in map(str, range(1, num_zoo + 1)):
            finetuned_victim = _get_policy_path(finetuned_paths, env,
                                                finetuned_embed_index, zoo_id)
            # Skip combinations with no finetuned agent available.
            if not finetuned_victim:
                continue

            if from_scratch:
                load_policy = {"type": "ppo2", "path": None}
            else:
                adversary = _get_policy_path(adversary_paths, env,
                                             embed_index, zoo_id)
                assert adversary is not None
                load_policy = {"type": "ppo2", "path": adversary}

            configs.append((env, finetuned_victim, embed_index, load_policy))

    return configs
# Example 4
def _zoo(env, agent_index):
    """Returns all Zoo policies in `env`."""
    del agent_index  # unused; kept for interface compatibility
    count = gym_compete.num_zoo_policies(env)
    policies = []
    for zoo_id in range(1, count + 1):
        policies.append(("zoo", str(zoo_id)))
    return policies