Example #1
def debug_one_each_type(score):
    """One Zoo agent from each environment, plus one opponent of each type.

    Intended for debugging purposes: a quick experiment that is still diverse."""
    score = dict(score)
    score["episodes"] = 2
    spec = {
        "config": {
            PATHS_AND_TYPES:
            tune.grid_search([
                # Zoo vs. Zoo, restricted to agent "1" on both sides
                cfg for cfg in _gen_configs(victim_fns=[_zoo],
                                            opponent_fns=[_zoo])
                if cfg.agent_a_path == "1" and cfg.agent_b_path == "1"
            ] + [
                # Zoo vs. fixed baseline, keeping configs where either side is agent "1"
                cfg for cfg in _gen_configs(victim_fns=[_zoo],
                                            opponent_fns=[_fixed])
                if cfg.agent_a_path == "1" or cfg.agent_b_path == "1"
            ] + _gen_configs(
                # Zoo vs. trained adversary, taking just the first config
                victim_fns=[_zoo],
                opponent_fns=[_from_paths(get_adversary_paths())],
            )[0:1]),
        },
    }
    exp_suffix = "debug_one_each_type"

    _ = locals()  # quieten flake8 unused variable warning
    del _
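The spec above concatenates three config lists and hands them to Ray Tune's tune.grid_search, which launches one trial per entry. A minimal sketch of what grid_search itself produces (the candidate strings here are made up for illustration; the real entries are the config tuples from _gen_configs):

from ray import tune

# tune.grid_search wraps the candidate list in a marker dict; Ray Tune
# expands it into one trial per entry when the experiment runs.
candidates = ["zoo-vs-zoo", "zoo-vs-fixed", "zoo-vs-adversary"]
print(tune.grid_search(candidates))
# {'grid_search': ['zoo-vs-zoo', 'zoo-vs-fixed', 'zoo-vs-adversary']}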
Example #2
def _finetune_configs(envs=None, dual_defense=False):
    """Generates configs for finetuning a Zoo model.

    Note that in this setup the adversary is the embedded agent, whereas usually the victim is.
    :param envs: A list of envs; if set to None, uses all BANSAL_GOOD_ENVS
    :param dual_defense: If True, fine-tune against both an adversary and Zoo agent (randomly
        selected per episode); if False, fine-tune against just the adversary.
    """
    if envs is None:
        envs = BANSAL_GOOD_ENVS
    configs = []
    adversary_paths = get_adversary_paths()
    for env in envs:
        original_embed_index = VICTIM_INDEX[env]
        num_zoo = gym_compete.num_zoo_policies(env)
        for original_victim in range(1, num_zoo + 1):
            original_victim = str(original_victim)
            # Start finetuning from the original Zoo victim's weights
            load_policy = {"type": "zoo", "path": original_victim}

            adversary = _get_policy_path(adversary_paths, env,
                                         str(original_embed_index),
                                         original_victim)

            if dual_defense:
                # If training against both the best adversary and a Zoo agent,
                # try each possible Zoo agent as the second opponent
                for finetuning_zoo in range(1, num_zoo + 1):
                    finetuning_zoo = str(finetuning_zoo)
                    embed_paths = [adversary, finetuning_zoo]
                    embed_types = ["ppo2", "zoo"]
                    configs.append((env, embed_paths, embed_types,
                                    1 - original_embed_index, load_policy))
            else:
                configs.append((env, [adversary], ["ppo2"],
                                1 - original_embed_index, load_policy))
    return configs
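A hedged sketch of consuming these tuples; launch_finetune is a hypothetical launcher, not part of this codebase, and the env name is one of the Bansal et al. environments:

for env, embed_paths, embed_types, embed_index, load_policy in \
        _finetune_configs(envs=["multicomp/YouShallNotPassHumans-v0"],
                          dual_defense=True):
    # Each tuple fully specifies one finetuning run.
    launch_finetune(
        env_name=env,
        embed_paths=embed_paths,  # e.g. [adversary_path, zoo_id]
        embed_types=embed_types,  # e.g. ["ppo2", "zoo"]
        embed_index=embed_index,  # slot the embedded opponents occupy
        load_policy=load_policy,  # the Zoo snapshot to finetune from
    )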
Example #3
def _train_against_finetuned_configs(finetune_run,
                                     envs=None,
                                     from_scratch=True):
    """Train an adversary against an adversarially-finetuned Zoo agent.

    :param finetune_run: An experiment name (or <experiment_name/experiment_timestamp>)
    representing the finetuned Zoo agent you'd like to train against. This method assumes that
    highest_win_rate.py has been run, and takes the best-performing finetuned agent for each
    (env, zoo_id) combination.
    :param envs: A list of envs; if set to None, uses all BANSAL_GOOD_ENVS
    :param from_scratch: If True, trains an adversary from random initialization; if False,
        finetunes an adversary starting with the already-existing adversary.
    :return:
    """

    if envs is None:
        envs = BANSAL_GOOD_ENVS
    configs = []
    finetuned_paths = _get_path_from_exp_name(finetune_run)
    adversary_paths = get_adversary_paths()
    for env in envs:
        embed_index = VICTIM_INDEX[env]
        finetuned_embed_index = 1 - embed_index
        num_zoo = gym_compete.num_zoo_policies(env)
        for original_victim in range(1, num_zoo + 1):
            original_victim = str(original_victim)
            finetuned_victim = _get_policy_path(finetuned_paths, env,
                                                str(finetuned_embed_index),
                                                original_victim)
            if not finetuned_victim:
                continue

            if from_scratch:
                # Train the adversary from random initialization
                load_policy = {"type": "ppo2", "path": None}
            else:
                # Warm-start from the existing adversary for this victim
                adversary = _get_policy_path(adversary_paths, env,
                                             str(embed_index),
                                             original_victim)
                assert adversary is not None
                load_policy = {"type": "ppo2", "path": adversary}

            configs.append((env, finetuned_victim, embed_index, load_policy))

    return configs
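A similar hedged sketch for these tuples; train_adversary is hypothetical, and "finetune_defense" stands in for a real experiment name:

for env, victim_path, embed_index, load_policy in \
        _train_against_finetuned_configs("finetune_defense"):
    # load_policy is {"type": "ppo2", "path": None} when training from scratch.
    train_adversary(
        env_name=env,
        victim_path=victim_path,  # best finetuned Zoo agent for this (env, zoo_id)
        victim_index=embed_index,
        load_policy=load_policy,
    )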
Example #4
def _adversary():
    """Returns all adversaries from default JSON."""
    return _from_paths(get_adversary_paths())
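For illustration only, get_adversary_paths() plausibly returns a nested mapping from env name to embedded-agent index to Zoo id; the exact JSON schema and the path below are assumptions, not the repo's real layout:

# Assumed shape only; the real adversary-paths JSON may differ.
example_adversary_paths = {
    "multicomp/YouShallNotPassHumans-v0": {
        "0": {  # index of the embedded (victim) agent
            "1": "data/adversaries/ysnp/zoo1/model.pkl",  # hypothetical path
        },
    },
}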