Exemplo n.º 1
0
def agent_index_suffix(env_name, victim_name, opponent_name):
    """Tag Zoo agent names with their role when the environment is asymmetric.

    When ``gym_compete.is_symmetric(env_name)`` is falsy, any name that starts
    with ``'Zoo'`` gets a role letter inserted just before its final
    character: ``'V'`` for the victim and ``'O'`` for the opponent. Names
    that do not start with ``'Zoo'`` are passed through untouched, as is
    everything when the environment is symmetric.

    Args:
        env_name: Environment identifier handed to ``gym_compete.is_symmetric``.
        victim_name: Name of the victim agent.
        opponent_name: Name of the opponent agent.

    Returns:
        A ``(env_name, victim_name, opponent_name)`` tuple with the possibly
        rewritten agent names.
    """
    if gym_compete.is_symmetric(env_name):
        # Symmetric environments need no role marker.
        return env_name, victim_name, opponent_name

    def _with_role(name, role):
        # Only Zoo agents are tagged; the role letter goes before the last
        # character of the name.
        if not name.startswith('Zoo'):
            return name
        return f'{name[:-1]}{role}{name[-1]}'

    return env_name, _with_role(victim_name, 'V'), _with_role(opponent_name, 'O')
Exemplo n.º 2
0
def pretty_policy_type(env_name, short, is_victim, is_masked, policy_type, policy_path):
    """Return a human-readable label for a policy.

    Args:
        env_name: Environment identifier; passed to ``is_symmetric`` for
            ``'zoo'`` policies.
        short: When truthy, return only the friendly name; otherwise return
            ``'<friendly> (<code>)'``.
        is_victim: For ``'zoo'`` policies in a non-symmetric environment,
            selects the ``'ZooV'`` (truthy) or ``'ZooO'`` (falsy) code prefix.
        is_masked: For ``'zoo'`` policies, appends ``'M'`` to the code prefix
            and switches the friendly name from ``'Normal'`` to ``'Masked'``.
        policy_type: One of ``'zero'``, ``'random'``, ``'ppo2'`` or ``'zoo'``.
        policy_path: For ``'ppo2'``, a path whose last four components are
            stripped to locate the experiment root; for ``'zoo'``, the value
            appended to the code prefix.

    Returns:
        The label string described under ``short``.

    Raises:
        ValueError: If ``policy_type`` is not one of the recognized values.
    """
    if policy_type == 'zero':
        label, tag = 'Lifeless', 'Zero'
    elif policy_type == 'random':
        label, tag = 'Random', 'Rand'
    elif policy_type == 'ppo2':
        try:
            # The experiment root is the policy path minus its last four
            # components; the training config there names the victim.
            root = osp.sep.join(policy_path.split(osp.sep)[:-4])
            trained_against = read_sacred_config(root, 'train')['victim_path']
        except (IndexError, FileNotFoundError):
            # Fall back to an unlabelled adversary when the lookup fails.
            trained_against = ''
        label, tag = 'Adversary', f'Adv{trained_against}'
    elif policy_type == 'zoo':
        if is_symmetric(env_name):
            stem = 'Zoo'
        elif is_victim:
            stem = 'ZooV'
        else:
            stem = 'ZooO'
        if is_masked:
            label, marker = 'Masked', 'M'
        else:
            label, marker = 'Normal', ''
        tag = f'{stem}{marker}{policy_path}'
    else:
        raise ValueError(f"Unrecognized policy type '{policy_type}'")

    return label if short else f'{label} ({tag})'
def plot_baselines(env_name, victim_path, ycol, ax, baseline):
    """Draw horizontal baseline lines for a victim on ``ax``.

    Looks up the victim's row(s) in ``baseline`` under the key
    ``(env_name, victim_name)``, rescales the scores to percentages of the
    episode count reported by ``util.num_episodes``, and draws one
    ``ax.axhline`` each for the ``'Zoo'``, ``'Rand'`` and ``'Zero'``
    opponents. The ``'Zoo'`` value is the maximum over every opponent whose
    label starts with ``'Zoo'``.

    Args:
        env_name: Environment identifier; also decides (via ``is_symmetric``)
            whether the victim key is prefixed ``'Zoo'`` or ``'ZooV'``.
        victim_path: Value appended to the prefix to form the victim key.
        ycol: Column of the score table to plot.
        ax: Axes-like object providing ``get_legend_handles_labels`` and
            ``axhline``.
        baseline: Table indexed by ``(env_name, victim_name)``; presumably a
            pandas DataFrame with a MultiIndex -- confirm against the caller.
    """
    prefix = 'Zoo' if is_symmetric(env_name) else 'ZooV'
    victim_key = f'{prefix}{victim_path}'

    raw = baseline.loc[(env_name, victim_key), :]
    percent = raw / util.num_episodes(raw) * 100  # convert to percent

    column = percent[ycol]
    # Collapse every Zoo opponent into a single entry holding the best score.
    is_zoo = column.index.str.startswith('Zoo')
    column['Zoo'] = column.loc[is_zoo].max()
    column = column.loc[['Zoo', 'Rand', 'Zero']]

    # Continue the colour cycle after the lines already on the axes.
    existing_handles = ax.get_legend_handles_labels()[0]
    first_color = len(existing_handles)
    for color_idx, (opponent, score) in enumerate(column.items(), start=first_color):
        ax.axhline(
            y=score,
            label=PRETTY_LABELS[opponent],
            color=f'C{color_idx}',
            linewidth=1,
            linestyle=LINESTYLES[opponent],
        )
Exemplo n.º 4
0
def pretty_policy_type(env_name, is_victim, policy_type, policy_path):
    """Return a ``'<friendly> (<code>)'`` label for a policy.

    Args:
        env_name: Environment identifier; passed to ``is_symmetric`` for
            ``'zoo'`` policies.
        is_victim: For ``'zoo'`` policies in a non-symmetric environment,
            selects the ``'ZooV'`` (truthy) or ``'ZooO'`` (falsy) code prefix.
        policy_type: One of ``'zero'``, ``'random'``, ``'ppo2'`` or ``'zoo'``.
        policy_path: For ``'ppo2'``, a path whose last four components are
            stripped to locate the experiment root; for ``'zoo'``, the value
            appended to the code prefix.

    Returns:
        The label string.

    Raises:
        ValueError: If ``policy_type`` is not one of the recognized values.
    """
    if policy_type == 'zero':
        label = 'Lifeless (Zero)'
    elif policy_type == 'random':
        label = 'Random (Rand)'
    elif policy_type == 'ppo2':
        try:
            # The experiment root is the policy path minus its last four
            # components; the training config there names the victim.
            root = osp.sep.join(policy_path.split(osp.sep)[:-4])
            trained_against = read_sacred_config(root, 'train')['victim_path']
        except (IndexError, FileNotFoundError):
            # Fall back to an unlabelled adversary when the lookup fails.
            trained_against = ''
        label = f'Adversary (Adv{trained_against})'
    elif policy_type == 'zoo':
        if is_symmetric(env_name):
            stem = 'Zoo'
        elif is_victim:
            stem = 'ZooV'
        else:
            stem = 'ZooO'
        label = f'Normal ({stem}{policy_path})'
    else:
        raise ValueError(f"Unrecognized policy type '{policy_type}'")
    return label