# Example #1
# 0
def no_victim_finetuning():
    """Config: train an adversary against a victim that is never finetuned
    (victim_finetuning_iters = 0, a single outer loop).

    NOTE(review): this looks like a Sacred-style config function — the local
    variable names below are presumably the configuration keys consumed by the
    experiment runner, so they must not be renamed.
    """
    # TODO only have one of these, since they're fully determined from each other
    shape = "(64,)"
    shape_split = make_tuple(shape)
    if not shape_split:
        # An empty result means the shape string failed to parse; ValueError is
        # the conventional exception for a bad value (was a bare Exception).
        raise ValueError("didn't parse shape right")
    # The parsed shape repeated twice (same value as the old [] + two extends).
    shape_target = [*shape_split, *shape_split]
    adv_index = 1
    victim_index = 0
    gamma = 0.95
    lr = 0.0065
    batch_size = 2000
    adversary_policy = get_pg_pol
    victim_policy = get_pg_pol
    adversary_trainer = partial(get_pg_train, gamma=gamma, lr=lr, batch_size=batch_size, shape=[])
    victim_trainer = partial(get_pg_train, gamma=gamma, lr=lr, batch_size=batch_size, shape=[])
    victim_policy_key = "baseline"
    adversary_type_key = "total"
    dir_loc = TRAINING_SAVE_DIR
    env = "tictactoe-v0"
    adversary_training_iters = 800
    victim_finetuning_iters = 0
    outer_loop_iters = 1
    dim = 10
    # Note that this currently isn't smart enough to ensure that your adversary and victim algs during training are
    # compatible with the ones you've chosen here, and they need to be
    prev_run = latest_training_sacred_dir(dir_loc)
    victim_trainer_func = partial(possibly_pretrained_trainer, load_weights=True)  # victim reloads all weights
    adversary_trainer_func = partial(possibly_pretrained_trainer, load_weights=False)  # adversary reloads no weights
    reload_checkpoint_index = None
# Example #2
# 0
def paper_config_vul_tot():
    """Paper config: "total" adversary, single outer loop (no iterated training).

    NOTE(review): Sacred-style config — local names are the config keys; do not
    rename them.
    """
    lr = 0.0001
    adversary_type_key = "total"
    adversary_training_iters = 800
    outer_loop_iters = 1
    # Hidden-layer sizes are parsed out of a string representation.
    shape = "(64,)"
    shape_split = make_tuple(shape)
    # Hierarchical policy: core and redundant subnetworks share the same shape.
    adversary_policy = partial(
        get_hierarchical_pg_pol,
        core_policy_hiddens=shape_split,
        redundant_policy_hiddens=shape_split,
    )
# Example #3
# 0
def paper_config_itr_adv_train_tot():
    """Paper config: iterated adversarial training with a "total" adversary
    (10 outer loops, 10 victim finetuning iterations per loop).

    NOTE(review): Sacred-style config — local names are the config keys; do not
    rename them.
    """
    lr = 0.0001
    adversary_type_key = "iat_total"
    adversary_training_iters = 800
    outer_loop_iters = 10
    victim_finetuning_iters = 10
    dir_loc = None
    # Hidden-layer sizes are parsed out of a string representation.
    shape = "(64,)"
    shape_split = make_tuple(shape)
    adversary_policy = partial(
        get_hierarchical_pg_pol,
        core_policy_hiddens=shape_split,
        redundant_policy_hiddens=shape_split,
    )
    # load_weights=False: the adversary does not reload pretrained weights.
    adversary_trainer_func = partial(possibly_pretrained_trainer, load_weights=False)
# Example #4
# 0
def paper_sys_ablation():
    """
    Reinitializes core weights, resets redundant weights to pretrained values, trains both (nothing frozen)
    """
    lr = 0.0001
    adversary_type_key = "total_core_pretrained"
    # Hidden-layer sizes are parsed out of a string representation.
    shape = "(64,)"
    shape_split = make_tuple(shape)
    adversary_policy = partial(
        get_hierarchical_pg_pol,
        core_policy_hiddens=shape_split,
        redundant_policy_hiddens=shape_split,
    )
    # presumably pretrained_weight_keywords selects which parameter groups get
    # pretrained weights — note the docstring says the *redundant* weights are
    # the pretrained ones while the keyword here is 'core'; verify semantics
    # against possibly_pretrained_trainer.
    adversary_trainer_func = partial(
        possibly_pretrained_trainer,
        load_weights=True,
        pretrained_weight_keywords=['core'],
    )
# Example #5
# 0
def paper_config_vul_str():
    """Paper config: "strategic" adversary — the redundant subnetwork is not
    trainable and its pretrained weights are reloaded.

    NOTE(review): Sacred-style config — local names are the config keys; do not
    rename them.
    """
    lr = 0.0001
    adversary_type_key = "strategic"
    adversary_training_iters = 800
    outer_loop_iters = 1
    dir_loc = None
    # Hidden-layer sizes are parsed out of a string representation.
    shape = "(64,)"
    shape_split = make_tuple(shape)
    # redundant_trainable=False: only the core subnetwork is trained.
    adversary_policy = partial(
        get_hierarchical_pg_pol,
        core_policy_hiddens=shape_split,
        redundant_policy_hiddens=shape_split,
        redundant_trainable=False,
    )
    # Reload pretrained weights restricted to the 'redundant' keyword group.
    adversary_trainer_func = partial(
        possibly_pretrained_trainer,
        load_weights=True,
        pretrained_weight_keywords=['redundant'],
    )