def no_victim_finetuning():
    """Config: train an adversary against a fixed victim (no victim finetuning).

    NOTE(review): the "unused" locals below appear to be sacred named-config
    entries (cf. latest_training_sacred_dir) — every assignment here becomes a
    configuration value; confirm against the experiment ingest site before
    renaming or removing any of them.
    """
    # TODO only have one of these, since they're fully determined from each other
    shape = "(64,)"
    shape_split = make_tuple(shape)
    if not shape_split:  # empty parse result means the shape string was malformed
        raise ValueError("didn't parse shape right")
    # Target shape is the parsed shape repeated twice (e.g. (64,) -> [64, 64]).
    shape_target = []
    shape_target.extend(shape_split)
    shape_target.extend(shape_split)
    adv_index = 1
    victim_index = 0
    gamma = 0.95
    lr = 0.0065
    batch_size = 2000
    adversary_policy = get_pg_pol
    victim_policy = get_pg_pol
    adversary_trainer = partial(get_pg_train, gamma=gamma, lr=lr, batch_size=batch_size, shape=[])
    victim_trainer = partial(get_pg_train, gamma=gamma, lr=lr, batch_size=batch_size, shape=[])
    victim_policy_key = "baseline"
    adversary_type_key = "total"
    dir_loc = TRAINING_SAVE_DIR
    env = "tictactoe-v0"
    adversary_training_iters = 800
    victim_finetuning_iters = 0  # victim is never finetuned in this config
    outer_loop_iters = 1
    dim = 10
    # Note that this currently isn't smart enough to ensure that your adversary and victim algs during training are
    # compatible with the ones you've chosen here, and they need to be
    prev_run = latest_training_sacred_dir(dir_loc)
    victim_trainer_func = partial(possibly_pretrained_trainer, load_weights=True)  # victim reloads all weights
    adversary_trainer_func = partial(possibly_pretrained_trainer, load_weights=False)  # adversary reloads no weights
    reload_checkpoint_index = None
def paper_config_vul_tot():
    """Paper config: "vulnerability / total" adversary variant.

    NOTE(review): looks like a sacred named-config override layered on a base
    config — each local below becomes a config entry; confirm at the ingest site.
    """
    shape = "(64,)"
    shape_split = make_tuple(shape)
    lr = 0.0001
    # Hierarchical policy with "core" and "redundant" sub-networks, both sized
    # by the parsed shape. (Parens around shape_split are redundant, not a tuple.)
    adversary_policy = partial(get_hierarchical_pg_pol, core_policy_hiddens=(shape_split), redundant_policy_hiddens=(shape_split))
    adversary_training_iters = 800
    outer_loop_iters = 1  # single adversary-training round, no iteration
    adversary_type_key = "total"
def paper_config_itr_adv_train_tot():
    """Paper config: iterated adversarial training with a "total" adversary.

    NOTE(review): looks like a sacred named-config override — each local below
    becomes a config entry; confirm at the ingest site.
    """
    shape = "(64,)"
    shape_split = make_tuple(shape)
    lr = 0.0001
    adversary_policy = partial(get_hierarchical_pg_pol, core_policy_hiddens=(shape_split), redundant_policy_hiddens=(shape_split))
    # load_weights=False: adversary does not reload pretrained weights.
    adversary_trainer_func = partial(possibly_pretrained_trainer, load_weights=False)
    adversary_training_iters = 800
    outer_loop_iters = 10  # presumably 10 alternating adversary/victim rounds — verify against the training loop
    victim_finetuning_iters = 10
    adversary_type_key = "iat_total"
    dir_loc = None
def paper_sys_ablation():
    """Ablation config: reinitializes core weights, resets redundant weights to
    pretrained values, trains both (nothing frozen).

    NOTE(review): looks like a sacred named-config override — each local below
    becomes a config entry; confirm at the ingest site.
    """
    shape = "(64,)"
    shape_split = make_tuple(shape)
    lr = 0.0001
    # Both sub-networks trainable (no redundant_trainable=False here).
    adversary_policy = partial(get_hierarchical_pg_pol, core_policy_hiddens=(shape_split), redundant_policy_hiddens=(shape_split))
    # Load pretrained weights only for parameters matching 'core'; everything
    # else starts fresh — presumably the "reset redundant" half happens elsewhere;
    # TODO confirm against possibly_pretrained_trainer.
    adversary_trainer_func = partial(possibly_pretrained_trainer, load_weights=True, pretrained_weight_keywords=['core'])
    adversary_type_key = "total_core_pretrained"
def paper_config_vul_str():
    """Paper config: "vulnerability / strategic" adversary variant.

    NOTE(review): looks like a sacred named-config override — each local below
    becomes a config entry; confirm at the ingest site.
    """
    shape = "(64,)"
    shape_split = make_tuple(shape)
    lr = 0.0001
    # Redundant sub-network is built non-trainable (redundant_trainable=False);
    # only the core sub-network is updated during training.
    adversary_policy = partial(get_hierarchical_pg_pol, core_policy_hiddens=(shape_split), redundant_policy_hiddens=(shape_split), redundant_trainable=False)
    # Load pretrained weights only for parameters matching 'redundant' — the
    # frozen part keeps its pretrained values while the core starts fresh.
    adversary_trainer_func = partial(possibly_pretrained_trainer, load_weights=True, pretrained_weight_keywords=['redundant'])
    adversary_training_iters = 800
    outer_loop_iters = 1
    adversary_type_key = "strategic"
    dir_loc = None