def benchmark_adversarial_policy(args=get_args()):
    env = make_atari_env_watch(args)
    if args.save_video:
        log_path = os.path.join(
            args.logdir, args.task, args.policy,
            "critical_point_attack_eps-" + str(args.eps) +
            "_n-" + str(args.n) + "_m-" + str(args.m) + "_" + args.target_policy)
        env = gym.wrappers.Monitor(env, log_path, force=True)
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.env.action_space.shape or env.env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape: ", args.state_shape)
    print("Actions shape: ", args.action_shape)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # make policy
    policy = make_policy(args, args.policy, args.resume_path)
    # make target policy
    if args.target_policy is not None:
        victim_policy = make_policy(args, args.target_policy,
                                    args.target_policy_path)
        adv_net = make_victim_network(args, victim_policy)
    else:
        adv_net = make_victim_network(args, policy)
    # define observations adversarial attack
    obs_adv_atk, atk_type = make_img_adv_attack(args, adv_net, targeted=True)
    print("Attack type:", atk_type)
    # define adversarial collector
    acts_mask = None
    dam = None
    delta = None  # default so tasks other than Pong/Breakout do not raise NameError
    if "Pong" in args.task:
        acts_mask = [3, 4]
        dam = dam_pong
        delta = 100
    if "Breakout" in args.task:
        acts_mask = [1, 2, 3]
        dam = dam_breakout
        delta = 100
    collector = critical_point_attack_collector(
        policy, env, obs_adv_atk,
        perfect_attack=args.perfect_attack,
        acts_mask=acts_mask,
        device=args.device,
        full_search=args.full_search,
        repeat_adv_act=args.repeat_act,
        dam=dam,
        delta=delta)
    collector.n = int(args.n * args.repeat_act)
    collector.m = int(args.m * args.repeat_act)
    start_time = time.time()
    test_adversarial_policy = collector.collect(n_episode=args.test_num)
    print("Attack finished in %s seconds" % (time.time() - start_time))
    atk_freq_ = test_adversarial_policy['atk_rate(%)']
    reward = test_adversarial_policy['rew']
    n_attacks = test_adversarial_policy['n_atks']
    print("attack frequency =", atk_freq_,
          "| n_attacks =", n_attacks,
          "| n_succ_atks (%)", test_adversarial_policy['succ_atks(%)'],
          "| reward: ", reward)
def benchmark_adversarial_policy(args=get_args()):
    env = make_atari_env_watch(args)
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.env.action_space.shape or env.env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape: ", args.state_shape)
    print("Actions shape: ", args.action_shape)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # make policy
    policy = make_policy(args, args.policy, args.resume_path)
    # make target policy
    transferability_type = ""
    # THIS PART MAY BE REMOVED
    if "def" in args.logdir and args.target_policy is None:
        warnings.warn(
            "You are generating adversarial observations on the defended model; "
            "you may want to craft them on the undefended version instead.")
    if args.target_policy is not None:
        victim_policy = make_policy(args, args.target_policy,
                                    args.target_policy_path)
        transferability_type = "_transf_" + str(args.target_policy)
        adv_net = make_victim_network(args, victim_policy)
    else:
        adv_net = make_victim_network(args, policy)
    # define observations adversarial attack
    obs_adv_atk, atk_type = make_img_adv_attack(args, adv_net, targeted=False)
    print("Attack type:", atk_type)
    # define adversarial collector
    collector = uniform_attack_collector(policy, env, obs_adv_atk,
                                         perfect_attack=args.perfect_attack,
                                         device=args.device)
    atk_freq = np.linspace(args.min, args.max, args.steps, endpoint=True)
    n_attacks = []
    rewards = []
    for f in atk_freq:
        collector.atk_frequency = f
        test_adversarial_policy = collector.collect(n_episode=args.test_num)
        atk_freq_ = test_adversarial_policy['atk_rate(%)']
        rewards.append(test_adversarial_policy['rew'])
        n_attacks.append(test_adversarial_policy['n_atks'])
        print("attack frequency =", atk_freq_,
              "| n_attacks =", n_attacks[-1],
              "| n_succ_atks (%)", test_adversarial_policy['succ_atks(%)'],
              "| reward: ", rewards[-1])
        # pprint.pprint(test_adversarial_policy)
    log_path = os.path.join(
        args.logdir, args.task, args.policy,
        "uniform_attack_" + atk_type + transferability_type + ".npy")
    # save results
    with open(log_path, 'wb') as f:
        np.save(f, atk_freq)
        np.save(f, n_attacks)
        np.save(f, rewards)
    print("Results saved to", log_path)
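# Illustrative helper, not part of the original scripts (the function name is
# hypothetical): the benchmarks that save results write three arrays back-to-back
# into a single .npy file via repeated np.save calls on one handle, so they can
# be read back with np.load calls in the same order.
import numpy as np

def load_attack_results(log_path):
    with open(log_path, 'rb') as f:
        atk_freq = np.load(f)
        n_attacks = np.load(f)
        rewards = np.load(f)
    return atk_freq, n_attacks, rewards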
def benchmark_adversarial_policy(args=get_args()):
    env = make_atari_env_watch(args)
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.env.action_space.shape or env.env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape: ", args.state_shape)
    print("Actions shape: ", args.action_shape)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # make policy
    policy = make_policy(args, args.policy, args.resume_path)
    # make target policy
    transferability_type = ""
    if args.target_policy is not None:
        victim_policy = make_policy(args, args.target_policy,
                                    args.target_policy_path)
        transferability_type = "_transf_" + str(args.target_policy)
        adv_net = make_victim_network(args, victim_policy)
    else:
        adv_net = make_victim_network(args, policy)
    # define observations adversarial attack
    obs_adv_atk, atk_type = make_img_adv_attack(args, adv_net, targeted=True)
    print("Attack type:", atk_type)
    # define adversarial collector
    collector = strategically_timed_attack_collector(
        policy, env, obs_adv_atk,
        perfect_attack=args.perfect_attack,
        softmax=not args.no_softmax,
        device=args.device)
    beta = np.linspace(args.min, args.max, args.steps, endpoint=True)
    atk_freq = []
    n_attacks = []
    rewards = []
    for b in beta:
        collector.beta = b
        test_adversarial_policy = collector.collect(n_episode=args.test_num)
        rewards.append(test_adversarial_policy['rew'])
        atk_freq.append(test_adversarial_policy['atk_rate(%)'])
        n_attacks.append(test_adversarial_policy['n_atks'])
        print("attack frequency =", atk_freq[-1],
              "| n_attacks =", n_attacks[-1],
              "| n_succ_atks (%)", test_adversarial_policy['succ_atks(%)'],
              "| reward: ", rewards[-1])
        # pprint.pprint(test_adversarial_policy)
    log_path = os.path.join(
        args.logdir, args.task, args.policy,
        "strategically_timed_attack_" + atk_type + transferability_type + ".npy")
    with open(log_path, 'wb') as f:
        np.save(f, atk_freq)
        np.save(f, n_attacks)
        np.save(f, rewards)
    print("Results saved to", log_path)
def benchmark_adversarial_policy(args=get_args()):
    env = make_atari_env_watch(args)
    if args.save_video:
        log_path = os.path.join(
            args.logdir, args.task, args.policy,
            "adversarial_policy_attack_eps-" + str(args.eps) +
            "_beta-" + str(args.beta) + "_" + args.target_policy)
        env = gym.wrappers.Monitor(env, log_path, force=True)
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.env.action_space.shape or env.env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape: ", args.state_shape)
    print("Actions shape: ", args.action_shape)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # make policy
    policy = make_policy(args, args.policy, args.resume_path)
    # make target policy
    if args.target_policy is not None:
        victim_policy = make_policy(args, args.target_policy,
                                    args.target_policy_path)
        adv_net = make_victim_network(args, victim_policy)
    else:
        adv_net = make_victim_network(args, policy)
    # define observations adversarial attack
    obs_adv_atk, atk_type = make_img_adv_attack(args, adv_net, targeted=True)
    print("Attack type:", atk_type)
    # define adversarial policy
    adv_policy = None
    if args.adv_policy is not None:
        adv_policy = make_policy(args, args.adv_policy, args.adv_policy_path)
    # define adversarial collector
    collector = adversarial_policy_attack_collector(
        policy, env, obs_adv_atk,
        perfect_attack=args.perfect_attack,
        softmax=not args.no_softmax,
        device=args.device,
        adv_policy=adv_policy)
    collector.beta = args.beta
    start_time = time.time()
    test_adversarial_policy = collector.collect(n_episode=args.test_num)
    print("Attack finished in %s seconds" % (time.time() - start_time))
    atk_freq_ = test_adversarial_policy['atk_rate(%)']
    reward = test_adversarial_policy['rew']
    n_attacks = test_adversarial_policy['n_atks']
    print("attack frequency =", atk_freq_,
          "| n_attacks =", n_attacks,
          "| n_succ_atks (%)", test_adversarial_policy['succ_atks(%)'],
          "| reward: ", reward)
def benchmark_adversarial_policy(args=get_args()):
    env = make_atari_env_watch(args)
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.env.action_space.shape or env.env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape: ", args.state_shape)
    print("Actions shape: ", args.action_shape)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # make policy
    policy = make_policy(args, args.policy, args.resume_path)
    # make target policy
    transferability_type = ""
    if args.target_policy is not None:
        victim_policy = make_policy(args, args.target_policy,
                                    args.target_policy_path)
        transferability_type = "_transf_" + str(args.target_policy)
        adv_net = make_victim_network(args, victim_policy)
    else:
        adv_net = make_victim_network(args, policy)
    # define observations adversarial attack
    obs_adv_atk, atk_type = make_img_adv_attack(args, adv_net, targeted=True)
    print("Attack type:", atk_type)
    # define adversarial collector
    acts_mask = None
    delta = 0  # default so tasks other than Pong/Breakout do not raise NameError
    if "Pong" in args.task:
        acts_mask = [3, 4]
        delta = 0
    if "Breakout" in args.task:
        acts_mask = [1, 2, 3]
        delta = 0
    collector = critical_strategy_attack_collector(
        policy, env, obs_adv_atk,
        perfect_attack=args.perfect_attack,
        acts_mask=acts_mask,
        device=args.device,
        full_search=args.full_search,
        repeat_adv_act=args.repeat_act,
        delta=delta)
    n_range = list(np.arange(args.min, args.max)) + [args.max]
    m_range = [0., 0.25, 0.5, 0.75, 1.]
    atk_freq = []
    n_attacks = []
    rewards = []
    for n in n_range:
        for m in m_range:
            collector.n = int(n * args.repeat_act)
            collector.m = int(n * args.repeat_act + n * args.repeat_act * m)
            test_adversarial_policy = collector.collect(
                n_episode=args.test_num)
            rewards.append(test_adversarial_policy['rew'])
            atk_freq.append(test_adversarial_policy['atk_rate(%)'])
            n_attacks.append(test_adversarial_policy['n_atks'])
            print("n =", str(int(n * args.repeat_act)),
                  "m =", str(int(n * args.repeat_act + n * args.repeat_act * m)),
                  "| attack frequency =", atk_freq[-1],
                  "| n_attacks =", n_attacks[-1],
                  "| n_succ_atks (%)", test_adversarial_policy['succ_atks(%)'],
                  "| reward: ", rewards[-1])
            # pprint.pprint(test_adversarial_policy)
    log_path = os.path.join(
        args.logdir, args.task, args.policy,
        "critical_strategy_attack_" + atk_type + transferability_type + ".npy")
    with open(log_path, 'wb') as f:
        np.save(f, atk_freq)
        np.save(f, n_attacks)
        np.save(f, rewards)
    print("Results saved to", log_path)
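# Worked example of the (n, m) grid above, with assumed values
# args.repeat_act = 5, n = 2, m = 0.5 (all hypothetical):
#   collector.n = int(2 * 5)               -> 10
#   collector.m = int(2 * 5 + 2 * 5 * 0.5) -> 15
# so for a fixed n, sweeping m_range moves collector.m from collector.n
# (m = 0.) up to 2 * collector.n (m = 1.).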
def test_dqn(args=get_args()):
    env = make_atari_env(args)
    args.state_shape = env.observation_space.shape or env.observation_space.n
    args.action_shape = env.env.action_space.shape or env.env.action_space.n
    # should be N_FRAMES x H x W
    print("Observations shape: ", args.state_shape)
    print("Actions shape: ", args.action_shape)
    # make environments
    train_envs = SubprocVectorEnv(
        [lambda: make_atari_env(args) for _ in range(args.training_num)])
    test_envs = SubprocVectorEnv(
        [lambda: make_atari_env_watch(args) for _ in range(1)])
    # seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    test_envs.seed(args.seed)
    # define model
    net = DQN(*args.state_shape,
              args.action_shape, args.device).to(args.device)
    optim = torch.optim.Adam(net.parameters(), lr=args.lr)
    # define policy
    policy = DQNPolicy(net, optim, args.gamma, args.n_step,
                       target_update_freq=args.target_update_freq)
    # load a previous policy
    if args.resume_path:
        policy.load_state_dict(torch.load(args.resume_path))
        print("Loaded agent from: ", args.resume_path)
    if args.target_model_path:
        victim_policy = copy.deepcopy(policy)
        victim_policy.load_state_dict(torch.load(args.target_model_path))
        print("Loaded victim agent from: ", args.target_model_path)
    else:
        victim_policy = policy
    args.target_policy, args.policy = "dqn", "dqn"
    args.perfect_attack = False
    adv_net = make_victim_network(args, victim_policy)
    adv_atk, _ = make_img_adv_attack(args, adv_net, targeted=False)
    buffer = ReplayBuffer(args.buffer_size, ignore_obs_next=True)
    # collector
    train_collector = adversarial_training_collector(
        policy, train_envs, adv_atk, buffer,
        atk_frequency=args.atk_freq,
        device=args.device)
    test_collector = adversarial_training_collector(
        policy, test_envs, adv_atk, buffer,
        atk_frequency=args.atk_freq,
        test=True,
        device=args.device)
    # log
    log_path = os.path.join(args.logdir, args.task, 'dqn')
    writer = SummaryWriter(log_path)

    def save_fn(policy, policy_name='policy.pth'):
        torch.save(policy.state_dict(), os.path.join(log_path, policy_name))

    def stop_fn(x):
        return 0

    def train_fn(epoch, env_step):
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = args.eps_train - env_step / 1e6 * \
                (args.eps_train - args.eps_train_final)
        else:
            eps = args.eps_train_final
        policy.set_eps(eps)
        writer.add_scalar('train/eps', eps, global_step=env_step)
        print("set eps =", policy.eps)

    def test_fn(epoch, env_step):
        policy.set_eps(args.eps_test)

    # watch agent's performance
    def watch():
        assert args.target_model_path is not None
        print("Testing agent ...")
        policy.eval()
        policy.set_eps(args.eps_test)
        test_envs.seed(args.seed)
        test_collector.reset()
        result = test_collector.collect(n_episode=[args.test_num],
                                        render=args.render)
        pprint.pprint(result)

    if args.watch:
        watch()
        exit(0)

    # test train_collector and start filling replay buffer
    train_collector.collect(n_step=args.batch_size * 4)
    # trainer
    result = offpolicy_trainer(
        policy, train_collector, test_collector, args.epoch,
        args.step_per_epoch, args.collect_per_step, args.test_num,
        args.batch_size, train_fn=train_fn, test_fn=test_fn,
        stop_fn=stop_fn, save_fn=save_fn, writer=writer,
        test_in_train=False)

    pprint.pprint(result)
    watch()
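# Minimal entry-point sketch (an assumption, not shown in the original code):
# the args=get_args() defaults suggest these functions are meant to be run as
# standalone scripts.
if __name__ == '__main__':
    test_dqn(get_args())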
accuracies = []
collector.atk_frequency = args.attack_freq
for eps in atk_eps:
    # define observations adversarial attack
    args.eps, args.image_attack = eps, img_atk
    episodes = args.test_num
    if img_atk == "No Attack":
        # we can assign a random attack since the frequency is 0
        args.image_attack = "GradientSignAttack"
        collector.atk_frequency = 0
        episodes *= 2
    if collector.atk_frequency > 0 or len(rewards) == 0:
        collector.obs_adv_atk, _ = make_img_adv_attack(
            args, adv_net, targeted=args.targeted)
        test_adversarial_policy = collector.collect(n_episode=episodes)
        rewards.append(test_adversarial_policy['rew'])
        accuracies.append(test_adversarial_policy['succ_atks(%)'])
    else:
        rewards.append(rewards[-1])
        accuracies.append(accuracies[-1])
str_rewards = [str(x) for x in rewards]
str_accuracies = [str(x) for x in accuracies]
print(attack_labels[img_atk] + "|" + defense_labels[defense] + "|" +
      " ".join(str_rewards) + "|" + " ".join(str_accuracies))
f_rew.write(attack_labels[img_atk] + "|" + defense_labels[defense] + "|" +
            " ".join(str_rewards) + "|" + " ".join(str_accuracies) + "\n")
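# Hypothetical helper (not part of the original code): each line written to
# f_rew above has the form
#   "<attack label>|<defense label>|<rewards ...>|<success rates ...>",
# so it can be read back with a simple split.
def parse_result_line(line):
    attack, defense, rewards_str, accs_str = line.strip().split("|")
    return {
        "attack": attack,
        "defense": defense,
        "rewards": [float(x) for x in rewards_str.split()],
        "succ_atks(%)": [float(x) for x in accs_str.split()],
    }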