# Imports reconstructed for readability; project-local helpers (cmd_args, local_args,
# NstepReplayMem, NStepQNetNode, NStepQNet, node_greedy_actions, greedy_actions,
# loop_dataset) are assumed to be provided by the surrounding repository.
import random
from copy import deepcopy

import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from tqdm import tqdm


class Agent(object):
    def __init__(self, env, features, labels, idx_meta, idx_test, list_action_space, num_wrong=0):
        self.features = features
        self.labels = labels
        self.idx_meta = idx_meta
        self.idx_test = idx_test
        self.num_wrong = num_wrong
        self.list_action_space = list_action_space
        self.mem_pool = NstepReplayMem(
            memory_size=500000,
            n_steps=2 * cmd_args.num_mod,
            balance_sample=cmd_args.reward_type == 'binary')
        self.env = env

        # self.net = QNetNode(features, labels, list_action_space)
        # self.old_net = QNetNode(features, labels, list_action_space)
        self.net = NStepQNetNode(2 * cmd_args.num_mod, features, labels, list_action_space)
        self.old_net = NStepQNetNode(2 * cmd_args.num_mod, features, labels, list_action_space)

        if cmd_args.ctx == 'gpu':
            self.net = self.net.cuda()
            self.old_net = self.old_net.cuda()

        # linearly annealed epsilon-greedy exploration
        self.eps_start = 1.0
        self.eps_end = 0.05
        self.eps_step = 100000
        self.burn_in = 10
        self.step = 0
        self.pos = 0
        self.best_eval = None
        self.take_snapshot()

    def take_snapshot(self):
        # sync the target (old) network with the online network
        self.old_net.load_state_dict(self.net.state_dict())

    def make_actions(self, time_t, greedy=False):
        self.eps = self.eps_end + max(
            0., (self.eps_start - self.eps_end) * (self.eps_step - max(0., self.step)) / self.eps_step)
        if random.random() < self.eps and not greedy:
            actions = self.env.uniformRandActions()
        else:
            cur_state = self.env.getStateRef()
            actions, values = self.net(time_t, cur_state, None, greedy_acts=True, is_inference=True)
            actions = list(actions.cpu().numpy())
        return actions

    def run_simulation(self):
        if (self.pos + 1) * cmd_args.batch_size > len(self.idx_test):
            self.pos = 0
            random.shuffle(self.idx_test)
        selected_idx = self.idx_test[self.pos * cmd_args.batch_size:(self.pos + 1) * cmd_args.batch_size]
        self.pos += 1
        self.env.setup(selected_idx)

        t = 0
        list_of_list_st = []
        list_of_list_at = []
        while not self.env.isTerminal():
            list_at = self.make_actions(t)
            list_st = self.env.cloneState()
            self.env.step(list_at)

            assert (self.env.rewards is not None) == self.env.isTerminal()
            if self.env.isTerminal():
                rewards = self.env.rewards
                s_prime = None
            else:
                rewards = np.zeros(len(list_at), dtype=np.float32)
                s_prime = self.env.cloneState()

            self.mem_pool.add_list(list_st, list_at, rewards, s_prime,
                                   [self.env.isTerminal()] * len(list_at), t)
            list_of_list_st.append(deepcopy(list_st))
            list_of_list_at.append(deepcopy(list_at))
            t += 1

        if cmd_args.reward_type == 'nll':
            return

        # for binary rewards, relabel stored trajectories against alternative target
        # nodes that would have received a positive reward
        T = t
        cands = self.env.sample_pos_rewards(len(selected_idx))
        if len(cands):
            for c in cands:
                sample_idx, target = c
                doable = True
                for t in range(T):
                    if self.list_action_space[target] is not None and (
                            list_of_list_at[t][sample_idx] not in self.list_action_space[target]):
                        doable = False
                        break
                if not doable:
                    continue
                for t in range(T):
                    s_t = list_of_list_st[t][sample_idx]
                    a_t = list_of_list_at[t][sample_idx]
                    s_t = [target, deepcopy(s_t[1]), s_t[2]]
                    if t + 1 == T:
                        s_prime = (None, None, None)
                        r = 1.0
                        term = True
                    else:
                        s_prime = list_of_list_st[t + 1][sample_idx]
                        s_prime = [target, deepcopy(s_prime[1]), s_prime[2]]
                        r = 0.0
                        term = False
                    self.mem_pool.mem_cells[t].add(s_t, a_t, r, s_prime, term)

    def eval(self):
        self.env.setup(self.idx_meta)
        t = 0
        while not self.env.isTerminal():
            list_at = self.make_actions(t, greedy=True)
            self.env.step(list_at)
            t += 1

        acc = 1 - (self.env.binary_rewards + 1.0) / 2.0
        acc = np.sum(acc) / (len(self.idx_meta) + self.num_wrong)
        print('\033[93m average test: acc %.5f\033[0m' % acc)

        # parentheses added: the original `a and b or c` relied on operator precedence
        if cmd_args.phase == 'train' and (self.best_eval is None or acc < self.best_eval):
            print('----saving to best attacker since this is the best attack rate so far.----')
            torch.save(self.net.state_dict(), cmd_args.save_dir + '/epoch-best.model')
            with open(cmd_args.save_dir + '/epoch-best.txt', 'w') as f:
                f.write('%.4f\n' % acc)
            with open(cmd_args.save_dir + '/attack_solution.txt', 'w') as f:
                for i in range(len(self.idx_meta)):
                    f.write('%d: [' % self.idx_meta[i])
                    for e in self.env.modified_list[i].directed_edges:
                        f.write('(%d %d)' % e)
                    f.write('] succ: %d\n' % self.env.binary_rewards[i])
            self.best_eval = acc

    def train(self):
        # burn-in: fill the replay memory before any gradient updates
        pbar = tqdm(range(self.burn_in), unit='batch')
        for p in pbar:
            self.run_simulation()

        pbar = tqdm(range(cmd_args.num_steps), unit='steps')
        optimizer = optim.Adam(self.net.parameters(), lr=cmd_args.learning_rate)
        for self.step in pbar:  # self.step also drives the epsilon schedule in make_actions
            self.run_simulation()
            if self.step % 123 == 0:
                self.take_snapshot()
            if self.step % 500 == 0:
                self.eval()

            cur_time, list_st, list_at, list_rt, list_s_primes, list_term = self.mem_pool.sample(
                batch_size=cmd_args.batch_size)
            list_target = torch.Tensor(list_rt)
            if cmd_args.ctx == 'gpu':
                list_target = list_target.cuda()

            # bootstrap non-terminal targets with the snapshot (old) network
            if not list_term[0]:
                target_nodes, _, picked_nodes = zip(*list_s_primes)
                _, q_t_plus_1 = self.old_net(cur_time + 1, list_s_primes, None)
                _, q_rhs = node_greedy_actions(target_nodes, picked_nodes, q_t_plus_1, self.old_net)
                list_target += q_rhs

            list_target = Variable(list_target.view(-1, 1))
            _, q_sa = self.net(cur_time, list_st, list_at)
            q_sa = torch.cat(q_sa, dim=0)
            loss = F.mse_loss(q_sa, list_target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            pbar.set_description('eps: %.5f, loss: %0.5f, q_val: %.5f'
                                 % (self.eps, loss.item(), torch.mean(q_sa).item()))
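# --- Illustration (not part of the original file) ---------------------------------
# The exploration probability in make_actions() above is a plain linear anneal from
# eps_start to eps_end over eps_step steps. The standalone helper below reproduces it
# with the node-attack defaults (1.0 -> 0.05 over 100000 steps); `linear_eps` is a
# name chosen here for illustration only.
def linear_eps(step, eps_start=1.0, eps_end=0.05, eps_step=100000):
    # before eps_step: interpolate linearly; after eps_step: stay at the eps_end floor
    return eps_end + max(0., (eps_start - eps_end) * (eps_step - max(0., step)) / eps_step)

assert abs(linear_eps(0) - 1.0) < 1e-9        # start: explore with probability 1
assert abs(linear_eps(50000) - 0.525) < 1e-9  # halfway through the schedule
assert abs(linear_eps(200000) - 0.05) < 1e-9  # past the schedule: fixed floor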
class Agent(object):
    def __init__(self, g_list, test_g_list, env):
        self.g_list = g_list
        if test_g_list is None:
            self.test_g_list = g_list
        else:
            self.test_g_list = test_g_list
        self.mem_pool = NstepReplayMem(memory_size=50000, n_steps=2)
        self.env = env

        # self.net = QNet()
        self.net = NStepQNet(2)
        self.old_net = NStepQNet(2)
        if cmd_args.ctx == 'gpu':
            self.net = self.net.cuda()
            self.old_net = self.old_net.cuda()

        self.eps_start = 1.0
        self.eps_end = 1.0
        self.eps_step = 10000
        self.burn_in = 100
        self.step = 0
        self.best_eval = None
        self.pos = 0
        self.sample_idxes = list(range(len(g_list)))
        random.shuffle(self.sample_idxes)
        self.take_snapshot()

    def take_snapshot(self):
        # sync the target (old) network with the online network
        self.old_net.load_state_dict(self.net.state_dict())

    def make_actions(self, time_t, greedy=False):
        self.eps = self.eps_end + max(
            0., (self.eps_start - self.eps_end) * (self.eps_step - max(0., self.step)) / self.eps_step)
        if random.random() < self.eps and not greedy:
            actions = self.env.uniformRandActions()
        else:
            cur_state = self.env.getStateRef()
            actions, _, _ = self.net(time_t, cur_state, None, greedy_acts=True)
            actions = list(actions.cpu().numpy())
        return actions

    def run_simulation(self):
        if (self.pos + 1) * cmd_args.batch_size > len(self.sample_idxes):
            self.pos = 0
            random.shuffle(self.sample_idxes)
        selected_idx = self.sample_idxes[self.pos * cmd_args.batch_size:(self.pos + 1) * cmd_args.batch_size]
        self.pos += 1
        self.env.setup([self.g_list[idx] for idx in selected_idx])

        t = 0
        while not self.env.isTerminal():
            list_at = self.make_actions(t)
            list_st = self.env.cloneState()
            self.env.step(list_at)

            assert (self.env.rewards is not None) == self.env.isTerminal()
            if self.env.isTerminal():
                rewards = self.env.rewards
                s_prime = None
            else:
                rewards = np.zeros(len(list_at), dtype=np.float32)
                s_prime = self.env.cloneState()

            self.mem_pool.add_list(list_st, list_at, rewards, s_prime,
                                   [self.env.isTerminal()] * len(list_at), t)
            t += 1

    def eval(self):
        self.env.setup(deepcopy(self.test_g_list))
        t = 0
        while not self.env.isTerminal():
            list_at = self.make_actions(t, greedy=True)
            self.env.step(list_at)
            t += 1

        test_loss = loop_dataset(self.env.g_list, self.env.classifier, list(range(len(self.env.g_list))))
        print('\033[93m average test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))

        # parentheses added: the original `a and b or c` relied on operator precedence
        if cmd_args.phase == 'train' and (self.best_eval is None or test_loss[1] < self.best_eval):
            print('----saving to best attacker since this is the best attack rate so far.----')
            torch.save(self.net.state_dict(), cmd_args.save_dir + '/epoch-best.model')
            with open(cmd_args.save_dir + '/epoch-best.txt', 'w') as f:
                f.write('%.4f\n' % test_loss[1])
            self.best_eval = test_loss[1]

        reward = np.mean(self.env.rewards)
        print(reward)
        return reward, test_loss[1]

    def train(self):
        # line-buffered log; unbuffered text mode (buffering=0) is not allowed in Python 3
        log_out = open(cmd_args.logfile, 'w', buffering=1)

        # burn-in: fill the replay memory before any gradient updates
        pbar = tqdm(range(self.burn_in), unit='batch')
        for p in pbar:
            self.run_simulation()

        pbar = tqdm(range(local_args.num_steps), unit='steps')
        optimizer = optim.Adam(self.net.parameters(), lr=cmd_args.learning_rate)
        for self.step in pbar:  # self.step also drives the epsilon schedule in make_actions
            self.run_simulation()
            if self.step % 100 == 0:
                self.take_snapshot()
            if self.step % 100 == 0:
                r, acc = self.eval()
                log_out.write('%d %.6f %.6f\n' % (self.step, r, acc))

            cur_time, list_st, list_at, list_rt, list_s_primes, list_term = self.mem_pool.sample(
                batch_size=cmd_args.batch_size)
            list_target = torch.Tensor(list_rt)
            if cmd_args.ctx == 'gpu':
                list_target = list_target.cuda()

            # keep only non-terminal transitions for bootstrapping
            cleaned_sp = []
            nonterms = []
            for i in range(len(list_st)):
                if not list_term[i]:
                    cleaned_sp.append(list_s_primes[i])
                    nonterms.append(i)

            if len(cleaned_sp):
                _, _, banned = zip(*cleaned_sp)
                _, q_t_plus_1, prefix_sum_prime = self.old_net(cur_time + 1, cleaned_sp, None)
                _, q_rhs = greedy_actions(q_t_plus_1, prefix_sum_prime, banned)
                list_target[nonterms] = q_rhs

            # list_target = get_supervision(self.env.classifier, list_st, list_at)
            list_target = Variable(list_target.view(-1, 1))

            _, q_sa, _ = self.net(cur_time, list_st, list_at)
            loss = F.mse_loss(q_sa, list_target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            pbar.set_description('exp: %.5f, loss: %0.5f' % (self.eps, loss.item()))

        log_out.close()
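# --- Illustration (not part of the original file) ---------------------------------
# Toy version of the 1-step Q-target built in train() above: terminal transitions keep
# their stored reward, non-terminal ones are overwritten with max_a' Q_old(s', a').
# The tensors below are made-up stand-ins for list_rt and the greedy_actions() output.
import torch

rewards  = torch.tensor([0.0, 1.0, 0.0])   # list_rt: non-terminal rewards are zero
terminal = [False, True, False]            # list_term
q_next   = torch.tensor([0.3, -0.2])       # max_a' Q_old(s', a') for the non-terminal rows

nonterms = [i for i, done in enumerate(terminal) if not done]
target = rewards.clone()
target[nonterms] = q_next                  # mirrors: list_target[nonterms] = q_rhs
print(target.view(-1, 1))                  # -> [[0.3], [1.0], [-0.2]], the regression target for Q(s, a)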
class Agent(object):
    def __init__(self, g_list, test_g_list, env):
        self.g_list = g_list
        if test_g_list is None:
            self.test_g_list = g_list
        else:
            self.test_g_list = test_g_list
        self.mem_pool = NstepReplayMem(memory_size=50000, n_steps=2)
        self.env = env

        # self.net = QNet()
        self.net = NStepQNet(2)
        self.old_net = NStepQNet(2)
        if cmd_args.ctx == 'gpu':
            self.net = self.net.cuda()
            self.old_net = self.old_net.cuda()

        self.eps_start = 1.0
        self.eps_end = 1.0
        self.eps_step = 10000
        self.burn_in = 100
        self.step = 0
        self.best_eval = None
        self.pos = 0
        self.sample_idxes = list(range(len(g_list)))
        random.shuffle(self.sample_idxes)
        self.take_snapshot()

    def take_snapshot(self):
        self.old_net.load_state_dict(self.net.state_dict())

    def make_actions(self, time_t, greedy=False):
        self.eps = self.eps_end + max(
            0., (self.eps_start - self.eps_end) * (self.eps_step - max(0., self.step)) / self.eps_step)
        if random.random() < self.eps and not greedy:
            actions = self.env.uniformRandActions()
        else:
            cur_state = self.env.getStateRef()
            actions, _, _ = self.net(time_t, cur_state, None, greedy_acts=True)
            actions = list(actions.cpu().numpy())
        return actions

    def run_simulation(self):
        if (self.pos + 1) * cmd_args.batch_size > len(self.sample_idxes):
            self.pos = 0
            random.shuffle(self.sample_idxes)
        selected_idx = self.sample_idxes[self.pos * cmd_args.batch_size:(self.pos + 1) * cmd_args.batch_size]
        self.pos += 1
        self.env.setup([self.g_list[idx] for idx in selected_idx])

        t = 0
        while not self.env.isTerminal():
            list_at = self.make_actions(t)
            list_st = self.env.cloneState()
            self.env.step(list_at)

            assert (self.env.rewards is not None) == self.env.isTerminal()
            if self.env.isTerminal():
                rewards = self.env.rewards
                s_prime = None
            else:
                rewards = np.zeros(len(list_at), dtype=np.float32)
                s_prime = self.env.cloneState()

            self.mem_pool.add_list(list_st, list_at, rewards, s_prime,
                                   [self.env.isTerminal()] * len(list_at), t)
            t += 1

    def eval(self):
        self.env.setup(deepcopy(self.test_g_list))
        t = 0
        while not self.env.isTerminal():
            list_at = self.make_actions(t, greedy=True)
            self.env.step(list_at)
            t += 1

        test_loss = loop_dataset(self.env.g_list, self.env.classifier, list(range(len(self.env.g_list))))
        print('\033[93m average test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))

        # dump per-graph results: true label, predicted label, and the added edge
        with open('%s/edge_added.txt' % cmd_args.save_dir, 'w') as f:
            for i in range(len(self.test_g_list)):
                f.write('%d %d ' % (self.test_g_list[i].label, self.env.pred[i] + 1))
                f.write('%d %d\n' % self.env.added_edges[i])

        reward = np.mean(self.env.rewards)
        print(reward)
        return reward, test_loss[1]
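# --- Illustration (not part of the original file) ---------------------------------
# Standalone sketch of the sampling cursor used in run_simulation() above: walk through
# the shuffled indices in fixed-size chunks and reshuffle/wrap once a full batch no
# longer fits (any trailing partial batch is dropped, as in the code above).
# BatchCursor, n=5 and batch_size=2 are illustrative choices, not the repo's API.
import random

class BatchCursor(object):
    def __init__(self, n, batch_size):
        self.sample_idxes = list(range(n))
        self.batch_size = batch_size
        self.pos = 0
        random.shuffle(self.sample_idxes)

    def next_batch(self):
        if (self.pos + 1) * self.batch_size > len(self.sample_idxes):
            self.pos = 0
            random.shuffle(self.sample_idxes)
        batch = self.sample_idxes[self.pos * self.batch_size:(self.pos + 1) * self.batch_size]
        self.pos += 1
        return batch

cursor = BatchCursor(n=5, batch_size=2)
for _ in range(4):
    print(cursor.next_batch())  # two disjoint pairs per pass, then a reshuffled pass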