def detect(self, **kwargs):
    """Recover one candidate trigger per class, print statistics and save them.

    After delegating to the parent ``detect``, random placement is disabled
    and both offsets of the attack's mark are reset to 0. The candidate
    marks, masks and losses returned by ``get_potential_triggers`` are
    summarised on stdout (L1 mask norms, losses and their ``normalize_mad``
    values) and written to ``<folder_path><filename>.npz``.

    Args:
        **kwargs: forwarded unchanged to the parent ``detect``.
    """
    super().detect(**kwargs)
    target_class = self.attack.target_class

    mark = self.attack.mark
    mark.random_pos = False
    mark.height_offset = 0
    # Fixed typo: this used to assign ``width_offest``, which only created a
    # new, unused attribute and left the real width offset untouched.
    mark.width_offset = 0
    if not self.random_pos:
        # Keep the ground-truth mask for the Jaccard comparison below.
        self.real_mask = mark.mask

    mark_list, mask_list, loss_list = self.get_potential_triggers()

    # L1 norm of every recovered mask, one value per class.
    mask_norms = mask_list.flatten(start_dim=1).norm(p=1, dim=1)
    print('mask norms: ', mask_norms)
    print('mask MAD: ', normalize_mad(mask_norms))
    print('loss: ', loss_list)
    print('loss MAD: ', normalize_mad(loss_list))

    if not self.random_pos:
        overlap = jaccard_idx(
            mask_list[self.attack.target_class], self.real_mask,
            select_num=self.attack.mark.mark_height * self.attack.mark.mark_width)
        print(f'Jaccard index: {overlap:.3f}')

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(self.folder_path, exist_ok=True)
    result_path = (self.folder_path
                   + self.get_filename(target_class=target_class) + '.npz')
    np.savez(result_path,
             mark_list=[to_numpy(i) for i in mark_list],
             mask_list=[to_numpy(i) for i in mask_list],
             loss_list=[to_numpy(i) for i in loss_list])
    print('Defense results saved at: ' + result_path)
def get_potential_triggers(
        self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Call ``remask`` for every class and checkpoint the results to disk.

    After each class the accumulated results are written to
    ``<folder_path>/<filename>.npz`` so that partial progress is kept.

    Returns:
        tuple[torch.Tensor, torch.Tensor, torch.Tensor]: the stacked marks,
        the stacked masks and the losses, each with one entry per class.
    """

    def _as_numpy(value):
        # Tensor.numpy() only works for CPU tensors that do not require
        # grad, so detach and move to host memory before handing to NumPy.
        if isinstance(value, torch.Tensor):
            return value.detach().cpu().numpy()
        return value

    mark_list, mask_list, loss_list = [], [], []
    file_path = os.path.normpath(
        os.path.join(
            self.folder_path,
            self.get_filename(target_class=self.target_class) + '.npz'))
    # np.savez does not create missing directories.
    folder = os.path.dirname(file_path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # TODO: parallelise over classes to avoid the for loop.
    num_classes = self.model.num_classes
    for label in range(num_classes):
        print('Class: ', output_iter(label, num_classes))
        mark, mask, loss = self.remask(label)
        mark_list.append(mark)
        mask_list.append(mask)
        loss_list.append(loss)
        if not self.random_pos:
            overlap = jaccard_idx(
                mask, self.real_mask,
                select_num=self.attack.mark.mark_height * self.attack.mark.mark_width)
            print(f'Jaccard index: {overlap:.3f}')
        # Previously the raw tensor lists were passed to np.savez, which
        # fails for tensors on an accelerator or with requires_grad=True.
        np.savez(file_path,
                 mark_list=[_as_numpy(item) for item in mark_list],
                 mask_list=[_as_numpy(item) for item in mask_list],
                 loss_list=[_as_numpy(item) for item in loss_list])
        print('Defense results saved at: ' + file_path)

    return (torch.stack(mark_list),
            torch.stack(mask_list),
            torch.as_tensor(loss_list))
def get_potential_triggers(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Run ``remask`` once per class and gather the recovered triggers.

    Returns:
        tuple[torch.Tensor, torch.Tensor, torch.Tensor]: stacked marks,
        stacked masks and the losses, one entry per class in class order.
    """
    # TODO: parallelise over classes to avoid the for loop.
    recovered = []
    class_count = self.model.num_classes
    for class_id in range(class_count):
        print('Class: ', output_iter(class_id, class_count))
        mark, mask, loss = self.remask(class_id)
        recovered.append((mark, mask, loss))
        if self.random_pos:
            # No fixed ground-truth mask to compare against.
            continue
        patch_area = self.attack.mark.mark_height * self.attack.mark.mark_width
        overlap = jaccard_idx(mask, self.real_mask, select_num=patch_area)
        print(f'Jaccard index: {overlap:.3f}')

    marks = torch.stack([entry[0] for entry in recovered])
    masks = torch.stack([entry[1] for entry in recovered])
    losses = torch.as_tensor([entry[2] for entry in recovered])
    return marks, masks, losses
def trigger_detect(self, _input: torch.Tensor):
    """Search each input for a patch that behaves like a backdoor trigger.

    For every image, ``sample_num`` random patches of size ``self.size`` are
    overwritten with the image's dominant colour. Positions where this
    changes the predicted class are treated as candidates: the original
    patch content at that position is installed as the attack mark and
    ``confirm_backdoor`` is called. An input is flagged when the returned
    accuracy exceeds ``self.threshold_t``.

    Args:
        _input (torch.Tensor): (N, C, H, W)

    Returns:
        tuple[torch.Tensor, torch.Tensor]: a bool tensor of shape (N) that
        marks flagged inputs, and a float tensor of shape (N, H, W) holding
        the mask of the last confirmed position per input (zeros otherwise).
    """
    num_inputs = len(_input)
    patch_h, patch_w = self.size[0], self.size[1]

    # get dominant color
    dom_c = torch.stack([self.get_dominant_colour(img) for img in _input])
    dom_c = dom_c.unsqueeze(-1).unsqueeze(-1)  # (N, C, 1, 1)

    # generate random numbers
    height, width = _input.shape[-2:]
    # `high` is exclusive in torch.randint: the +1 keeps the last valid
    # offset reachable and avoids an error when the patch spans the image.
    pos_height: torch.Tensor = torch.randint(
        low=0, high=height - patch_h + 1,
        size=[num_inputs, self.sample_num])  # (N, sample_num)
    pos_width: torch.Tensor = torch.randint(
        low=0, high=width - patch_w + 1,
        size=[num_inputs, self.sample_num])  # (N, sample_num)
    # Stack on a new last axis. The former stack(...).transpose(0, -1)
    # produced (sample_num, N, 2), which mis-indexes when N != sample_num.
    pos_list: torch.Tensor = torch.stack(
        [pos_height, pos_width], dim=-1)  # (N, sample_num, 2)

    # block potential triggers on _input
    block_input = _input.unsqueeze(1).repeat(
        1, self.sample_num, 1, 1, 1)  # (N, sample_num, C, H, W)
    for i in range(num_inputs):
        for j, (x, y) in enumerate(pos_list[i].tolist()):
            block_input[i, j, :, x:x + patch_h, y:y + patch_w] = dom_c[i]

    # get potential triggers
    _input = to_tensor(_input)
    block_input = to_tensor(block_input)
    org_class = self.model.get_class(_input).unsqueeze(1).repeat(
        1, self.sample_num)  # (N, sample_num)
    block_class = torch.stack(
        [self.model.get_class(block_input[:, j])
         for j in range(self.sample_num)],
        dim=1)  # (N, sample_num)
    # True where blocking the patch did NOT change the prediction.
    potential_idx: torch.Tensor = org_class.eq(
        block_class).detach().cpu()  # (N, sample_num)

    # confirm triggers
    result_list = torch.zeros(len(_input), dtype=torch.bool)
    mask_shape = [_input.shape[0], _input.shape[-2], _input.shape[-1]]
    mask_list = torch.zeros(mask_shape, dtype=torch.float)  # (N, H, W)
    mark_class = self.attack.mark
    for i in range(len(_input)):
        print(f'input {i:3d}')
        # Only positions whose blocking flipped the class are candidates.
        pos_pairs = pos_list[i][~potential_idx[i]]  # (*, 2)
        if len(pos_pairs) == 0:
            continue
        for j, pos in enumerate(pos_pairs):
            mark_class.height_offset = pos[0]
            mark_class.width_offset = pos[1]
            # Use the original patch content as the candidate mark.
            mark_class.org_mark = _input[i, :,
                                         pos[0]:pos[0] + patch_h,
                                         pos[1]:pos[1] + patch_w]
            mark_class.org_mask = torch.ones(self.size, dtype=torch.bool)
            mark_class.org_alpha_mask = torch.ones(self.size,
                                                   dtype=torch.float)
            mark_class.mark, mark_class.mask, mark_class.alpha_mask = \
                mark_class.mask_mark(height_offset=pos[0],
                                     width_offset=pos[1])
            target_acc = self.confirm_backdoor()
            output_str = f' {j:3d} Acc: {target_acc:5.2f}'
            if not mark_class.random_pos:
                overlap = jaccard_idx(mark_class.mask.detach().cpu(),
                                      self.real_mask.detach().cpu(),
                                      select_num=patch_h * patch_w)
                output_str += f' Jaccard Idx: {overlap:5.3f}'
            print(output_str)
            if target_acc > self.threshold_t:
                result_list[i] = True
                mask_list[i] = mark_class.mask
    return result_list, mask_list
def get_potential_triggers(self, neuron_dict: dict[int, list[dict]], _input: torch.Tensor, _label: torch.Tensor, use_mask=True) -> dict[int, list[dict]]:
    """Recover a trigger for every candidate neuron and score it.

    Each candidate dict in ``neuron_dict`` (keys ``'layer'``, ``'neuron'``
    and ``'value'`` are read) is passed to ``remask``. The recovered trigger
    is installed on ``self.attack`` and evaluated with
    ``self.model._validate``. The results are written back into the same
    dict under ``'loss'``, ``'attack_acc'``, ``'attack_loss'``, ``'mask'``,
    ``'mark'``, ``'norm'`` and ``'jaccard'``. After every candidate,
    ``neuron_dict`` and the per-label best candidates are saved as ``.npy``
    files under ``self.folder_path``.

    Args:
        neuron_dict: maps each label to its candidate neurons; updated in
            place. Labels are also used as indices into a score list of
            length ``len(neuron_dict)``.
        _input: batch forwarded to ``remask``.
        _label: unused here; kept for interface compatibility.
        use_mask: forwarded to ``remask``.

    Returns:
        dict[int, list[dict]]: the same ``neuron_dict`` object.

    Raises:
        KeyError: if a label has no candidates, because no best entry
            exists for its summary line.
    """
    norm_weight = 7e-2      # weight of the mask L1 norm in the score
    success_acc = 90        # attack accuracy above which meters are updated

    losses = AverageMeter('Loss', ':.4e')
    norms = AverageMeter('Norm', ':6.2f')
    jaccard = AverageMeter('Jaccard Idx', ':6.2f')
    score_list = [0.0] * len(neuron_dict)
    result_dict = {}
    for label, label_list in neuron_dict.items():
        print('label: ', label)
        # Start from +inf rather than the former magic 100.0: with 100.0 a
        # label whose every score was >= 100 never entered result_dict and
        # the summary print below raised KeyError.
        best_score = float('inf')
        for _dict in reversed(label_list):
            layer = _dict['layer']
            neuron = _dict['neuron']
            value = _dict['value']
            mark, mask, loss = self.remask(_input, layer=layer, neuron=neuron,
                                           label=label, use_mask=use_mask)
            # Install the recovered trigger so validation uses it.
            self.attack.mark.mark = mark
            self.attack.mark.alpha_mask = mask
            self.attack.mark.mask = torch.ones_like(mark, dtype=torch.bool)
            self.attack.target_class = label
            attack_loss, attack_acc = self.model._validate(
                verbose=False, get_data_fn=self.attack.get_data,
                keep_org=False)

            mask_norm = mask.norm(p=1)  # computed once, reused below
            _dict['loss'] = loss
            _dict['attack_acc'] = attack_acc
            _dict['attack_loss'] = attack_loss
            _dict['mask'] = to_numpy(mask)
            _dict['mark'] = to_numpy(mark)
            _dict['norm'] = float(mask_norm)

            score = attack_loss + norm_weight * float(mask_norm)
            if score < best_score:
                best_score = score
                # Same dict object: the 'jaccard' key set below shows up too.
                result_dict[label] = _dict
            if attack_acc > success_acc:
                losses.update(loss)
                norms.update(mask_norm)

            _str = f' layer: {layer:20s} neuron: {neuron:5d} value: {value:.3f}'
            _str += (f' loss: {loss:10.3f}'
                     f' ATK Acc: {attack_acc:.3f}'
                     f' ATK Loss: {attack_loss:10.3f}'
                     f' Norm: {mask_norm:.3f}'
                     f' Score: {score:.3f}')
            if not self.attack.mark.random_pos:
                overlap = jaccard_idx(mask, self.real_mask)
                _dict['jaccard'] = overlap
                _str += f' Jaccard: {overlap:.3f}'
                if attack_acc > success_acc:
                    jaccard.update(overlap)
            else:
                _dict['jaccard'] = 0.0
            print(_str)

            # Checkpoint after every candidate.
            os.makedirs(self.folder_path, exist_ok=True)
            base_path = (self.folder_path
                         + self.get_filename(target_class=self.target_class))
            np.save(base_path + '.npy', neuron_dict)
            np.save(base_path + '_best.npy', result_dict)

        best = result_dict[label]
        print(
            f'Label: {label:3d} loss: {best["loss"]:10.3f} ATK loss: {best["attack_loss"]:10.3f} Norm: {best["norm"]:10.3f} Jaccard: {best["jaccard"]:10.3f} Score: {best_score:.3f}'
        )
        score_list[label] = best_score
    print('Score: ', score_list)
    print('Score MAD: ', normalize_mad(score_list))
    return neuron_dict
def remask(self, label: int) -> tuple[float, torch.Tensor]:
    """Train a fresh ``Generator`` to produce an additive trigger for ``label``.

    The generator maps a fixed noise vector and the label to a perturbation
    ``mark`` that is added to every input and clamped to [0, 1]. It is
    optimised with Adam on ``criterion(output, label) + gamma_2 * L1(mark)``
    for ``self.remask_epoch`` epochs over ``self.loader``. After each epoch
    the current trigger is validated with ``self.model._validate``.

    Args:
        label (int): class the recovered trigger should steer inputs to.

    Returns:
        tuple[float, torch.Tensor]: the last epoch's average loss
        (``losses.avg``; presumably a float since the meter is fed
        ``.item()`` values) and the last generated ``mark``, detached from
        the autograd graph. With zero epochs, ``mark`` is an all-zero
        tensor of shape ``self.dataset.data_shape``.
    """
    generator = Generator(self.noise_dim, self.dataset.num_classes,
                          self.dataset.data_shape)
    for param in generator.parameters():
        param.requires_grad_()
    optimizer = optim.Adam(generator.parameters(), lr=self.remask_lr)
    optimizer.zero_grad()

    losses = AverageMeter('Loss', ':.4e')
    entropy = AverageMeter('Entropy', ':.4e')
    norm = AverageMeter('Norm', ':.4e')
    acc = AverageMeter('Acc', ':6.2f')

    # Re-seed so every label starts from the same noise vector.
    torch.manual_seed(env['seed'])
    noise = torch.rand(1, self.noise_dim, device=env['device'])
    mark = torch.zeros(self.dataset.data_shape, device=env['device'])

    def get_data_fn(data, **kwargs):
        # Reads `mark` at call time, i.e. the most recently generated one.
        _input, _label = self.model.get_data(data)
        poison_label = torch.ones_like(_label) * label
        poison_input = (_input + mark).clamp(0, 1)
        return poison_input, poison_label

    for _epoch in range(self.remask_epoch):
        for meter in (losses, entropy, norm, acc):
            meter.reset()
        epoch_start = time.perf_counter()
        loader = self.loader
        if env['tqdm']:
            loader = tqdm(loader)
        for data in loader:
            _input, _label = self.model.get_data(data)
            batch_size = _label.size(0)
            poison_label = label * torch.ones_like(_label)
            mark = generator(
                noise,
                torch.tensor([label], device=poison_label.device,
                             dtype=poison_label.dtype))
            poison_input = (_input + mark).clamp(0, 1)
            _output = self.model(poison_input)

            batch_acc = poison_label.eq(_output.argmax(1)).float().mean()
            batch_entropy = self.model.criterion(_output, poison_label)
            batch_norm = mark.flatten(start_dim=1).norm(p=1, dim=1).mean()
            batch_loss = batch_entropy + self.gamma_2 * batch_norm

            acc.update(batch_acc.item(), batch_size)
            entropy.update(batch_entropy.item(), batch_size)
            norm.update(batch_norm.item(), batch_size)
            losses.update(batch_loss.item(), batch_size)

            batch_loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        epoch_time = str(datetime.timedelta(
            seconds=int(time.perf_counter() - epoch_start)))
        pre_str = '{blue_light}Epoch: {0}{reset}'.format(
            output_iter(_epoch + 1, self.remask_epoch),
            **ansi).ljust(64 if env['color'] else 35)
        _str = ' '.join([
            f'Loss: {losses.avg:.4f},'.ljust(20),
            f'Acc: {acc.avg:.2f}, '.ljust(20),
            f'Norm: {norm.avg:.4f},'.ljust(20),
            f'Entropy: {entropy.avg:.4f},'.ljust(20),
            f'Time: {epoch_time},'.ljust(20),
        ])
        prints(pre_str, _str,
               prefix='{upline}{clear_line}'.format(**ansi) if env['tqdm'] else '',
               indent=4)

        self.model._validate(print_prefix='Validate Trigger Tgt',
                             get_data_fn=get_data_fn, indent=4)

        if not self.attack.mark.random_pos:
            overlap = jaccard_idx(
                mark.mean(dim=0), self.real_mask,
                select_num=self.attack.mark.mark_height * self.attack.mark.mark_width)
            print(f' Jaccard index: {overlap:.3f}')

    for param in generator.parameters():
        param.requires_grad = False
    # Detach so the returned trigger does not keep the generator's autograd
    # graph alive once training is over.
    return losses.avg, mark.detach()