def _forward_and_update_delta(self, optimizer, x_atanh, delta, y_onehot,
                              loss_coeffs):
    """Perform a single optimizer step on the perturbation.

    The clean reference image and the perturbed image are both obtained
    through ``tanh_rescale`` with ``self.clip_min`` / ``self.clip_max``.
    The perturbed image is fed to ``self.predict`` and the result, together
    with the ``calc_l2distsq`` distance, is passed to ``self._loss_fn``.

    Returns:
        A 4-tuple ``(loss.item(), l2distsq.data, output.data, adv.data)``.
    """
    optimizer.zero_grad()

    low, high = self.clip_min, self.clip_max
    perturbed = tanh_rescale(delta + x_atanh, low, high)
    reference = tanh_rescale(x_atanh, low, high)

    logits = self.predict(perturbed)
    dist = calc_l2distsq(perturbed, reference)

    objective = self._loss_fn(logits, y_onehot, dist, loss_coeffs)
    objective.backward()
    optimizer.step()

    return objective.item(), dist.data, logits.data, perturbed.data
def _outputs_and_loss(self, x, modifiers, starting_atanh, y, const, taus):
    """Compute model outputs and the per-sample loss for the candidates.

    The loss is ``const * loss1 + loss2`` where ``loss1`` is the clamped
    margin between the labelled class and the best other class (direction
    chosen by ``self.targeted``) and ``loss2`` sums, over all non-batch
    dimensions, the part of ``|x - adversarials|`` exceeding ``taus``.

    Returns:
        ``(outputs.detach(), loss)``.
    """
    adversarials = tanh_rescale(starting_atanh + modifiers,
                                self.clip_min, self.clip_max)
    outputs = self.predict(adversarials)

    label_mask = to_one_hot(y, self.num_classes).float()
    real = (label_mask * outputs).sum(dim=1)
    # Subtracting label_mask * TARGET_MULT keeps the labelled class from
    # being selected by the max.
    masked_outputs = (1.0 - label_mask) * outputs - (label_mask * TARGET_MULT)
    other, _ = masked_outputs.max(dim=1)

    margin = (other - real) if self.targeted else (real - other)
    loss1 = const * torch.clamp(margin, min=0.)

    rank = len(x.shape)
    # One tau per sample, broadcast over every non-batch dimension.
    per_sample_taus = taus.view((-1, ) + (1, ) * (rank - 1))
    penalties = torch.clamp(torch.abs(x - adversarials) - per_sample_taus,
                            min=0)
    loss2 = torch.sum(penalties, dim=tuple(range(1, rank)))

    assert loss1.shape == loss2.shape

    return outputs.detach(), loss1 + loss2
def _forward_and_update_delta(self, optimizer, x_atanh, delta, y_onehot,
                              loss_coeffs):
    """Perform a single optimizer step using a max-abs (L-inf) distance.

    The perturbed image is ``tanh_rescale(delta + x_atanh, ...)``. It is
    passed through ``self.normalize_fn`` first when that attribute is set,
    then through ``self.predict``. The distance handed to ``self._loss_fn``
    is the per-sample maximum absolute difference between the perturbed and
    the reference image.

    Returns:
        A 4-tuple ``(loss.item(), distance.data, output.data, adv.data)``.
    """
    optimizer.zero_grad()

    adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
    transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)

    # Identity test: `== None` would go through the object's __eq__.
    if self.normalize_fn is None:
        output = self.predict(adv)
    else:
        output = self.predict(self.normalize_fn(adv))

    # Flatten everything but the first dimension and take the largest
    # absolute difference per row (this value is not squared).
    flat_abs_diff = torch.abs(adv - transimgs_rescale).view(
        adv.size()[0], -1)
    linf_dist, _ = torch.max(flat_abs_diff, dim=1)

    loss = self._loss_fn(output, y_onehot, linf_dist, loss_coeffs)
    loss.backward()
    optimizer.step()

    return loss.item(), linf_dist.data, output.data, adv.data
def _outputs_and_loss(self, x, modifiers, starting_atanh, y, const, taus,
                      active_mask=None, filter_=None):
    """Compute model outputs and a per-sample loss vector.

    ``active_mask`` and ``filter_`` are forwarded untouched to
    ``self._outputs``. The returned losses keep one entry per sample (they
    are not reduced to a scalar here) so that callers can inspect them
    individually, e.g. for early aborting.

    Returns:
        ``(outputs.detach(), losses)`` with ``losses.shape == (len(x),)``.
    """
    # When comparing with Carlini's original implementation: x corresponds
    # to tf.tanh(timg)/2 and adversarials to tf.tanh(modifier + simg)/2
    # (a.k.a. newimg).
    adversarials = tanh_rescale(starting_atanh + modifiers,
                                self.clip_min, self.clip_max)
    assert x.shape == adversarials.shape

    outputs = self._outputs(adversarials, active_mask=active_mask,
                            filter_=filter_)
    assert outputs.shape == (adversarials.shape[0], self.num_classes)

    label_mask = to_one_hot(y, self.num_classes).float()
    assert label_mask.shape == outputs.shape

    real = (label_mask * outputs).sum(dim=1)
    # Subtracting label_mask * TARGET_MULT keeps the labelled class from
    # being selected by the max.
    masked_outputs = (1.0 - label_mask) * outputs - (label_mask * TARGET_MULT)
    other, _ = masked_outputs.max(dim=1)

    margin = (other - real) if self.targeted else (real - other)
    loss1 = torch.clamp(margin, min=0.)

    rank = len(x.shape)
    non_batch_dims = tuple(range(1, rank))

    # View taus as (batch_size, 1, ..., 1) so it broadcasts against x.
    broadcast_taus = taus.view((len(x), ) + (1, ) * (rank - 1))
    penalties = torch.clamp(torch.abs(adversarials - x) - broadcast_taus,
                            min=0)
    assert penalties.shape == x.shape

    loss2 = torch.sum(penalties, dim=non_batch_dims)
    assert loss2.shape == loss1.shape

    losses = const * loss1 + loss2
    assert losses.shape == (len(x), )

    return outputs.detach(), losses
def perturb(self, x, y=None):
    """Optimise a perturbation of ``x`` for ``self.max_iterations`` steps.

    Args:
        x: input batch; it is passed through
            ``self._verify_and_process_inputs`` and ``replicate_input``.
        y: labels. When ``None`` after input processing, the labels
            returned by ``self._get_predicted_label(x)`` are used.

    Returns:
        The ``.data`` of the perturbed batch evaluated in the last
        iteration (i.e. before that iteration's optimizer step), or the
        replicated input itself when ``self.max_iterations`` is 0.

    Side effects:
        Prints a progress line whenever ``ii % 1000 == 1``.
    """
    x, y = self._verify_and_process_inputs(x, y)

    # Initialization
    if y is None:
        y = self._get_predicted_label(x)

    x = replicate_input(x)
    final_advs = x
    x_atanh = self._get_arctanh_x(x)
    y_onehot = to_one_hot(y, self.num_classes).float()

    delta = nn.Parameter(torch.zeros_like(x))
    optimizer = optim.Adam([delta], lr=self.learning_rate)

    # The reference image does not depend on delta, so compute it once.
    # It is detached so that no autograd graph is shared between the
    # per-iteration backward() calls; only delta is optimised.
    transimgs_rescale = tanh_rescale(
        x_atanh, self.clip_min, self.clip_max).detach()

    # NOTE: early aborting is not implemented in this loop; every call runs
    # the full self.max_iterations steps.
    for ii in range(self.max_iterations):
        optimizer.zero_grad()

        adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
        output = self.predict(adv)
        l2distsq = calc_l2distsq(adv, transimgs_rescale)

        # This _loss_fn variant returns the total loss plus two components
        # that are only used for the progress report below.
        loss, l2dist, adv_loss = self._loss_fn(
            output, y_onehot, l2distsq, self.c)
        loss.backward()
        optimizer.step()

        if ii % 1000 == 1:
            print('step: {}, dis: {:.2f}, loss1: {:.2f}.'.format(ii, l2dist.item(), adv_loss.item()))

        final_advs = adv.data

    return final_advs
def _forward_and_update_delta(
        self, optimizer, x_atanh, delta, y_onehot, loss_coeffs):
    """Perform a single optimizer step against a weighted ensemble.

    The model output is the sum over ``i`` of
    ``self.predict.predict(adv, i) * self.predict.weights[i]`` for every
    index of ``self.predict.classifiers``.

    Returns:
        A 4-tuple ``(loss.item(), l2distsq.data, output.data, adv.data)``.

    Raises:
        ValueError: if ``self.predict.classifiers`` is empty (there would
            be no output to compute a loss on).
    """
    num_classifiers = len(self.predict.classifiers)
    if num_classifiers == 0:
        raise ValueError(
            "self.predict.classifiers is empty; cannot compute an output")

    optimizer.zero_grad()

    adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
    transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)
    l2distsq = calc_l2distsq(adv, transimgs_rescale)

    # The per-classifier predict call needs the index itself, hence the
    # explicit range instead of iterating over the classifiers.
    output = None
    for i in range(num_classifiers):
        weighted_output = self.predict.predict(adv, i) * self.predict.weights[i]
        output = weighted_output if output is None else output + weighted_output

    loss = self._loss_fn(output, y_onehot, l2distsq, loss_coeffs)
    loss.backward()
    optimizer.step()

    return loss.item(), l2distsq.data, output.data, adv.data
def _run_attack(self, x, y, initial_const, taus, prev_adversarials, active):
    """Run the optimisation for increasing values of ``const``.

    Starting from ``initial_const``, the constant is multiplied by
    ``self.const_factor`` after every outer round until it reaches
    ``self.max_const`` or the search is stopped early.

    Args:
        x: original inputs.
        y: labels passed to ``self._outputs_and_loss`` / ``self._successful``.
        initial_const: first value of the loss constant.
        taus: one threshold per sample (``len(taus) == len(x)``).
        prev_adversarials: starting point used when ``self.warm_start``.
        active: boolean mask of samples still being optimised. It is
            rebound, never modified in place, so the caller's tensor is
            left untouched. (Presumably shape ``(len(x),)`` - confirm
            against the caller.)

    Returns:
        The adversarials saved during the search, initialised to a detached
        copy of ``x``.
    """
    assert len(x) == len(taus)
    batch_size = len(x)

    computed_adversarials = x.clone().detach()

    if self.warm_start:
        starting_atanh = self._get_arctanh_x(prev_adversarials.clone())
    else:
        starting_atanh = self._get_arctanh_x(x.clone())

    modifiers = torch.nn.Parameter(torch.zeros_like(starting_atanh))
    optimizer = optim.Adam([modifiers], lr=self.learning_rate)

    const = initial_const
    # Index of the current outer (const) round, used by the const_check test.
    j = 0
    stop_search = False

    while (not stop_search) and const < self.max_const:
        # We add an extra iteration because adversarials are
        # not saved until the next iteration
        for k in range(self.max_iterations + 1):
            # Note: unlike the CPU version, the CUDA version updates and
            # calls the model on all samples, including inactive ones.
            # However, the filter_ parameter is designed to force
            # best_sample to only update active samples. This is
            # counter-productive, but it's necessary in order to have
            # consistent CPU and CUDA implementations
            outputs, losses = self._outputs_and_loss(
                x, modifiers, starting_atanh, y, const, taus, filter_=active)

            adversarials = tanh_rescale(
                starting_atanh + modifiers,
                self.clip_min, self.clip_max).detach()

            # Which samples get their saved adversarial overwritten.
            replace = torch.ones(
                (batch_size, ), dtype=torch.bool, device=x.device)
            if not self.update_inactive:
                replace = replace & active

            computed_adversarials = utils.fast_boolean_choice(
                computed_adversarials, adversarials, replace)

            # Update the modifiers
            total_loss = torch.sum(losses)
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            # If early aborting is enabled, drop successful
            # samples with a small loss (the current adversarials
            # are saved regardless of whether they are dropped)
            if self.abort_early:
                successful = self._successful(outputs, y).detach()
                small_loss = losses < SMALL_LOSS_COEFFICIENT * const
                active = active & ~(successful & small_loss)

                # NOTE(review): the source formatting was lost; this check
                # is nested under abort_early to mirror the outer check
                # below - confirm.
                if self.inner_check != 0 and (k + 1) % self.inner_check == 0:
                    # Causes an implicit sync point
                    if not active.any():
                        # Break from both loops
                        stop_search = True
                        break

        if stop_search:
            break

        if (self.abort_early and self.const_check != 0
                and (j + 1) % self.const_check == 0):
            # Causes an implicit sync point
            if not active.any():
                break

        # Give more weight to the output loss
        const *= self.const_factor
        # Advance the round counter; without this the const_check test
        # above could only ever fire when const_check == 1.
        j += 1

    return computed_adversarials
def _run_attack(self, x, y, initial_const, taus, prev_adversarials,
                outer_active_mask):
    """Run the optimisation on the still-active samples only.

    The loss constant starts at ``initial_const`` and is multiplied by
    ``self.const_factor`` after every outer round, until it reaches
    ``self.max_const`` or no sample is active any more.

    Returns:
        The adversarials saved during the search, initialised to a detached
        copy of ``x``.
    """
    assert len(x) == len(taus)

    computed_adversarials = x.clone().detach()

    start_point = prev_adversarials if self.warm_start else x
    starting_atanh = self._get_arctanh_x(start_point.clone())

    modifiers = torch.nn.Parameter(torch.zeros_like(starting_atanh))
    optimizer = optim.Adam([modifiers], lr=self.learning_rate)

    # Boolean vector marking which samples have not converged yet.
    active = torch.ones((len(x), ), dtype=torch.bool, device=x.device)

    # Used for best_sample tracking
    active_mask = outer_active_mask.clone()

    const = initial_const

    while active.any() and const < self.max_const:
        # One extra iteration, because adversarials are not saved until the
        # next iteration.
        for _ in range(self.max_iterations + 1):
            # Only the entries where outer_active_mask is True are changed.
            active_mask[outer_active_mask] = active

            outputs, losses = self._outputs_and_loss(
                x[active],
                modifiers[active],
                starting_atanh[active],
                y[active],
                const,
                taus[active],
                active_mask=active_mask)

            # Save the current candidates before the modifiers move.
            computed_adversarials[active] = tanh_rescale(
                starting_atanh[active] + modifiers[active],
                self.clip_min, self.clip_max).detach()

            # Update the modifiers. This also updates modifiers of samples
            # that may be dropped below; that is fine because the
            # adversarials above were detached and stored before the step.
            optimizer.zero_grad()
            torch.sum(losses).backward()
            optimizer.step()

            if not self.abort_early:
                continue

            # Early aborting: drop successful samples with a small loss
            # (their current adversarials were already saved above).
            successful = self._successful(outputs, y[active]).detach()
            small_loss = losses < SMALL_LOSS_COEFFICIENT * const
            active[active] = ~(successful & small_loss)

            if not active.any():
                break

        # Give more weight to the output loss
        const *= self.const_factor

    return computed_adversarials
def _run_attack(self, x, y, initial_const, taus, prev_adversarials):
    """Run the optimisation, optionally keeping the closest successes.

    When ``self.return_best`` is set, a sample's stored adversarial is only
    replaced (via ``replace_active``) by a successful candidate whose
    max-abs distance to ``x`` is smaller than the best one so far.
    Otherwise the latest candidate of every active sample is stored.

    Returns:
        The stored adversarials, initialised to a detached copy of ``x``.
    """
    assert len(x) == len(taus)
    num_samples = len(x)

    best_adversarials = x.clone().detach()
    best_distances = torch.ones(
        (num_samples, ), device=x.device) * float("inf")

    start_point = prev_adversarials if self.warm_start else x
    starting_atanh = self._get_arctanh_x(start_point.clone())

    modifiers = torch.nn.Parameter(torch.zeros_like(starting_atanh))

    # Boolean vector marking which samples have not converged yet.
    active = torch.ones(
        (num_samples, ), dtype=boolean_type, device=x.device)
    optimizer = optim.Adam([modifiers], lr=self.learning_rate)

    const = initial_const

    while active.any() and const < self.max_const:
        for _ in range(self.max_iterations):
            optimizer.zero_grad()

            outputs, loss = self._outputs_and_loss(
                x[active],
                modifiers[active],
                starting_atanh[active],
                y[active],
                const,
                taus[active])

            adversarials = tanh_rescale(
                starting_atanh + modifiers,
                self.clip_min, self.clip_max).detach()

            successful = self._successful(outputs, y[active])

            if not self.return_best:
                best_adversarials[active] = adversarials[active]
            else:
                abs_diff = torch.abs(x[active] - adversarials[active])
                distances = torch.max(abs_diff.flatten(1), dim=1)[0]
                better_distance = distances < best_distances[active]

                replace_active(adversarials[active], best_adversarials,
                               active, successful & better_distance)
                replace_active(distances, best_distances,
                               active, successful & better_distance)

            # Early aborting: drop successful samples with a small loss
            # (their current adversarials were already handled above).
            if self.abort_early:
                small_loss = loss < 0.0001 * const
                drop = successful & small_loss

                # Build a new mask instead of writing into "active":
                # modifying it in place would interfere with the gradient
                # computation in backward().
                updated_active = active.clone()
                updated_active[active] = ~drop
                active = updated_active

                if not active.any():
                    break

            # Update the modifiers
            total_loss = torch.sum(loss)
            total_loss.backward()
            optimizer.step()

        # Give more weight to the output loss
        const *= self.const_factor

    return best_adversarials