def normalize_l2_norms(x: ep.Tensor) -> ep.Tensor:
    norms = flatten(x).norms.l2(axis=-1)
    norms = ep.maximum(norms, 1e-12)  # avoid division by zero
    factor = 1 / norms
    factor = atleast_kd(factor, x.ndim)
    return x * factor
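# Illustrative sketch (not part of the original module): demonstrates that
# normalize_l2_norms rescales every sample in a batch to unit L2 norm.
# Assumes eagerpy with a NumPy backend; the shapes are arbitrary examples.
def _demo_normalize_l2_norms() -> None:
    import numpy as np

    x = ep.astensor(np.random.uniform(size=(4, 3, 8, 8)).astype(np.float32))
    y = normalize_l2_norms(x)
    # per-sample L2 norms are now approximately [1., 1., 1., 1.]
    print(flatten(y).norms.l2(axis=-1))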
def test_rmaximum_scalar(t: Tensor) -> Tensor:
    return ep.maximum(3, t)
def draw_proposals(
    bounds: Bounds,
    originals: ep.Tensor,
    perturbed: ep.Tensor,
    unnormalized_source_directions: ep.Tensor,
    source_directions: ep.Tensor,
    source_norms: ep.Tensor,
    spherical_steps: ep.Tensor,
    source_steps: ep.Tensor,
) -> Tuple[ep.Tensor, ep.Tensor]:
    # remember the actual shape
    shape = originals.shape
    assert perturbed.shape == shape
    assert unnormalized_source_directions.shape == shape
    assert source_directions.shape == shape

    # flatten everything to (batch, size)
    originals = flatten(originals)
    perturbed = flatten(perturbed)
    unnormalized_source_directions = flatten(unnormalized_source_directions)
    source_directions = flatten(source_directions)
    N, D = originals.shape

    assert source_norms.shape == (N,)
    assert spherical_steps.shape == (N,)
    assert source_steps.shape == (N,)

    # draw from an iid Gaussian (we can share this across the whole batch)
    eta = ep.normal(perturbed, (D, 1))

    # make orthogonal (source_directions are normalized)
    eta = eta.T - ep.matmul(source_directions, eta) * source_directions
    assert eta.shape == (N, D)

    # rescale
    norms = ep.norms.l2(eta, axis=-1)
    assert norms.shape == (N,)
    eta = eta * atleast_kd(spherical_steps * source_norms / norms, eta.ndim)

    # project on the sphere using Pythagoras
    distances = atleast_kd((spherical_steps.square() + 1).sqrt(), eta.ndim)
    directions = eta - unnormalized_source_directions
    spherical_candidates = originals + directions / distances

    # clip
    min_, max_ = bounds
    spherical_candidates = spherical_candidates.clip(min_, max_)

    # step towards the original inputs
    new_source_directions = originals - spherical_candidates
    assert new_source_directions.ndim == 2
    new_source_directions_norms = ep.norms.l2(flatten(new_source_directions), axis=-1)

    # length if spherical_candidates were exactly on the sphere
    lengths = source_steps * source_norms

    # length including correction for numerical deviation from the sphere
    lengths = lengths + new_source_directions_norms - source_norms

    # make sure the step size is positive
    lengths = ep.maximum(lengths, 0)

    # normalize the length
    lengths = lengths / new_source_directions_norms
    lengths = atleast_kd(lengths, new_source_directions.ndim)

    candidates = spherical_candidates + lengths * new_source_directions

    # clip
    candidates = candidates.clip(min_, max_)

    # restore shape
    candidates = candidates.reshape(shape)
    spherical_candidates = spherical_candidates.reshape(shape)
    return candidates, spherical_candidates
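# Illustrative sketch (not part of the original module): one proposal step on
# random data. The source direction points from the perturbed input back to
# the original; draw_proposals returns the candidate after the orthogonal
# (spherical) step plus the step towards the original, and the purely
# spherical candidate. A plain (min, max) tuple stands in for Bounds here,
# and all values are hypothetical.
def _demo_draw_proposals() -> None:
    import numpy as np

    originals = ep.astensor(np.random.uniform(size=(4, 3, 8, 8)).astype(np.float32))
    perturbed = ep.astensor(np.random.uniform(size=(4, 3, 8, 8)).astype(np.float32))

    # directions from the perturbed inputs back to the originals
    unnormalized = originals - perturbed
    norms = flatten(unnormalized).norms.l2(axis=-1)
    directions = unnormalized / atleast_kd(norms, unnormalized.ndim)

    spherical_steps = ep.ones(originals, 4) * 0.01
    source_steps = ep.ones(originals, 4) * 0.01

    candidates, spherical_candidates = draw_proposals(
        (0.0, 1.0),
        originals,
        perturbed,
        unnormalized,
        directions,
        norms,
        spherical_steps,
        source_steps,
    )
    assert candidates.shape == originals.shape
    assert spherical_candidates.shape == originals.shape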
def test_maximum_scalar(t: Tensor) -> Tensor:
    return ep.maximum(t, 3)


def test_maximum(t1: Tensor, t2: Tensor) -> Tensor:
    return ep.maximum(t1, t2)
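# Note (added for clarity): ep.maximum broadcasts scalars against tensors, so
# both argument orders tested above are equivalent, e.g. for a tensor with
# values [1, 5], both ep.maximum(3, t) and ep.maximum(t, 3) return [3, 5].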
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    x, restore_type = ep.astensor_(inputs)
    criterion_ = get_criterion(criterion)
    del inputs, criterion, kwargs

    verify_input_bounds(x, model)

    N = len(x)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(
            f"expected {name} to have shape ({N},), got {classes.shape}"
        )

    max_stepsize = 1.0
    min_, max_ = model.bounds

    def loss_fn(
        inputs: ep.Tensor, labels: ep.Tensor
    ) -> Tuple[ep.Tensor, ep.Tensor]:
        logits = model(inputs)

        sign = -1.0 if targeted else 1.0
        loss = sign * ep.crossentropy(logits, labels).sum()

        return loss, logits

    grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

    delta = ep.zeros_like(x)

    epsilon = self.init_epsilon * ep.ones(x, len(x))
    worst_norm = ep.norms.l2(flatten(ep.maximum(x - min_, max_ - x)), -1)

    best_l2 = worst_norm
    best_delta = delta
    adv_found = ep.zeros(x, len(x)).bool()

    for i in range(self.steps):
        # perform cosine annealing of LR starting from 1.0 to 0.01
        stepsize = (
            0.01
            + (max_stepsize - 0.01) * (1 + math.cos(math.pi * i / self.steps)) / 2
        )

        x_adv = x + delta

        _, logits, gradients = grad_and_logits(x_adv, classes)
        gradients = normalize_gradient_l2_norms(gradients)
        is_adversarial = criterion_(x_adv, logits)

        l2 = ep.norms.l2(flatten(delta), axis=-1)
        is_smaller = l2 <= best_l2

        is_both = ep.logical_and(is_adversarial, is_smaller)
        adv_found = ep.logical_or(adv_found, is_adversarial)
        best_l2 = ep.where(is_both, l2, best_l2)

        best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta)

        # do step
        delta = delta + stepsize * gradients

        epsilon = epsilon * ep.where(
            is_adversarial, 1.0 - self.gamma, 1.0 + self.gamma
        )
        epsilon = ep.minimum(epsilon, worst_norm)

        # project to epsilon ball
        delta *= atleast_kd(epsilon / ep.norms.l2(flatten(delta), -1), x.ndim)

        # clip to valid bounds
        delta = ep.clip(x + delta, *model.bounds) - x

    x_adv = x + best_delta

    return restore_type(x_adv)
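# Illustrative usage sketch (not from the original source): running this DDN
# run method through Foolbox's standard attack interface. Assumes Foolbox >= 3
# with a PyTorch backend; `net`, `images`, and `labels` are hypothetical
# placeholders for a trained torch model and a batch of inputs in [0, 1].
def _demo_run_ddn_attack(net: Any, images: Any, labels: Any) -> None:
    import foolbox as fb

    fmodel = fb.PyTorchModel(net.eval(), bounds=(0, 1))
    attack = fb.attacks.DDNAttack(init_epsilon=1.0, steps=100, gamma=0.05)
    # epsilons=None returns the unconstrained minimal-norm adversarials
    raw, clipped, success = attack(fmodel, images, labels, epsilons=None)
    print(success.float().mean())  # fraction of successfully attacked samples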
def project(self, x: ep.Tensor, x0: ep.Tensor, epsilon: ep.Tensor) -> ep.Tensor:
    clipped = ep.maximum(flatten(x - x0).T, -epsilon)
    clipped = ep.minimum(clipped, epsilon).T
    return x0 + clipped.reshape(x0.shape)
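# Illustrative sketch (not from the original source): the transposes above let
# the per-sample epsilon (shape (N,)) broadcast against the flattened
# perturbation (shape (N, D)) without an explicit atleast_kd. This demo
# re-implements the body of project inline on NumPy-backed eagerpy tensors.
def _demo_project_linf() -> None:
    import numpy as np

    x0 = ep.astensor(np.zeros((2, 3, 4, 4), dtype=np.float32))
    x = ep.astensor(np.full((2, 3, 4, 4), 0.5, dtype=np.float32))
    epsilon = ep.astensor(np.array([0.1, 0.3], dtype=np.float32))

    clipped = ep.maximum(flatten(x - x0).T, -epsilon)
    clipped = ep.minimum(clipped, epsilon).T
    projected = x0 + clipped.reshape(x0.shape)

    # each sample is clipped into its own L-infinity ball: [0.1, 0.3]
    print(flatten(projected - x0).norms.linf(axis=-1))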
def run(
    self,
    model: Model,
    inputs: T,
    criterion: Union[Misclassification, TargetedMisclassification, T],
    *,
    starting_points: Optional[ep.Tensor] = None,
    early_stop: Optional[float] = None,
    **kwargs: Any,
) -> T:
    raise_if_kwargs(kwargs)
    criterion_ = get_criterion(criterion)

    if isinstance(criterion_, Misclassification):
        targeted = False
        classes = criterion_.labels
    elif isinstance(criterion_, TargetedMisclassification):
        targeted = True
        classes = criterion_.target_classes
    else:
        raise ValueError("unsupported criterion")

    def loss_fn(
        inputs: ep.Tensor, labels: ep.Tensor
    ) -> Tuple[ep.Tensor, Tuple[ep.Tensor, ep.Tensor]]:
        logits = model(inputs)

        if targeted:
            c_minimize = best_other_classes(logits, labels)
            c_maximize = labels  # target_classes
        else:
            c_minimize = labels  # labels
            c_maximize = best_other_classes(logits, labels)

        loss = logits[rows, c_minimize] - logits[rows, c_maximize]

        return -loss.sum(), (logits, loss)

    x, restore_type = ep.astensor_(inputs)
    del inputs, criterion, kwargs
    N = len(x)
    initialized = False

    # start from initialization points/attack
    if starting_points is not None:
        x1 = starting_points
        initialized = True
    else:
        if self.init_attack is not None:
            x1 = self.init_attack.run(model, x, criterion_)
            initialized = True

    # if initial points or an initialization attack are provided,
    # search for the boundary
    if initialized:
        is_adv = get_is_adversarial(criterion_, model)
        assert is_adv(x1).all()
        lower_bound = ep.zeros(x, shape=(N,))
        upper_bound = ep.ones(x, shape=(N,))
        for _ in range(self.binary_search_steps):
            epsilons = (lower_bound + upper_bound) / 2
            mid_points = self.mid_points(x, x1, epsilons, model.bounds)
            is_advs = is_adv(mid_points)
            lower_bound = ep.where(is_advs, lower_bound, epsilons)
            upper_bound = ep.where(is_advs, epsilons, upper_bound)
        starting_points = self.mid_points(x, x1, upper_bound, model.bounds)
        delta = starting_points - x
    else:
        # start from x0
        delta = ep.zeros_like(x)

    if classes.shape != (N,):
        name = "target_classes" if targeted else "labels"
        raise ValueError(
            f"expected {name} to have shape ({N},), got {classes.shape}"
        )

    min_, max_ = model.bounds
    rows = range(N)
    grad_and_logits = ep.value_and_grad_fn(x, loss_fn, has_aux=True)

    if self.p != 0:
        epsilon = ep.inf * ep.ones(x, len(x))
    else:
        epsilon = ep.maximum(
            ep.ones(x, len(x)), ep.norms.l0(flatten(delta), axis=-1)
        )

    if self.p != 0:
        worst_norm = ep.norms.lp(
            flatten(ep.maximum(x - min_, max_ - x)), p=self.p, axis=-1
        )
    else:
        worst_norm = flatten(ep.ones_like(x)).bool().sum(axis=1).float32()

    best_lp = worst_norm
    best_delta = delta
    adv_found = ep.zeros(x, len(x)).bool()

    for i in range(self.steps):
        # perform cosine annealing of learning rates
        stepsize = (
            self.min_stepsize
            + (self.max_stepsize - self.min_stepsize)
            * (1 + math.cos(math.pi * i / self.steps))
            / 2
        )
        gamma = (
            0.001
            + (self.gamma - 0.001) * (1 + math.cos(math.pi * (i / self.steps))) / 2
        )

        x_adv = x + delta

        loss, (logits, loss_batch), gradients = grad_and_logits(x_adv, classes)
        is_adversarial = criterion_(x_adv, logits)
        lp = ep.norms.lp(flatten(delta), p=self.p, axis=-1)
        is_smaller = lp <= best_lp

        is_both = ep.logical_and(is_adversarial, is_smaller)
        adv_found = ep.logical_or(adv_found, is_adversarial)
        best_lp = ep.where(is_both, lp, best_lp)
        best_delta = ep.where(atleast_kd(is_both, x.ndim), delta, best_delta)

        # update epsilon
        if self.p != 0:
            distance_to_boundary = abs(loss_batch) / ep.norms.lp(
                flatten(gradients), p=self.dual, axis=-1
            )
            epsilon = ep.where(
                is_adversarial,
                ep.minimum(
                    epsilon * (1 - gamma),
                    ep.norms.lp(flatten(best_delta), p=self.p, axis=-1),
                ),
                ep.where(
                    adv_found,
                    epsilon * (1 + gamma),
                    ep.norms.lp(flatten(delta), p=self.p, axis=-1)
                    + distance_to_boundary,
                ),
            )
        else:
            epsilon = ep.where(
                is_adversarial,
                ep.minimum(
                    ep.minimum(
                        epsilon - 1,
                        (epsilon * (1 - gamma))
                        .astype(ep.arange(x, 1).dtype)
                        .astype(epsilon.dtype),
                    ),
                    ep.norms.lp(flatten(best_delta), p=self.p, axis=-1),
                ),
                ep.maximum(
                    epsilon + 1,
                    (epsilon * (1 + gamma))
                    .astype(ep.arange(x, 1).dtype)
                    .astype(epsilon.dtype),
                ),
            )
            epsilon = ep.maximum(1, epsilon).astype(epsilon.dtype)

        # clip epsilon
        epsilon = ep.minimum(epsilon, worst_norm)

        # compute the normalized gradient update
        grad_ = self.normalize(gradients) * stepsize

        # do step
        delta = delta + grad_

        # project according to the given norm
        delta = self.project(x=x + delta, x0=x, epsilon=epsilon) - x

        # clip to valid bounds
        delta = ep.clip(x + delta, *model.bounds) - x

    x_adv = x + best_delta

    return restore_type(x_adv)
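# Illustrative usage sketch (not from the original source): this run method
# backs the norm-specific FMN attacks exposed by Foolbox (L0FMNAttack,
# L1FMNAttack, L2FMNAttack, LInfFMNAttack). Assumes Foolbox >= 3.3 with a
# PyTorch backend; `net`, `images`, and `labels` are hypothetical placeholders.
def _demo_run_fmn_attack(net: Any, images: Any, labels: Any) -> None:
    import foolbox as fb

    fmodel = fb.PyTorchModel(net.eval(), bounds=(0, 1))
    attack = fb.attacks.L2FMNAttack(steps=100)  # or L0/L1/LInfFMNAttack
    raw, clipped, success = attack(fmodel, images, labels, epsilons=None)
    print(success.float().mean())  # fraction of successfully attacked samples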