def apply(self, Xs, Ys, Rs, reverse_state):
    # This method is correct, but wasteful: it always builds both weighted
    # copies of the layer even when only one of them is needed.
    grad = ilayers.GradientWRT(len(Xs))
    times_alpha = tensorflow.keras.layers.Lambda(lambda x: x * self._alpha)
    times_beta = tensorflow.keras.layers.Lambda(lambda x: x * self._beta)
    keep_positives = tensorflow.keras.layers.Lambda(
        lambda x: x * K.cast(K.greater(x, 0), K.floatx()))
    keep_negatives = tensorflow.keras.layers.Lambda(
        lambda x: x * K.cast(K.less(x, 0), K.floatx()))

    def f(layer1, layer2, X1, X2):
        # Get activations of the full positive or negative part.
        Z1 = kutils.apply(layer1, X1)
        Z2 = kutils.apply(layer2, X2)
        Zs = [
            tensorflow.keras.layers.Add()([a, b]) for a, b in zip(Z1, Z2)
        ]
        # Divide incoming relevance by the activations.
        tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
        # Propagate the relevance to the input neurons
        # using the gradient.
        tmp1 = iutils.to_list(grad(X1 + Z1 + tmp))
        tmp2 = iutils.to_list(grad(X2 + Z2 + tmp))
        # Re-weight relevance with the input values.
        tmp1 = [
            tensorflow.keras.layers.Multiply()([a, b])
            for a, b in zip(X1, tmp1)
        ]
        tmp2 = [
            tensorflow.keras.layers.Multiply()([a, b])
            for a, b in zip(X2, tmp2)
        ]
        # Combine and return.
        return [
            tensorflow.keras.layers.Add()([a, b])
            for a, b in zip(tmp1, tmp2)
        ]

    # Distinguish positive and negative inputs.
    Xs_pos = kutils.apply(keep_positives, Xs)
    Xs_neg = kutils.apply(keep_negatives, Xs)

    # xpos * wpos + xneg * wneg
    activator_relevances = f(self._layer_wo_act_positive,
                             self._layer_wo_act_negative, Xs_pos, Xs_neg)

    if self._beta:
        # Only compute the beta-weighted contributions if beta is not zero.
        # xpos * wneg + xneg * wpos
        inhibitor_relevances = f(self._layer_wo_act_negative,
                                 self._layer_wo_act_positive, Xs_pos, Xs_neg)
        return [
            tensorflow.keras.layers.Subtract()(
                [times_alpha(a), times_beta(b)])
            for a, b in zip(activator_relevances, inhibitor_relevances)
        ]
    else:
        return activator_relevances
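# --- Illustrative sketch (not library code): the alpha-beta decomposition above,
# --- written directly in numpy for a toy dense layer without bias. The function
# --- and variable names (alpha_beta_dense, x, W, R_out) are assumptions made for
# --- brevity; the library works on symbolic Keras tensors instead.
import numpy as np

def alpha_beta_dense(x, W, R_out, alpha=2.0, beta=1.0, eps=1e-12):
    """Backward-propagate relevance R_out through z = x @ W with the alpha-beta rule."""
    x_pos, x_neg = np.maximum(x, 0), np.minimum(x, 0)
    W_pos, W_neg = np.maximum(W, 0), np.minimum(W, 0)

    def redistribute(x1, W1, x2, W2):
        # Contributions x1*W1 + x2*W2, normalized per output neuron ("SafeDivide"),
        # then projected back to the inputs and re-weighted with the inputs.
        Z = x1 @ W1 + x2 @ W2                      # activations of this part
        S = R_out / (Z + (Z == 0) * eps)           # relevance per unit of activation
        return x1 * (S @ W1.T) + x2 * (S @ W2.T)   # gradient pass + re-weighting

    activator = redistribute(x_pos, W_pos, x_neg, W_neg)   # x+ w+ + x- w-
    inhibitor = redistribute(x_pos, W_neg, x_neg, W_pos)   # x+ w- + x- w+
    return alpha * activator - beta * inhibitor

# Example: with alpha - beta = 1 and no bias, the total relevance is conserved.
x = np.array([[1.0, -2.0, 0.5]])
W = np.array([[0.3, -0.2], [0.1, 0.4], [-0.5, 0.2]])
R_out = np.array([[1.0, 1.0]])
print(alpha_beta_dense(x, W, R_out).sum())  # approximately R_out.sum()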
def apply(self, Xs, Ys, Rs, reverse_state):
    input_shape = [K.int_shape(x) for x in Xs]
    if len(input_shape) != 1:
        # TODO: extend the lambda layers below to handle multiple inputs.
        raise ValueError(
            "BatchNormalizationReverseLayer expects Xs with len(Xs) = 1, "
            "but was len(Xs) = {}".format(len(Xs)))
    input_shape = input_shape[0]

    # Prepare the broadcasting shape for the layer parameters.
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self._axis] = input_shape[self._axis]
    broadcast_shape[0] = -1

    # Reweight relevances as
    #
    #         x * (y - beta)    R
    # R_in = ---------------- * ---
    #             x - mu         y
    #
    # Batch normalization can be considered as three distinct layers: subtraction,
    # multiplication and then addition. The multiplicative scaling layer has no
    # effect on LRP and functions as a linear activation layer.
    minus_mu = tensorflow.keras.layers.Lambda(
        lambda x: x - K.reshape(self._mean, broadcast_shape))
    minus_beta = tensorflow.keras.layers.Lambda(
        lambda x: x - K.reshape(self._beta, broadcast_shape))
    prepare_div = tensorflow.keras.layers.Lambda(
        lambda x: x + (K.cast(K.greater_equal(x, 0), K.floatx()) * 2 - 1) *
        K.epsilon())

    x_minus_mu = kutils.apply(minus_mu, Xs)
    if self._center:
        y_minus_beta = kutils.apply(minus_beta, Ys)
    else:
        y_minus_beta = Ys

    numerator = [
        tensorflow.keras.layers.Multiply()([x, ymb, r])
        for x, ymb, r in zip(Xs, y_minus_beta, Rs)
    ]
    denominator = [
        tensorflow.keras.layers.Multiply()([xmm, y])
        for xmm, y in zip(x_minus_mu, Ys)
    ]

    return [
        ilayers.SafeDivide()([n, prepare_div(d)])
        for n, d in zip(numerator, denominator)
    ]
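# --- Illustrative sketch (not library code): the batch-norm relevance ratio above,
# --- evaluated elementwise in numpy. The function name and the explicit computation
# --- of y from (mean, var, gamma, beta) are assumptions made for the example.
import numpy as np

def batchnorm_lrp(x, R_out, mean, var, gamma, beta, eps=1e-12):
    """R_in = x * (y - beta) / (x - mean) * R_out / y, with y the batch-norm output."""
    y = gamma * (x - mean) / np.sqrt(var) + beta
    numerator = x * (y - beta) * R_out
    denominator = (x - mean) * y
    # Stabilize the division with a signed epsilon, as prepare_div does above.
    denominator = denominator + np.where(denominator >= 0, 1.0, -1.0) * eps
    return numerator / denominator

x = np.array([1.5, -0.3, 0.7])
R_out = np.array([0.2, 0.5, -0.1])
print(batchnorm_lrp(x, R_out, mean=0.4, var=0.25, gamma=1.2, beta=0.1))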
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    to_low = tensorflow.keras.layers.Lambda(lambda x: x * 0 + self._low)
    to_high = tensorflow.keras.layers.Lambda(lambda x: x * 0 + self._high)

    low = [to_low(x) for x in Xs]
    high = [to_high(x) for x in Xs]

    # Get values for the division.
    A = kutils.apply(self._layer_wo_act, Xs)
    B = kutils.apply(self._layer_wo_act_positive, low)
    C = kutils.apply(self._layer_wo_act_negative, high)
    Zs = [
        tensorflow.keras.layers.Subtract()(
            [a, tensorflow.keras.layers.Add()([b, c])])
        for a, b, c in zip(A, B, C)
    ]

    # Divide the relevances by the value.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Distribute along the gradient.
    tmpA = iutils.to_list(grad(Xs + A + tmp))
    tmpB = iutils.to_list(grad(low + B + tmp))
    tmpC = iutils.to_list(grad(high + C + tmp))

    tmpA = [
        tensorflow.keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmpA)
    ]
    tmpB = [
        tensorflow.keras.layers.Multiply()([a, b]) for a, b in zip(low, tmpB)
    ]
    tmpC = [
        tensorflow.keras.layers.Multiply()([a, b]) for a, b in zip(high, tmpC)
    ]

    return [
        tensorflow.keras.layers.Subtract()(
            [a, tensorflow.keras.layers.Add()([b, c])])
        for a, b, c in zip(tmpA, tmpB, tmpC)
    ]
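# --- Illustrative sketch (not library code): the bounded input-layer rule above on a
# --- toy dense layer without bias, in numpy. Names and the choice of bounds are
# --- assumptions; the library builds the same computation with Keras layers.
import numpy as np

def z_bounded_dense(x, W, R_out, low, high, eps=1e-12):
    """Backward LRP for inputs bounded in [low, high] (e.g. pixel ranges)."""
    W_pos, W_neg = np.maximum(W, 0), np.minimum(W, 0)
    L = np.full_like(x, low)
    H = np.full_like(x, high)
    # Denominator: z - l*w+ - h*w-, i.e. A - (B + C) in the code above.
    Z = x @ W - (L @ W_pos + H @ W_neg)
    S = R_out / (Z + (Z == 0) * eps)
    # Distribute along the gradient and re-weight with x, low and high respectively.
    return x * (S @ W.T) - (L * (S @ W_pos.T) + H * (S @ W_neg.T))

x = np.array([[0.2, 0.8, 0.5]])
W = np.array([[0.3, -0.2], [0.1, 0.4], [-0.5, 0.2]])
R_out = np.array([[1.0, 1.0]])
print(z_bounded_dense(x, W, R_out, low=0.0, high=1.0).sum())  # ~= R_out.sum() (no bias)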
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    # Create a dummy forward path to take the derivative below.
    Ys = kutils.apply(self._layer_wo_act_b, Xs)

    # Compute the sum of the weights.
    ones = ilayers.OnesLike()(Xs)
    Zs = iutils.to_list(self._layer_wo_act_b(ones))
    # Weight the incoming relevance.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Redistribute the relevances along the gradient.
    tmp = iutils.to_list(grad(Xs + Ys + tmp))
    return tmp
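# --- Illustrative sketch (not library code): a WSquare-style redistribution on a toy
# --- dense layer in numpy, assuming the rule above splits relevance per output neuron
# --- proportionally to the layer's (squared) weights, independently of the input
# --- values. The function name and the choice of squared weights are assumptions.
import numpy as np

def wsquare_dense(W, R_out, eps=1e-12):
    """R_j = sum_k W_jk^2 / sum_j' W_j'k^2 * R_k -- the input values do not enter."""
    V = W ** 2
    Z = V.sum(axis=0)                      # "sum of the weights" per output neuron
    S = R_out / (Z + (Z == 0) * eps)       # weight the incoming relevance
    return S @ V.T                         # redistribute along the gradient

W = np.array([[0.3, -0.2], [0.1, 0.4], [-0.5, 0.2]])
R_out = np.array([[1.0, 1.0]])
print(wsquare_dense(W, R_out).sum())       # == R_out.sum() when the bias is ignored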
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    # Get activations.
    Zs = kutils.apply(self._layer_wo_act, Xs)
    # Divide incoming relevance by the activations.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Propagate the relevance to the input neurons
    # using the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    # Re-weight relevance with the input values.
    return [
        tensorflow.keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)
    ]
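# --- Illustrative sketch (not library code): the "divide, gradient, multiply" pattern
# --- used by the rule above, written for a toy dense layer without bias in numpy.
# --- For a linear layer the gradient pass reduces to a multiplication with W^T.
# --- Names are assumptions made for the example.
import numpy as np

def z_rule_dense(x, W, R_out, eps=1e-12):
    Z = x @ W                              # activations (no bias, no nonlinearity)
    S = R_out / (Z + (Z == 0) * eps)       # divide incoming relevance by activations
    C = S @ W.T                            # gradient of sum(Z * S) with respect to x
    return x * C                           # re-weight with the input values

x = np.array([[1.0, -2.0, 0.5]])
W = np.array([[0.3, -0.2], [0.1, 0.4], [-0.5, 0.2]])
R_out = np.array([[1.0, 1.0]])
print(z_rule_dense(x, W, R_out).sum())     # == R_out.sum(): relevance is conserved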
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    # TODO: assert that all inputs are positive, instead of only keeping the positives.
    # keep_positives = tensorflow.keras.layers.Lambda(
    #     lambda x: x * K.cast(K.greater(x, 0), K.floatx()))
    # Xs = kutils.apply(keep_positives, Xs)

    # Get activations.
    Zs = kutils.apply(self._layer_wo_act_b_positive, Xs)
    # Divide incoming relevance by the activations.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Propagate the relevance to the input neurons
    # using the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    # Re-weight relevance with the input values.
    return [
        tensorflow.keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)
    ]
def apply(self, Xs, Ys, Rs, reverse_state):
    grad = ilayers.GradientWRT(len(Xs))
    # The epsilon rule aligns epsilon with the (extended) sign:
    # 0 is considered to be positive.
    prepare_div = tensorflow.keras.layers.Lambda(
        lambda x: x + (K.cast(K.greater_equal(x, 0), K.floatx()) * 2 - 1) *
        self._epsilon)

    # Get activations.
    Zs = kutils.apply(self._layer_wo_act, Xs)

    # Divide incoming relevance by the activations.
    tmp = [ilayers.Divide()([a, prepare_div(b)]) for a, b in zip(Rs, Zs)]
    # Propagate the relevance to the input neurons
    # using the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    # Re-weight relevance with the input values.
    return [
        tensorflow.keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)
    ]
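# --- Illustrative sketch (not library code): the epsilon stabilizer above on a toy
# --- dense layer in numpy. Epsilon is added with the sign of the activation, where
# --- sign(0) is treated as +1. Names are assumptions made for the example.
import numpy as np

def epsilon_rule_dense(x, W, R_out, epsilon=1e-2):
    Z = x @ W
    Z_stab = Z + np.where(Z >= 0, 1.0, -1.0) * epsilon   # prepare_div
    S = R_out / Z_stab
    return x * (S @ W.T)

x = np.array([[1.0, -2.0, 0.5]])
W = np.array([[0.3, -0.2], [0.1, 0.4], [-0.5, 0.2]])
R_out = np.array([[1.0, 1.0]])
# The stabilizer absorbs a small amount of relevance, so the total is only
# approximately conserved for small epsilon.
print(epsilon_rule_dense(x, W, R_out).sum())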
def apply(self, Xs, Ys, Rs, reverse_state):
    # The output of the pooling operation at each location is the sum of its inputs.
    # The forward message must be known in this case and consists of the inputs to
    # each pooling window. The gradient is 1 for each output-to-input connection,
    # which corresponds to the "weights" of the layer. It is therefore sufficient to
    # reweight the relevances and perform a gradient_wrt pass.
    grad = ilayers.GradientWRT(len(Xs))
    # Get activations.
    Zs = kutils.apply(self._layer_wo_act, Xs)
    # Divide incoming relevance by the activations.
    tmp = [ilayers.SafeDivide()([a, b]) for a, b in zip(Rs, Zs)]
    # Propagate the relevance to the input neurons
    # using the gradient.
    tmp = iutils.to_list(grad(Xs + Zs + tmp))
    # Re-weight relevance with the input values.
    return [
        tensorflow.keras.layers.Multiply()([a, b]) for a, b in zip(Xs, tmp)
    ]
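# --- Illustrative sketch (not library code): relevance redistribution through a 1-D
# --- sum-pooling layer in numpy. Since every output is the sum of its window, the
# --- gradient of each output with respect to each contributing input is 1, so the
# --- rule reduces to splitting R proportionally to the input values. The function
# --- name, window size and non-overlapping windows are assumptions for the example.
import numpy as np

def sum_pool_lrp_1d(x, R_out, pool=2, eps=1e-12):
    """x has length divisible by `pool`; R_out has one value per pooling window."""
    windows = x.reshape(-1, pool)
    Z = windows.sum(axis=1, keepdims=True)            # forward sum per window
    S = R_out.reshape(-1, 1) / (Z + (Z == 0) * eps)   # relevance per unit activation
    return (windows * S).reshape(-1)                  # re-weight with the inputs

x = np.array([1.0, 3.0, 2.0, -1.0])
R_out = np.array([0.5, 1.0])
print(sum_pool_lrp_1d(x, R_out))   # [0.125, 0.375, 2.0, -1.0]; total is conserved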