def search(self, query, return_k=10, search_budget=100):
    """
    Query the index and return the top results.

    Args:
        query: query vector; a paddle Tensor is converted with ``.numpy()``
            before it is passed on.
        return_k (int): number of result slots allocated and returned.
        search_budget (int): budget value forwarded to the search routine.

    Returns:
        tuple: ``(scores, docs)`` when ``self.with_attr`` is truthy, where
        ``docs`` are looked up in ``self.gallery_doc_dict`` by stringified
        id; otherwise ``(scores, ids)`` with ``ids`` as a Python list.
    """
    if paddle.is_tensor(query):
        query = query.numpy()

    # Result buffers handed to the search routine (presumably filled in
    # place by it, since they are returned without reassignment).
    ids_buf = np.zeros(return_k, dtype=np.uint64)
    scores_buf = np.zeros(return_k, dtype=np.float64)

    # dist_type "IP" selects the mobius search, anything else the L2 search.
    run_search = (search_mobius_index
                  if self.dist_type == "IP" else search_l2_index)
    run_search(query, self.dim, search_budget, return_k,
               ctypes.byref(self.index_context), ids_buf, scores_buf)

    found_ids = ids_buf.tolist()
    if not self.with_attr:
        return scores_buf, found_ids

    docs = [self.gallery_doc_dict[str(doc_id)] for doc_id in found_ids]
    return scores_buf, docs
def RAP_relprop(self, R_p):
    """
    Backward relevance step for this layer.

    Args:
        R_p: a paddle Tensor, or a sequence of entries that are each
            propagated independently.

    Returns:
        The propagated result for a Tensor input, otherwise a list with one
        result per entry of ``R_p``.
    """

    def backward(R_p):
        # One reference to the stored input per branch (``self.num`` of them).
        Z = [self.X for _ in range(self.num)]

        Spp = []
        Spn = []
        # zip() over two iterables yields (z, rp) pairs, so exactly two
        # names are unpacked per iteration.
        for z, rp in zip(Z, R_p):
            Spp.append(safe_divide(paddle.clip(rp, min=0), z))
            Spn.append(safe_divide(paddle.clip(rp, max=0), z))

        Cpp = self.gradprop(Z, self.X, Spp)[0]
        Cpn = self.gradprop(Z, self.X, Spn)[0]

        # NOTE(review): the two contributions are multiplied rather than
        # summed; confirm this is the intended rule.
        return self.X * (Cpp * Cpn)

    if paddle.is_tensor(R_p):
        return backward(R_p)
    return [backward(entry) for entry in R_p]
def compute(self, pred, label):
    """
    Convert network outputs and labels into flat class-index tensors.

    Args:
        pred (Tensor): predictions; the last dimension must equal
            ``self.num_labels``. Reshaped to ``[-1, num_labels]`` and reduced
            with argmax over the last axis.
        label (Tensor): ground truth. If its last dimension equals
            ``self.num_labels`` it is treated like ``pred`` (reshape +
            argmax, e.g. a one-hot encoding); otherwise it is flattened and
            treated as class indices.

    Returns:
        tuple of Tensor: ``(pred, label)`` after the conversion above. The
        tuple is meant to be passed to `update`.

    Raises:
        ValueError: if either input is not a paddle tensor, if the last
            dimension of ``pred`` is not ``num_labels``, if an index label is
            ``>= num_labels``, or if the resulting lengths differ.
    """
    inputs_are_tensors = paddle.is_tensor(pred) and paddle.is_tensor(label)
    if not inputs_are_tensors:
        raise ValueError('pred and label must be paddle tensor')

    if pred.shape[-1] != self.num_labels:
        raise ValueError(f'The last dim of pred is {pred.shape[-1]}, '
                         f'which should be num_labels')

    pred_idx = paddle.argmax(
        paddle.reshape(pred, [-1, self.num_labels]), axis=-1)

    if label.shape[-1] == self.num_labels:
        # NOTE(review): index labels whose last dimension happens to equal
        # num_labels also take this branch.
        label_idx = paddle.argmax(
            paddle.reshape(label, [-1, self.num_labels]), axis=-1)
    else:
        label_idx = paddle.reshape(label, [-1])
        largest = paddle.max(label_idx)
        if largest >= self.num_labels:
            raise ValueError(
                f"Tensor label has value {largest}, "
                f"which is no less than num_labels")

    if pred_idx.shape[0] != label_idx.shape[0]:
        raise ValueError(
            f"The length of pred is not equal to the length of label")

    return pred_idx, label_idx
def relprop(self, R, alpha):
    """
    Backward relevance step: scale the stored input by the gradient-based
    term derived from ``R``.

    Args:
        R: incoming relevance.
        alpha: accepted for interface compatibility; not used here.

    Returns:
        ``self.X * C`` when ``self.X`` is a Tensor, otherwise a two-element
        list built from ``self.X[0]`` and ``self.X[1]``.
    """
    forward_out = self.forward(self.X)
    ratio = safe_divide(R, forward_out)
    grad = self.gradprop(forward_out, self.X, ratio)[0]

    if paddle.is_tensor(self.X):
        return self.X * grad
    # Non-tensor input: only the first two entries are used.
    return [self.X[0] * grad, self.X[1] * grad]
def backward(R_p):
    """
    Propagate one relevance entry ``R_p`` through the layer.

    Reads ``self`` from the enclosing scope. Returns ``self.X * Cp`` for a
    Tensor input, otherwise a two-element list built from ``self.X[0]`` and
    ``self.X[1]``.
    """
    forward_out = self.forward(self.X)
    ratio = safe_divide(R_p, forward_out)
    grad = self.gradprop(forward_out, self.X, ratio)[0]

    if paddle.is_tensor(self.X):
        return self.X * grad
    # Non-tensor input: only the first two entries are used.
    return [self.X[0] * grad, self.X[1] * grad]
def m_relprop(self, R, pred, alpha):
    """
    Run ``m_relprop`` backwards through classifier, avgpool and features.

    The classifier result is reshaped to the shape of the ``Y`` attribute of
    the last sub-layer of ``self.features`` before it is passed on. A
    non-tensor result is reshaped entry by entry, in place.
    """

    def last_feature_shape():
        # Shape of ``Y`` on the last registered sub-layer of the features.
        return next(reversed(self.features._sub_layers.values())).Y.shape

    rel = self.classifier.m_relprop(R, pred, alpha)
    if paddle.is_tensor(rel):
        rel = rel.reshape(last_feature_shape())
    else:
        for idx, entry in enumerate(rel):
            rel[idx] = entry.reshape(last_feature_shape())

    rel = self.avgpool.m_relprop(rel, pred, alpha)
    return self.features.m_relprop(rel, pred, alpha)
def RAP_relprop(self, R):
    """
    Run ``RAP_relprop`` backwards through classifier, avgpool and features.

    The classifier result is reshaped to the shape of the ``Y`` attribute of
    the last sub-layer of ``self.features`` before it is passed on. A
    non-tensor result is reshaped entry by entry, in place.
    """

    def last_feature_shape():
        # Shape of ``Y`` on the last registered sub-layer of the features.
        return next(reversed(self.features._sub_layers.values())).Y.shape

    rel = self.classifier.RAP_relprop(R)
    if paddle.is_tensor(rel):
        rel = rel.reshape(last_feature_shape())
    else:
        for idx, entry in enumerate(rel):
            rel[idx] = entry.reshape(last_feature_shape())

    rel = self.avgpool.RAP_relprop(rel)
    return self.features.RAP_relprop(rel)
def backward(R_p):
    """
    Propagate one relevance tensor ``R_p`` through the normalisation layer.

    Reads ``self`` and the helper ``f`` from the enclosing scope. When
    ``self.bias`` is a tensor, a bias share is removed from ``R_p``,
    propagated separately through ``f`` and added back to the result.
    """
    X = self.X
    # Per-channel scale with singleton axes inserted at positions 0, 2 and 3:
    # weight / sqrt(variance ** 2 + eps).
    # NOTE(review): the variance is squared before eps is added; confirm.
    variance = self._variance.unsqueeze(0).unsqueeze(2).unsqueeze(3)
    weight = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3) / (
        (variance.pow(2) + self.eps).pow(0.5))

    has_bias = paddle.is_tensor(self.bias)
    if has_bias:
        bias = self.bias.unsqueeze(-1).unsqueeze(-1)
        nonzero = R_p.not_equal(ZERO_TENSOR).astype(self.bias.dtype)
        # paddle reductions take ``axis``; the torch-style ``dim`` keyword is
        # not part of the documented paddle.sum signature.
        bias_p = safe_divide(bias * nonzero,
                             nonzero.sum(axis=[2, 3], keepdim=True))
        R_p = R_p - bias_p

    Rp = f(R_p, weight, X)
    if has_bias:
        Rp = Rp + f(bias_p, weight, X)
    return Rp
def forward(ctx, run_function, preserve_rng_state, *args):
    """
    Forward pass of the recompute function.

    Stores on ``ctx`` everything backward needs (function, inputs, optional
    CUDA RNG state, AMP settings) and then runs ``run_function(*args)``
    under ``paddle.no_grad()``.

    Args:
        ctx: context object that receives the saved state.
        run_function: callable invoked as ``run_function(*args)``.
        preserve_rng_state: when truthy, the CUDA RNG state is captured.
        *args: inputs of ``run_function``; tensor inputs are saved through
            ``ctx.save_for_backward``.

    Returns:
        The outputs of ``run_function(*args)``.

    Raises:
        RuntimeError: ``preserve_rng_state`` is set and the current device
            string does not contain ``'gpu:'``.
        ValueError: the tracer reports an AMP level other than O0, O1 or O2.
    """
    if framework._dygraph_tracer()._has_grad:
        check_recompute_necessary(args)

    # Kept for the recomputation.
    ctx.run_function = run_function
    ctx.preserve_rng_state = preserve_rng_state

    # NOTE: backward() must return as many outputs as forward() has tensor
    # inputs, in the same order. Tensor inputs are saved separately; their
    # slot in ``ctx.inputs`` holds None and their position is recorded in
    # ``ctx.tensor_indices``.
    tensor_inputs = []
    ctx.inputs = []
    ctx.tensor_indices = []
    for position, arg in enumerate(args):
        if not paddle.is_tensor(arg):
            ctx.inputs.append(arg)
            continue
        tensor_inputs.append(arg)
        ctx.tensor_indices.append(position)
        ctx.inputs.append(None)
    ctx.save_for_backward(*tensor_inputs)

    # NOTE: recompute with RNG restore only supports the scenario of one
    # process per CUDA GPU; one process with multiple GPUs and mixed GPU/CPU
    # scenarios are not supported.
    if ctx.preserve_rng_state:
        cur_device = paddle.get_device()
        if 'gpu:' not in cur_device:
            raise RuntimeError(
                "Recompute with RNG perserve is not support current device: {}.".
                format(cur_device))
        ctx.fw_cuda_rng_state = paddle.get_cuda_rng_state()

    # TODO support AMP
    tracer = framework._dygraph_tracer()
    level = tracer._amp_level
    ctx.is_fw_autocast = not (level == core.AmpLevel.O0)
    if level == core.AmpLevel.O2:
        ctx.amp_level = 'O2'
    elif level in (core.AmpLevel.O1, core.AmpLevel.O0):
        # O0 is recorded as 'O1' as well; is_fw_autocast is False for O0.
        ctx.amp_level = 'O1'
    else:
        raise ValueError("unsupported amp level: {}".format(level))
    ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list()

    with paddle.no_grad():
        outputs = run_function(*args)
    return outputs
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a list of images on a ``num_rows`` x ``num_cols`` grid.

    Args:
        imgs: iterable of images; paddle tensors are converted with
            ``.numpy()``, anything else (e.g. a PIL image) is passed to
            ``imshow`` unchanged.
        num_rows (int): number of subplot rows.
        num_cols (int): number of subplot columns.
        titles: optional sequence of titles, indexed like ``imgs``.
        scale (float): size of one grid cell, used to build ``figsize``.

    Returns:
        The flattened array of Axes.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False makes subplots() always return a 2-D array of Axes, so
    # flatten() also works for a 1 x 1 grid, where a bare Axes object would
    # be returned otherwise.
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if paddle.is_tensor(img):
            # Image tensor
            ax.imshow(img.numpy())
        else:
            # PIL image
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
def _hp_recompute(function, *args):
    """
    Run ``function(*args)`` through ``_HPRecomputeFunction`` and return its
    outputs: the single output itself, or a tuple of all outputs.
    """
    # NOTE(shenliang03): the current hybrid parallel recompute has
    # limitations. It cannot handle the following situations:
    # 1. The outputs of the recomputed function contain tensors that do not
    #    require gradients.
    # 2. A forward output tensor has no gradient. This can be worked around
    #    temporarily with detach().
    # 3. Only the float dtype is used to decide whether an output tensor
    #    needs a gradient.
    collected = []
    _HPRecomputeFunction.apply(function, collected, *args)

    if len(collected) == 1:
        return collected[0]

    # Non-float tensor outputs are marked as not requiring gradients.
    for item in collected:
        if paddle.is_tensor(item) and not is_float_tensor(item):
            item.stop_gradient = True
    return tuple(collected)
def m_relprop(self, R, pred, alpha):
    """
    Run ``m_relprop`` backwards through the whole network.

    The result of ``self.fc`` is reshaped like ``self.avgpool.Y`` (entry by
    entry and in place for a non-tensor result), then handed through the
    remaining layers from ``avgpool`` back to ``conv1``.
    """
    rel = self.fc.m_relprop(R, pred, alpha)
    if paddle.is_tensor(rel):
        rel = rel.reshape_as(self.avgpool.Y)
    else:
        for idx, entry in enumerate(rel):
            rel[idx] = entry.reshape_as(self.avgpool.Y)

    # Reverse of the forward order; each attribute is looked up right before
    # it is used.
    reverse_order = ("avgpool", "layer4", "layer3", "layer2", "layer1",
                     "maxpool", "relu", "bn1", "conv1")
    for layer_name in reverse_order:
        rel = getattr(self, layer_name).m_relprop(rel, pred, alpha)
    return rel
def RAP_relprop(self, R_p):
    """
    Backward relevance step for the normalisation layer.

    Args:
        R_p: a paddle Tensor, or a sequence of tensors that are each
            propagated independently.

    Returns:
        The propagated tensor for a Tensor input, otherwise a list with one
        result per entry of ``R_p``.
    """

    def f(R, w1, x1):
        # Redistribute R onto x1 in proportion to the contributions x1 * w1.
        Z1 = x1 * w1
        S1 = safe_divide(R, Z1) * w1
        return x1 * S1

    def backward(R_p):
        X = self.X
        # Per-channel scale with singleton axes inserted at positions 0, 2
        # and 3: weight / sqrt(variance ** 2 + eps).
        # NOTE(review): the variance is squared before eps is added; confirm.
        variance = self._variance.unsqueeze(0).unsqueeze(2).unsqueeze(3)
        weight = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3) / (
            (variance.pow(2) + self.eps).pow(0.5))

        has_bias = paddle.is_tensor(self.bias)
        if has_bias:
            bias = self.bias.unsqueeze(-1).unsqueeze(-1)
            nonzero = R_p.not_equal(ZERO_TENSOR).astype(self.bias.dtype)
            # paddle reductions take ``axis``; the torch-style ``dim``
            # keyword is not part of the documented paddle.sum signature.
            bias_p = safe_divide(bias * nonzero,
                                 nonzero.sum(axis=[2, 3], keepdim=True))
            R_p = R_p - bias_p

        Rp = f(R_p, weight, X)
        if has_bias:
            # The bias share is propagated separately and added back.
            Rp = Rp + f(bias_p, weight, X)
        return Rp

    if paddle.is_tensor(R_p):
        return backward(R_p)
    return [backward(entry) for entry in R_p]
def m_relprop(self, R, pred, alpha):
    """
    Run ``m_relprop`` backwards through the block.

    ``self.add`` splits the incoming value into the main-branch part and the
    shortcut part. The shortcut part goes through ``self.downsample`` when
    one is set, the main part through bn3 ... conv1. Both are summed at the
    end (element-wise per entry, in place, for a non-tensor result).
    """
    main = self.relu3.m_relprop(R, pred, alpha)
    main, shortcut = self.add.m_relprop(main, pred, alpha)

    if self.downsample is not None:
        shortcut = self.downsample.m_relprop(shortcut, pred, alpha)

    # Main branch in reverse of the forward order; each attribute is looked
    # up right before it is used.
    reverse_order = ("bn3", "conv3", "relu2", "bn2", "conv2", "relu1",
                     "bn1", "conv1")
    for layer_name in reverse_order:
        main = getattr(self, layer_name).m_relprop(main, pred, alpha)

    if paddle.is_tensor(main):
        return main + shortcut

    for idx, entry in enumerate(main):
        main[idx] = entry + shortcut[idx]
    return main
def RAP_relprop(self, R_p):
    """
    Backward relevance step for a layer whose stored input ``self.X`` is a
    sequence.

    Args:
        R_p: a paddle Tensor, or a sequence of entries that are each
            propagated independently.

    Returns:
        For a Tensor input, a list with one result per element of
        ``self.X``; otherwise a list of such lists, one per entry of ``R_p``.
    """

    def backward(rel):
        forward_out = self.forward(self.X, self.dim)
        ratio = safe_divide(rel, forward_out)
        grads = self.gradprop(forward_out, self.X, ratio)
        # Each stored input is scaled by its own gradient term.
        return [x * grad for x, grad in zip(self.X, grads)]

    if paddle.is_tensor(R_p):
        return backward(R_p)
    return [backward(entry) for entry in R_p]
def RAP_relprop(self, R_p):
    """
    Backward relevance step for this layer.

    Args:
        R_p: a paddle Tensor, or a sequence of entries that are each
            propagated independently.

    Returns:
        The propagated result for a Tensor input, otherwise a list with one
        result per entry of ``R_p``.
    """

    def backward(rel):
        forward_out = self.forward(self.X)
        ratio = safe_divide(rel, forward_out)
        grad = self.gradprop(forward_out, self.X, ratio)[0]
        if paddle.is_tensor(self.X):
            return self.X * grad
        # Non-tensor input: only the first two entries are used.
        return [self.X[0] * grad, self.X[1] * grad]

    if paddle.is_tensor(R_p):
        return backward(R_p)
    return [backward(entry) for entry in R_p]
def forward(ctx, run_function, all_outputs, *args):
    """
    Forward pass of the hybrid-parallel recompute function.

    Stores the function, RNG states and AMP settings on ``ctx``, runs
    ``run_function(*args)`` under ``paddle.no_grad()``, then saves the tensor
    inputs (optionally partitioned and/or moved to CPU) for backward.

    Args:
        ctx: context object that receives the saved state.
        run_function: callable invoked as ``run_function(*args)``.
        all_outputs (list): extended in place with the outputs.
        *args: inputs of ``run_function``.

    Returns:
        The output itself when it is a single tensor, otherwise
        ``tuple(outputs)``.

    Raises:
        AssertionError: the current device string does not contain ``'gpu:'``.
        ValueError: the tracer reports an AMP level other than O0, O1 or O2.
    """
    check_recompute_necessary(args)

    # Kept for the recomputation.
    ctx.run_function = run_function

    # RNG states at forward time.
    ctx.fwd_cuda_rng_state = paddle.get_cuda_rng_state()
    rng_tracker = get_rng_state_tracker()
    ctx.fwd_cuda_rng_state_tracker = rng_tracker.get_states_tracker()

    # Containers for the inputs saved for backward.
    ctx.inputs = []
    ctx.tensor_indices = []
    ctx.tensor_shapes = []
    tensor_inputs = []

    cur_device = paddle.get_device()
    assert 'gpu:' in cur_device, \
        "Recompute with RNG is not support current device: {}.".format(
            cur_device)

    # TODO support AMP
    tracer = framework._dygraph_tracer()
    level = tracer._amp_level
    ctx.is_fw_autocast = not (level == core.AmpLevel.O0)
    if level == core.AmpLevel.O2:
        ctx.amp_level = 'O2'
    elif level in (core.AmpLevel.O1, core.AmpLevel.O0):
        # O0 is recorded as 'O1' as well; is_fw_autocast is False for O0.
        ctx.amp_level = 'O1'
    else:
        raise ValueError("unsupported amp level: {}".format(level))
    ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list()

    with paddle.no_grad():
        outputs = run_function(*args)

    for position, arg in enumerate(args):
        if not paddle.is_tensor(arg):
            ctx.inputs.append(arg)
            continue

        keep_stop_gradient = arg.stop_gradient
        if _recompute_partition:
            # The full shape is recorded; only a partition is stored.
            ctx.tensor_shapes.append(arg.shape)
            saved = _split_activation(arg.detach()).clone()
        else:
            saved = arg
        if _recompute_offload:
            # TODO(shenliang03) not use calculate stream to D2H to speed
            saved = saved.cpu()
        saved.stop_gradient = keep_stop_gradient

        tensor_inputs.append(saved)
        ctx.tensor_indices.append(position)
        ctx.inputs.append(None)

    ctx.save_for_backward(*tensor_inputs)

    # The caller's list is extended in place with the outputs.
    if paddle.is_tensor(outputs):
        all_outputs.append(outputs)
        return outputs
    all_outputs.extend(outputs)
    return tuple(outputs)
def make_grid(
        tensor: Union[paddle.Tensor, List[paddle.Tensor]],
        nrow: int=8,
        normalize: bool=False,
        range: Optional[Tuple[int, int]]=None,
        scale_each: bool=False,
        pad_value: int=0, ) -> paddle.Tensor:
    """Arrange a batch of images into a single grid image.

    Args:
        tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
            or a list of images all of the same size. 2D and 3D inputs are
            expanded to a batch of one.
        nrow (int, optional): Number of images placed in each row of the
            grid. Default: ``8``.
        normalize (bool, optional): If ``True``, clip to (min, max) and
            rescale towards the range (0, 1). Default: ``False``.
        range (tuple, optional): tuple (min, max) of numbers used for the
            normalisation. When omitted, min and max are taken from the
            tensor being normalised.
        scale_each (bool, optional): If ``True``, normalise every image of
            the batch separately instead of using one (min, max) for all
            images. Default: ``False``.
        pad_value (float, optional): Value of grid cells that receive no
            image. Default: ``0``.

    Returns:
        Tensor: the single image (batch axis squeezed) when the batch holds
        one image, otherwise a ``C x (H * rows) x (W * cols)`` grid.

    Raises:
        TypeError: ``tensor`` is neither a Tensor nor a list of Tensors.

    Example:
        See this notebook `here <https://gist.github.com/anonymous/bf16430f7750c023141c562f3e9f2a91>`_
    """
    if not paddle.is_tensor(tensor):
        if not (isinstance(tensor, list) and
                all(paddle.is_tensor(t) for t in tensor)):
            raise TypeError('tensor or list of tensors expected, got {}'.format(
                type(tensor)))

    # A list of tensors becomes a 4D mini-batch tensor.
    if isinstance(tensor, list):
        tensor = paddle.stack(tensor, 0)

    if tensor.dim() == 2:
        # Single image H x W.
        tensor = tensor.unsqueeze(0)
    if tensor.dim() == 3:
        # Single image; a single channel is repeated to three channels.
        if tensor.shape[0] == 1:
            tensor = paddle.concat([tensor] * 3, 0)
        tensor = tensor.unsqueeze(0)
    if tensor.dim() == 4 and tensor.shape[1] == 1:
        # Batch of single-channel images.
        tensor = paddle.concat([tensor] * 3, 1)

    if normalize is True:
        # Intended to avoid modifying the caller's tensor in place.
        # NOTE(review): confirm astype() to the same dtype returns a copy.
        tensor = tensor.astype(tensor.dtype)
        if range is not None:
            assert isinstance(range, tuple), \
                "range has to be a tuple (min, max) if specified. min and max are numbers"

        def rescale_inplace(img, low, high):
            img[:] = img.clip(min=low, max=high)
            img[:] = (img - low) / (high - low + 1e-5)

        def normalise(img, bounds):
            if bounds is None:
                rescale_inplace(img, float(img.min()), float(img.max()))
            else:
                rescale_inplace(img, bounds[0], bounds[1])

        if scale_each is True:
            # Iterate over the mini-batch dimension.
            for single in tensor:
                normalise(single, range)
        else:
            normalise(tensor, range)

    if tensor.shape[0] == 1:
        return tensor.squeeze(0)

    # Lay the mini-batch out on a grid, row by row.
    nmaps = tensor.shape[0]
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    height = int(tensor.shape[2])
    width = int(tensor.shape[3])
    num_channels = tensor.shape[1]
    canvas = paddle.zeros(
        (num_channels, height * ymaps, width * xmaps),
        dtype=tensor.dtype) + pad_value

    # ``range`` is shadowed by the parameter, hence the file's ``irange``.
    for k in irange(nmaps):
        y, x = divmod(k, xmaps)
        top, left = y * height, x * width
        canvas[:, top:top + height, left:left + width] = tensor[k]
    return canvas
def test_is_tensor_number(self, dtype="float32"):
    """A plain Python number is not reported as a tensor."""
    paddle.disable_static()
    number = 5
    is_tensor = paddle.is_tensor(number)
    self.assertFalse(is_tensor)
def test_is_tensor_list(self, dtype="float32"):
    """A plain Python list is not reported as a tensor."""
    paddle.disable_static()
    values = [1, 2, 3]
    is_tensor = paddle.is_tensor(values)
    self.assertFalse(is_tensor)
def test_is_tensor_real(self, dtype="float32"):
    """A tensor created by paddle.rand is reported as a tensor."""
    paddle.disable_static()
    real_tensor = paddle.rand([3, 2, 4], dtype=dtype)
    is_tensor = paddle.is_tensor(real_tensor)
    self.assertTrue(is_tensor)
def num(tensor_like):
    """
    Return a Python scalar for a paddle tensor; any other value is returned
    unchanged.

    The tensor is detached, moved to CPU and converted with ``numpy().item()``.
    """
    if not paddle.is_tensor(tensor_like):
        return tensor_like
    host_array = tensor_like.detach().cpu().numpy()
    return host_array.item()
def summary(net, input_size=None, dtypes=None, input=None):
    """Prints a string summary of the network.

    Args:
        net (Layer): the network which must be a subinstance of Layer.
        input_size (tuple|InputSpec|list[tuple|InputSpec]): size of input tensor. if model only
                    have one input, input_size can be tuple or InputSpec. if model
                    have multiple input, input_size must be a list which contain
                    every input's shape. Note that input_size only dim of
                    batch_size can be None or -1. Default: None. Note that
                    input_size and input cannot be None at the same time.
        dtypes (str, optional): if dtypes is None, 'float32' will be used, Default: None.
        input: the input tensor. if input is given, input_size and dtype will be ignored, Default: None.

    Returns:
        Dict: a summary of the network including total params and total trainable params.

    Raises:
        ValueError: if both ``input_size`` and ``input`` are None, if
            ``input`` has an unsupported type, or if a shape contains more
            than one unknown dimension or a non-positive number.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn

            class LeNet(nn.Layer):
                def __init__(self, num_classes=10):
                    super(LeNet, self).__init__()
                    self.num_classes = num_classes
                    self.features = nn.Sequential(
                        nn.Conv2D(
                            1, 6, 3, stride=1, padding=1),
                        nn.ReLU(),
                        nn.MaxPool2D(2, 2),
                        nn.Conv2D(
                            6, 16, 5, stride=1, padding=0),
                        nn.ReLU(),
                        nn.MaxPool2D(2, 2))

                    if num_classes > 0:
                        self.fc = nn.Sequential(
                            nn.Linear(400, 120),
                            nn.Linear(120, 84),
                            nn.Linear(
                                84, 10))

                def forward(self, inputs):
                    x = self.features(inputs)

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x)
                    return x

            lenet = LeNet()

            params_info = paddle.summary(lenet, (1, 1, 28, 28))
            print(params_info)

            # multi input demo
            class LeNetMultiInput(LeNet):

                def forward(self, inputs, y):
                    x = self.features(inputs)

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x + y)
                    return x

            lenet_multi_input = LeNetMultiInput()

            params_info = paddle.summary(lenet_multi_input, [(1, 1, 28, 28), (1, 400)],
                                        dtypes=['float32', 'float32'])
            print(params_info)

            # list input demo
            class LeNetListInput(LeNet):

                def forward(self, inputs):
                    x = self.features(inputs[0])

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x + inputs[1])
                    return x

            lenet_list_input = LeNetListInput()
            input_data = [paddle.rand([1, 1, 28, 28]), paddle.rand([1, 400])]
            params_info = paddle.summary(lenet_list_input, input=input_data)
            print(params_info)

            # dict input demo
            class LeNetDictInput(LeNet):

                def forward(self, inputs):
                    x = self.features(inputs['x1'])

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x + inputs['x2'])
                    return x

            lenet_dict_input = LeNetDictInput()
            input_data = {'x1': paddle.rand([1, 1, 28, 28]),
                          'x2': paddle.rand([1, 400])}
            params_info = paddle.summary(lenet_dict_input, input=input_data)
            print(params_info)

    """
    if input_size is None and input is None:
        raise ValueError(
            "input_size and input cannot be None at the same time")

    # Derive input_size from the given input when only the input is provided.
    if input_size is None and input is not None:
        if paddle.is_tensor(input):
            input_size = tuple(input.shape)
        elif isinstance(input, (list, tuple)):
            input_size = [tuple(x.shape) for x in input]
        elif isinstance(input, dict):
            input_size = [tuple(value.shape) for value in input.values()]
        elif isinstance(input, paddle.fluid.framework.Variable):
            input_size = tuple(input.shape)
        else:
            raise ValueError(
                "Input is not tensor, list, tuple and dict, unable to determine input_size, please input input_size."
            )

    # Normalise input_size: InputSpec -> tuple, int -> 1-tuple.
    if isinstance(input_size, InputSpec):
        _input_size = tuple(input_size.shape)
    elif isinstance(input_size, list):
        _input_size = []
        for item in input_size:
            if isinstance(item, int):
                item = (item, )
            # The message is built by implicit concatenation so that no
            # source indentation ends up inside the text.
            assert isinstance(item, (tuple, InputSpec)), (
                'When input_size is list, '
                'expect item in input_size is a tuple or InputSpec, '
                'but got {}'.format(type(item)))

            if isinstance(item, InputSpec):
                _input_size.append(tuple(item.shape))
            else:
                _input_size.append(item)
    elif isinstance(input_size, int):
        _input_size = (input_size, )
    else:
        _input_size = input_size

    def _is_shape(shape):
        # A shape is a flat sequence, i.e. it holds no nested list/tuple.
        for item in shape:
            if isinstance(item, (list, tuple)):
                return False
        return True

    def _check_shape(shape):
        # At most one unknown dim (None / -1) is allowed; it is replaced by
        # 1. Numeric dims must be positive.
        num_unknown = 0
        new_shape = []
        for item in shape:
            if item is None or item == -1:
                num_unknown += 1
                if num_unknown > 1:
                    raise ValueError(
                        'Option input_size only the dim of batch_size can be None or -1.'
                    )
                item = 1
            elif isinstance(item, numbers.Number):
                if item <= 0:
                    raise ValueError(
                        "Expected element in input size greater than zero, but got {}"
                        .format(item))
            new_shape.append(item)
        return tuple(new_shape)

    def _check_input(input_size):
        # Validate a single shape, or recurse into a collection of shapes.
        if isinstance(input_size, (list, tuple)) and _is_shape(input_size):
            return _check_shape(input_size)
        else:
            return [_check_input(i) for i in input_size]

    if not paddle.in_dynamic_mode():
        warnings.warn(
            "Your model was created in static mode, this may not get correct summary information!"
        )
        in_train_mode = False
    else:
        in_train_mode = net.training

    if in_train_mode:
        net.eval()

    try:
        _input_size = _check_input(_input_size)

        result, params_info = summary_string(net, _input_size, dtypes, input)
        print(result)
    finally:
        # Restore training mode even when validation or the summary pass
        # raises, so a failed call does not leave the net in eval mode.
        if in_train_mode:
            net.train()

    return params_info
def RAP_relprop(self, R_p):
    """
    Backward relevance step for the linear layer.

    Weights and the stored input are split into positive and negative parts
    with ``paddle.clip``. When ``R_p`` is a tensor whose maximum equals 1,
    the call is treated as the first propagation step (``first_prop``
    followed by ``redistribute``); otherwise the regular ``backward`` rule
    is applied.

    Args:
        R_p: incoming relevance.

    Returns:
        The relevance propagated to the layer input.
    """

    def shift_rel(R, R_val):
        # Subtract an equal share of R_val from every non-zero entry of R.
        R_nonzero = paddle.not_equal(R, ZERO_TENSOR).astype(R.dtype)
        shift = safe_divide(
            R_val, paddle.sum(R_nonzero, axis=-1,
                              keepdim=True)) * paddle.not_equal(
                                  R, ZERO_TENSOR).astype(R.dtype)
        K = R - shift
        return K

    def pos_prop(R, Za1, Za2, x1):
        R_pos = paddle.clip(R, min=0)
        R_neg = paddle.clip(R, max=0)
        S1 = safe_divide((R_pos * safe_divide((Za1 + Za2), Za1 + Za2)), Za1)
        C1 = x1 * self.gradprop(Za1, x1, S1)[0]
        S1n = safe_divide((R_neg * safe_divide((Za1 + Za2), Za1 + Za2)), Za1)
        C1n = x1 * self.gradprop(Za1, x1, S1n)[0]
        S2 = safe_divide((R_pos * safe_divide((Za2), Za1 + Za2)), Za2)
        C2 = x1 * self.gradprop(Za2, x1, S2)[0]
        S2n = safe_divide((R_neg * safe_divide((Za2), Za1 + Za2)), Za2)
        C2n = x1 * self.gradprop(Za2, x1, S2n)[0]
        Cp = C1 + C2
        Cn = C2n + C1n
        C = (Cp + Cn)
        # Shift so that the last-axis sum of C matches that of R.
        C = shift_rel(
            C,
            C.sum(axis=-1, keepdim=True) - R.sum(axis=-1, keepdim=True))
        return C

    def f(R, w1, w2, x1, x2):
        R_nonzero = R.not_equal(ZERO_TENSOR).astype(R.dtype)
        Za1 = F.linear(x1, w1) * R_nonzero
        Za2 = -F.linear(x1, w2) * R_nonzero
        Zb1 = -F.linear(x2, w1) * R_nonzero
        Zb2 = F.linear(x2, w2) * R_nonzero
        C1 = pos_prop(R, Za1, Za2, x1)
        C2 = pos_prop(R, Zb1, Zb2, x2)
        return C1 + C2

    def first_prop(pd, px, nx, pw, nw):
        Rpp = F.linear(px, pw) * pd
        Rpn = F.linear(px, nw) * pd
        Rnp = F.linear(nx, pw) * pd
        Rnn = F.linear(nx, nw) * pd
        Pos = (Rpp + Rnn).sum(axis=-1, keepdim=True)
        Neg = (Rpn + Rnp).sum(axis=-1, keepdim=True)

        Z1 = F.linear(px, pw)
        Z2 = F.linear(px, nw)
        Z3 = F.linear(nx, pw)
        Z4 = F.linear(nx, nw)

        S1 = safe_divide(Rpp, Z1)
        S2 = safe_divide(Rpn, Z2)
        S3 = safe_divide(Rnp, Z3)
        S4 = safe_divide(Rnn, Z4)
        C1 = px * self.gradprop(Z1, px, S1)[0]
        C2 = px * self.gradprop(Z2, px, S2)[0]
        C3 = nx * self.gradprop(Z3, nx, S3)[0]
        C4 = nx * self.gradprop(Z4, nx, S4)[0]

        # The bias is split according to the Pos / Neg shares.
        bp = self.bias * pd * safe_divide(Pos, Pos + Neg)
        bn = self.bias * pd * safe_divide(Neg, Pos + Neg)
        Sb1 = safe_divide(bp, Z1)
        Sb2 = safe_divide(bn, Z2)
        Cb1 = px * self.gradprop(Z1, px, Sb1)[0]
        Cb2 = px * self.gradprop(Z2, px, Sb2)[0]
        return C1 + C4 + Cb1 + C2 + C3 + Cb2

    def backward(R_p, px, nx, pw, nw):
        # Bias handling is intentionally not applied on this path.
        Rp = f(R_p, pw, nw, px, nx)
        return Rp

    def redistribute(Rp_tmp):
        Rp = paddle.clip(Rp_tmp, min=0)
        Rn = paddle.clip(Rp_tmp, max=0)
        R_tot = (Rp - Rn).sum(axis=-1, keepdim=True)
        Rp_tmp3 = safe_divide(Rp, R_tot) * \
            (Rp + Rn).sum(axis=-1, keepdim=True)
        Rn_tmp3 = -safe_divide(Rn, R_tot) * \
            (Rp + Rn).sum(axis=-1, keepdim=True)
        return Rp_tmp3 + Rn_tmp3

    pw = paddle.clip(self.weight, min=0)
    nw = paddle.clip(self.weight, max=0)
    X = self.X
    px = paddle.clip(X, min=0)
    nx = paddle.clip(X, max=0)

    if paddle.is_tensor(R_p) and R_p.max() == 1:
        # First propagation.
        pd = R_p
        Rp_tmp = first_prop(pd, px, nx, pw, nw)
        return redistribute(Rp_tmp)

    return backward(R_p, px, nx, pw, nw)
def build(self,
          gallery_vectors,
          gallery_docs=[],
          pq_size=100,
          index_path='graph_index/',
          append_index=False):
    """
    Build the index for ``gallery_vectors`` and write ``info.json``.

    Args:
        gallery_vectors: 2D array of vectors; a paddle Tensor is converted
            with ``.numpy()``.
        gallery_docs: optional per-vector documents. When given, its length
            must equal the number of vectors. The default list is only read,
            never mutated.
        pq_size (int): forwarded to the index build routine.
        index_path (str): directory for the index files and ``info.json``;
            created if missing.
        append_index (bool): when ``True`` and ``info.json`` already exists,
            the new entries are merged into the stored metadata.

    Raises:
        AssertionError: if the vectors are not 2D, if the number of docs does
            not match, or if the stored metadata is incompatible when
            appending.
    """
    if paddle.is_tensor(gallery_vectors):
        gallery_vectors = gallery_vectors.numpy()
    assert gallery_vectors.ndim == 2, "Input vector must be 2D ..."

    self.total_num = gallery_vectors.shape[0]
    self.dim = gallery_vectors.shape[1]
    # len() rather than truthiness: gallery_docs may be an array-like.
    has_docs = len(gallery_docs) > 0
    assert (len(gallery_docs) == self.total_num if has_docs else True)

    print("training index -> num: {}, dim: {}, dist_type: {}".format(
        self.total_num, self.dim, self.dist_type))

    os.makedirs(index_path, exist_ok=True)

    # A fresh buffer is created for every call into the index routines.
    index_prefix = (index_path + "/index").encode('utf-8')
    if self.dist_type == "IP":
        build_mobius_index(gallery_vectors, self.total_num, self.dim,
                           pq_size, self.mobius_pow,
                           create_string_buffer(index_prefix))
        load_mobius_index_prefix(self.total_num, self.dim,
                                 ctypes.byref(self.index_context),
                                 create_string_buffer(index_prefix))
    else:
        build_l2_index(gallery_vectors, self.total_num, self.dim, pq_size,
                       create_string_buffer(index_prefix))
        load_l2_index_prefix(self.total_num, self.dim,
                             ctypes.byref(self.index_context),
                             create_string_buffer(index_prefix))

    self.gallery_doc_dict = {}
    if has_docs:
        self.with_attr = True
        for i in range(gallery_vectors.shape[0]):
            self.gallery_doc_dict[str(i)] = gallery_docs[i]

    self.gallery_doc_dict["total_num"] = self.total_num
    self.gallery_doc_dict["dim"] = self.dim
    self.gallery_doc_dict["dist_type"] = self.dist_type
    self.gallery_doc_dict["with_attr"] = self.with_attr

    output_path = os.path.join(index_path, "info.json")
    if append_index is True and os.path.exists(output_path):
        with open(output_path, "r") as fin:
            ori_gallery_doc_dict = json.load(fin)
        assert ori_gallery_doc_dict["dist_type"] == self.gallery_doc_dict[
            "dist_type"]
        assert ori_gallery_doc_dict["dim"] == self.gallery_doc_dict["dim"]
        assert ori_gallery_doc_dict["with_attr"] == self.gallery_doc_dict[
            "with_attr"]

        offset = ori_gallery_doc_dict["total_num"]
        # Doc entries exist only when docs were passed in; without this
        # guard an attribute-less append would fail with KeyError.
        if has_docs:
            for i in range(self.total_num):
                ori_gallery_doc_dict[str(i + offset)] = \
                    self.gallery_doc_dict[str(i)]
        ori_gallery_doc_dict["total_num"] += self.total_num
        self.gallery_doc_dict = ori_gallery_doc_dict

    with open(output_path, "w") as f:
        json.dump(self.gallery_doc_dict, f)

    print("finished creating index ...")