import paddle
import paddle.fluid.layers as L


def _matrix_nms(bboxes, cate_labels, cate_scores, kernel='gaussian', sigma=2.0):
    """Matrix NMS for multi-class bboxes.

    Args:
        bboxes (Tensor): shape (n, 4)
        cate_labels (Tensor): shape (n), mask labels in descending order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str): 'linear' or 'gaussian'
        sigma (float): std in gaussian method

    Returns:
        Tensor: cate_scores_update, tensor of shape (n)
    """
    n_samples = len(cate_labels)
    if n_samples == 0:
        return []

    # Pairwise IoU between every pair of boxes (jaccard is an external helper
    # returning an n x n IoU matrix). Keep only the upper triangle so each box
    # is only compared against higher-scoring ones.
    iou_matrix = jaccard(bboxes, bboxes)  # shape: [n_samples, n_samples]
    iou_matrix = paddle.triu(iou_matrix, diagonal=1)

    # Label-specific matrix: entry (i, j) is 1 when predictions i and j share
    # the same class id. Only same-class predictions suppress each other.
    cate_labels_x = L.expand(L.reshape(cate_labels, (1, -1)),
                             [n_samples, 1])  # shape: [n_samples, n_samples]
    d = cate_labels_x - L.transpose(cate_labels_x, [1, 0])
    d = L.pow(d, 2)  # 0 for same class, > 0 otherwise; `< 1` is used because
                     # an exact `== 0` comparison was unreliable in the TF version.
    label_matrix = paddle.triu(L.cast(d < 1, 'float32'),
                               diagonal=1)  # shape: [n_samples, n_samples]

    # IoU compensation: zero out cross-class IoUs, keep same-class IoUs, then
    # take the column-wise maximum.
    compensate_iou = L.reduce_max(iou_matrix * label_matrix, [0, ])  # shape: [n_samples, ]
    # After the transpose, row i of compensate_iou repeats a_i n_samples times,
    # where a_i is the highest IoU between object i and any higher-scoring
    # object of the same class; equivalently, each column lists
    # a_0, a_1, ..., a_{n_samples-1} from top to bottom.
    compensate_iou = L.transpose(
        L.expand(L.reshape(compensate_iou, (1, -1)), [n_samples, 1]),
        [1, 0])  # shape: [n_samples, n_samples]

    # IoU decay: entry (i, j) is the IoU between predictions i and j, zeroed
    # when the classes differ; upper triangle only.
    decay_iou = iou_matrix * label_matrix  # shape: [n_samples, n_samples]

    # Matrix NMS.
    if kernel == 'gaussian':
        decay_matrix = L.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = L.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient = L.reduce_min(decay_matrix / compensate_matrix, [0, ])
    elif kernel == 'linear':
        # Walkthrough of column j (column 2 in the 1_test_matrixnms.py example):
        # - column 2 of decay_iou is [0.9389, 0.9979, 0, 0]: object 2 overlaps
        #   the two higher-scoring same-class objects with IoU 0.9389 and 0.9979.
        # - column 2 of compensate_iou is [0, 0.9409, 0.9979, 0]: the objects
        #   that outscore object 2 overlap *their own* higher-scoring same-class
        #   objects with highest IoU 0 and 0.9409.
        # - column 2 of decay_matrix is [0.0610, 0.0348, 485.28, 1]; its minimum
        #   is 0.0348, so object 1 is the one that suppresses object 2 (the last
        #   two entries never matter, as they are always >= 1).
        # In general: if entry i is the minimum of column j of decay_matrix,
        # object i suppresses object j, and the larger decay_iou is, the
        # smaller decay_matrix becomes.
        decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
        decay_coefficient = L.reduce_min(decay_matrix, [0, ])
    else:
        raise NotImplementedError

    # Update the scores.
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update

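# A minimal sketch (not from the original repo) of the same decay computation
# using the paddle 2.x dygraph API on a hypothetical 3-box toy input, all of
# one class, with pairwise IoUs already arranged in the upper triangle:
import paddle

iou = paddle.to_tensor([[0.0, 0.8, 0.9],
                        [0.0, 0.0, 0.7],
                        [0.0, 0.0, 0.0]])      # pairwise IoU, upper triangle
labels = paddle.to_tensor([1, 1, 1])
scores = paddle.to_tensor([0.9, 0.8, 0.7])     # already sorted descending

lab = paddle.expand(paddle.reshape(labels, (1, -1)), [3, 3])
label_matrix = paddle.triu(
    paddle.cast(lab == paddle.transpose(lab, [1, 0]), 'float32'), diagonal=1)
decay_iou = iou * label_matrix
compensate_iou = paddle.transpose(
    paddle.expand(paddle.reshape(paddle.max(decay_iou, axis=0), (1, -1)),
                  [3, 3]), [1, 0])
decay = paddle.min((1 - decay_iou) / (1 - compensate_iou), axis=0)  # linear
print(scores * decay)  # heavily-overlapped boxes get strongly decayed scores
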
def create_mask(self, qlen, mlen):
    """Creates the causal attention mask: a float mask where 1.0 marks a
    masked position and 0.0 an attended one.

    Args:
        qlen: query sequence length
        mlen: memory length

    ::

              same_length=False:      same_length=True:
              <mlen > < qlen >        <mlen > < qlen >
           ^ [0 0 0 0 0 1 1 1 1]     [0 0 0 0 0 1 1 1 1]
             [0 0 0 0 0 0 1 1 1]     [1 0 0 0 0 0 1 1 1]
        qlen [0 0 0 0 0 0 0 1 1]     [1 1 0 0 0 0 0 1 1]
             [0 0 0 0 0 0 0 0 1]     [1 1 1 0 0 0 0 0 1]
           v [0 0 0 0 0 0 0 0 0]     [1 1 1 1 0 0 0 0 0]
    """
    attn_mask = paddle.ones([qlen, qlen])
    mask_up = paddle.triu(attn_mask, diagonal=1)  # hide future positions
    attn_mask_pad = paddle.zeros([qlen, mlen])    # memory is fully visible
    ret = paddle.concat([attn_mask_pad, mask_up], axis=1)
    if self.same_length:
        # Additionally hide old positions so every query attends to the same
        # number of keys.
        mask_lo = paddle.tril(attn_mask, diagonal=-1)
        ret = paddle.concat([ret[:, :qlen] + mask_lo, ret[:, qlen:]], axis=1)
    return ret

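# A quick standalone check (a hypothetical free-function variant, with
# same_length passed as an argument) that reproduces the two docstring
# diagrams for qlen=5, mlen=4:
import paddle

def causal_mask(qlen, mlen, same_length=False):
    attn_mask = paddle.ones([qlen, qlen])
    mask_up = paddle.triu(attn_mask, diagonal=1)
    ret = paddle.concat([paddle.zeros([qlen, mlen]), mask_up], axis=1)
    if same_length:
        mask_lo = paddle.tril(attn_mask, diagonal=-1)
        ret = paddle.concat([ret[:, :qlen] + mask_lo, ret[:, qlen:]], axis=1)
    return ret

print(causal_mask(5, 4))                    # left diagram
print(causal_mask(5, 4, same_length=True))  # right diagram
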
def get_subsequent_mask(seq):
    """Masks out subsequent (future) positions: 1 = visible, 0 = masked."""
    len_s = seq.shape[1]
    # Broadcastable over the batch dimension; the original torch version
    # built this as torch.ones((1, sz_b, len_s)).
    subsequent_mask = 1 - paddle.triu(
        paddle.ones((1, len_s, len_s)), diagonal=1)
    return subsequent_mask

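# Usage sketch (hypothetical batch of token ids): each position can attend to
# itself and everything before it, so the mask is lower-triangular:
import paddle

seq = paddle.zeros([2, 4], dtype='int64')   # (batch, seq_len)
print(get_subsequent_mask(seq))
# Tensor of shape [1, 4, 4]:
# [[[1., 0., 0., 0.],
#   [1., 1., 0., 0.],
#   [1., 1., 1., 0.],
#   [1., 1., 1., 1.]]]
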
def generate_square_subsequent_mask(self, sz):
    """Generates a square causal mask for the sequence. Masked positions are
    filled with float('-inf'), unmasked positions with float(0.0).
    """
    # triu keeps the strictly-upper triangle and writes zeros everywhere
    # else, so the diagonal and lower triangle end up as the required 0.0.
    mask = paddle.triu(
        paddle.full(shape=[sz, sz], dtype='float32',
                    fill_value=float('-inf')),
        diagonal=1)
    return mask

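# Usage sketch: the additive mask is summed onto raw attention scores before
# softmax, so -inf entries receive exactly zero attention weight (the random
# scores below are a hypothetical stand-in for q.k attention logits):
import paddle
import paddle.nn.functional as F

sz = 4
mask = paddle.triu(
    paddle.full([sz, sz], float('-inf'), dtype='float32'), diagonal=1)
scores = paddle.rand([sz, sz])
attn = F.softmax(scores + mask, axis=-1)
print(attn)  # row i has non-zero weights only for columns <= i
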
def build_attention_mask(self):
    # Lazily create the causal attention mask, with full attention between
    # the vision tokens. Like PyTorch, Paddle uses an additive attention
    # mask here, so masked entries are filled with -inf.
    mask = paddle.ones(
        (self.context_length, self.context_length)) * float("-inf")
    # Zero out the diagonal and the lower triangle.
    mask = paddle.triu(mask, diagonal=1)
    return mask

def __call__(self, seg_preds, seg_masks, cate_labels, cate_scores,
             sum_masks=None):
    # Sort by score and keep the top pre_nms_top_n candidates.
    sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
    seg_masks = paddle.gather(seg_masks, index=sort_inds)
    seg_preds = paddle.gather(seg_preds, index=sort_inds)
    sum_masks = paddle.gather(sum_masks, index=sort_inds)
    cate_scores = paddle.gather(cate_scores, index=sort_inds)
    cate_labels = paddle.gather(cate_labels, index=sort_inds)

    seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
    # Intersection: a matrix product of the flattened binary masks.
    inter_matrix = paddle.mm(seg_masks, paddle.transpose(seg_masks, [1, 0]))
    n_samples = paddle.shape(cate_labels)
    # Union.
    sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples])
    # IoU; keep the upper triangle so each mask is only compared against
    # higher-scoring ones.
    iou_matrix = (inter_matrix / (
        sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) - inter_matrix))
    iou_matrix = paddle.triu(iou_matrix, diagonal=1)
    # Label-specific matrix: 1 where two masks share the same class.
    cate_labels_x = paddle.expand(cate_labels, shape=[n_samples, n_samples])
    label_matrix = paddle.cast(
        (cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])),
        'float32')
    label_matrix = paddle.triu(label_matrix, diagonal=1)

    # IoU compensation.
    compensate_iou = paddle.max((iou_matrix * label_matrix), axis=0)
    compensate_iou = paddle.expand(
        compensate_iou, shape=[n_samples, n_samples])
    compensate_iou = paddle.transpose(compensate_iou, [1, 0])

    # IoU decay.
    decay_iou = iou_matrix * label_matrix

    # Matrix NMS.
    if self.kernel == 'gaussian':
        decay_matrix = paddle.exp(-1 * self.sigma * (decay_iou**2))
        compensate_matrix = paddle.exp(-1 * self.sigma *
                                       (compensate_iou**2))
        decay_coefficient = paddle.min(decay_matrix / compensate_matrix,
                                       axis=0)
    elif self.kernel == 'linear':
        decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
        decay_coefficient = paddle.min(decay_matrix, axis=0)
    else:
        raise NotImplementedError

    # Update the scores.
    cate_scores = cate_scores * decay_coefficient
    y = paddle.zeros(shape=paddle.shape(cate_scores), dtype='float32')
    keep = paddle.where(cate_scores >= self.update_threshold, cate_scores,
                        y)
    keep = paddle.nonzero(keep)
    keep = paddle.squeeze(keep, axis=[1])
    # Prevent an empty result by appending one fake (last) index.
    keep = paddle.concat(
        [keep, paddle.cast(paddle.shape(cate_scores)[0] - 1, 'int64')])

    seg_preds = paddle.gather(seg_preds, index=keep)
    cate_scores = paddle.gather(cate_scores, index=keep)
    cate_labels = paddle.gather(cate_labels, index=keep)

    # Sort and keep the top post_nms_top_n.
    sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
    seg_preds = paddle.gather(seg_preds, index=sort_inds)
    cate_scores = paddle.gather(cate_scores, index=sort_inds)
    cate_labels = paddle.gather(cate_labels, index=sort_inds)
    return seg_preds, cate_scores, cate_labels

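# A minimal sketch (hypothetical toy masks) of the mask-IoU trick used above:
# with binary masks flattened to rows, pairwise intersection areas are a
# single matrix product, and unions follow from the per-mask pixel counts:
import paddle

seg_masks = paddle.to_tensor([[1., 1., 0., 0.],
                              [1., 1., 1., 0.],
                              [0., 0., 1., 1.]])  # 3 flattened binary masks
sum_masks = paddle.sum(seg_masks, axis=1)         # per-mask areas: [2, 3, 2]
inter = paddle.mm(seg_masks, paddle.transpose(seg_masks, [1, 0]))
union = sum_masks.unsqueeze(1) + sum_masks.unsqueeze(0) - inter
iou = paddle.triu(inter / union, diagonal=1)
print(iou)  # [[0., 0.6667, 0.], [0., 0., 0.25], [0., 0., 0.]]
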
def _forward(self, dec_inputs, mems=None):
    bsz, qlen = dec_inputs.shape

    word_emb = self.word_emb(dec_inputs)
    mlen = mems[0].shape[1] if mems is not None else 0
    klen = mlen + qlen
    if self.same_length:
        # triu hides the future; tril additionally hides memory older than
        # mem_len, so every query attends to the same number of positions.
        all_ones = paddle.ones(shape=[qlen, klen], dtype=word_emb.dtype)
        mask_len = klen - self.mem_len
        if mask_len > 0:
            mask_shift_len = qlen - mask_len
        else:
            mask_shift_len = qlen
        dec_attn_mask = (paddle.triu(
            all_ones, diagonal=1 + mlen) + paddle.tril(
                all_ones, -mask_shift_len)).unsqueeze([0, 1])
    else:
        dec_attn_mask = paddle.ones(
            shape=[qlen, klen], dtype=word_emb.dtype)
        dec_attn_mask = paddle.triu(
            dec_attn_mask, diagonal=1 + mlen).unsqueeze([0, 1])

    hids = []
    if self.attn_type == 0:
        pos_seq = paddle.arange(klen - 1, -1, -1.0, dtype=word_emb.dtype)
        if self.clamp_len > 0:
            pos_seq = paddle.clip(pos_seq, max=self.clamp_len)
        pos_emb = self.pos_emb(pos_seq, bsz)

        core_out = self.drop(word_emb)
        pos_emb = self.drop(pos_emb)

        hids.append(core_out)
        for i, layer in enumerate(self.layers):
            mems_i = None if mems is None else mems[i]
            core_out = layer(
                core_out,
                pos_emb,
                self.r_w_bias,
                self.r_r_bias,
                dec_attn_mask=dec_attn_mask,
                mems=mems_i)
            hids.append(core_out)
    elif self.attn_type == 1:
        core_out = self.drop(word_emb)
        hids.append(core_out)
        for i, layer in enumerate(self.layers):
            if self.clamp_len > 0:
                r_emb = self.r_emb[i][-self.clamp_len:]
                r_bias = self.r_bias[i][-self.clamp_len:]
            else:
                r_emb, r_bias = self.r_emb[i], self.r_bias[i]

            mems_i = None if mems is None else mems[i]
            core_out = layer(
                core_out,
                r_emb,
                self.r_w_bias[i],
                r_bias,
                dec_attn_mask=dec_attn_mask,
                mems=mems_i)
            hids.append(core_out)
    elif self.attn_type == 2:
        pos_seq = paddle.arange(klen - 1, -1, -1.0, dtype=word_emb.dtype)
        if self.clamp_len > 0:
            pos_seq = paddle.clip(pos_seq, max=self.clamp_len)
        pos_emb = self.pos_emb(pos_seq, bsz)

        core_out = self.drop(word_emb + pos_emb[-qlen:])

        hids.append(core_out)
        for i, layer in enumerate(self.layers):
            mems_i = None if mems is None else mems[i]
            if mems_i is not None and i == 0:
                mems_i += pos_emb[:mlen]
            core_out = layer(
                core_out, dec_attn_mask=dec_attn_mask, mems=mems_i)
            hids.append(core_out)
    elif self.attn_type == 3:
        core_out = self.drop(word_emb)
        hids.append(core_out)
        for i, layer in enumerate(self.layers):
            mems_i = None if mems is None else mems[i]
            if mems_i is not None and mlen > 0:
                # Left-pad the per-layer relative embeddings to mlen rows.
                cur_emb = self.r_emb[i][:-qlen]
                cur_size = cur_emb.shape[0]
                if cur_size < mlen:
                    cur_emb_pad = paddle.expand(
                        cur_emb[0:1], [mlen - cur_size, -1, -1])
                    cur_emb = paddle.concat([cur_emb_pad, cur_emb], 0)
                else:
                    cur_emb = cur_emb[-mlen:]
                mems_i += cur_emb.reshape([mlen, 1, -1])
            core_out += self.r_emb[i][-qlen:].reshape([qlen, 1, -1])
            core_out = layer(
                core_out, dec_attn_mask=dec_attn_mask, mems=mems_i)
            hids.append(core_out)

    core_out = self.drop(core_out)

    new_mems = self._update_mems(hids, mems, mlen, qlen)
    return core_out, new_mems

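# A small sketch of the same_length mask above with toy sizes (qlen=3,
# mlen=3, and an assumed mem_len=2): triu hides the future, tril hides
# memory older than mem_len, so every query row masks out the same number
# of key positions:
import paddle

qlen, mlen, mem_len = 3, 3, 2
klen = qlen + mlen
all_ones = paddle.ones([qlen, klen])
mask_len = klen - mem_len
mask_shift_len = qlen - mask_len if mask_len > 0 else qlen
dec_attn_mask = (paddle.triu(all_ones, diagonal=1 + mlen) +
                 paddle.tril(all_ones, -mask_shift_len))
print(dec_attn_mask)  # 1 = masked, 0 = attended; each row attends 2 keys
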
def build_attention_mask(self):
    # Additive causal mask: -inf strictly above the diagonal, 0 elsewhere.
    mask = paddle.full((self.context_length, self.context_length),
                       float("-inf"))
    mask = paddle.triu(mask, diagonal=1)
    return mask

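# Sanity sketch: this variant and the ones-times-inf variant above build the
# same mask. paddle.triu selects entries rather than multiplying by a 0/1
# matrix, so no 0 * -inf = NaN ever appears in the lower triangle:
import paddle

a = paddle.triu(paddle.ones((4, 4)) * float('-inf'), diagonal=1)
b = paddle.triu(paddle.full((4, 4), float('-inf'), dtype='float32'),
                diagonal=1)
print(paddle.all(a == b))  # True
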