def test_nonzero_api_as_tuple(self):
    """Check ``paddle.nonzero(..., as_tuple=True)`` in static-graph mode.

    Covers a 2-D and a 1-D boolean input. The returned value must be a
    tuple with one index tensor per input dimension; the tensors are
    concatenated and compared against the expected coordinates.
    """
    data = np.array([[True, False], [False, True]])
    with program_guard(Program(), Program()):
        x = fluid.layers.data(name='x', shape=[-1, 2])
        y = paddle.nonzero(x, as_tuple=True)
        self.assertEqual(type(y), tuple)
        self.assertEqual(len(y), 2)  # one index tensor per input dim
        z = fluid.layers.concat(list(y), axis=1)
        exe = fluid.Executor(fluid.CPUPlace())
        res, = exe.run(feed={'x': data},
                       fetch_list=[z.name],
                       return_numpy=False)
        expect_out = np.array([[0, 0], [1, 1]])
        self.assertTrue(np.allclose(expect_out, np.array(res)))

    data = np.array([True, True, False])
    with program_guard(Program(), Program()):
        x = fluid.layers.data(name='x', shape=[-1])
        y = paddle.nonzero(x, as_tuple=True)
        self.assertEqual(type(y), tuple)
        self.assertEqual(len(y), 1)  # 1-D input -> single index tensor
        z = fluid.layers.concat(list(y), axis=1)
        exe = fluid.Executor(fluid.CPUPlace())
        res, = exe.run(feed={'x': data},
                       fetch_list=[z.name],
                       return_numpy=False)
        expect_out = np.array([[0], [1]])
        self.assertTrue(np.allclose(expect_out, np.array(res)))
def get_loss(self, pred_scores, pred_deltas, anchors, inputs):
    """Compute RPN classification and regression losses.

    Args:
        pred_scores (list[Tensor]): Multi-level scores prediction
        pred_deltas (list[Tensor]): Multi-level deltas prediction
        anchors (list[Tensor]): Multi-level anchors
        inputs (dict): ground truth info, including im, gt_bbox, gt_score

    Returns:
        dict: 'loss_rpn_cls' and 'loss_rpn_reg', each divided by the
        normalizer returned by rpn_target_assign.
    """
    # Flatten all anchor levels into one (A, 4) tensor.
    anchors = [paddle.reshape(a, shape=(-1, 4)) for a in anchors]
    anchors = paddle.concat(anchors)

    # NCHW -> NHWC, then flatten each level to (N, -1, 1) before
    # concatenating across levels.
    scores = [
        paddle.reshape(
            paddle.transpose(v, perm=[0, 2, 3, 1]),
            shape=(v.shape[0], -1, 1)) for v in pred_scores
    ]
    scores = paddle.concat(scores, axis=1)

    # Same layout change for deltas, 4 values per anchor.
    deltas = [
        paddle.reshape(
            paddle.transpose(v, perm=[0, 2, 3, 1]),
            shape=(v.shape[0], -1, 4)) for v in pred_deltas
    ]
    deltas = paddle.concat(deltas, axis=1)

    score_tgt, bbox_tgt, loc_tgt, norm = self.rpn_target_assign(
        inputs, anchors)

    scores = paddle.reshape(x=scores, shape=(-1, ))
    deltas = paddle.reshape(x=deltas, shape=(-1, 4))

    score_tgt = paddle.concat(score_tgt)
    score_tgt.stop_gradient = True  # targets never receive gradients

    # score_tgt convention: 1 = positive, 0 = negative, < 0 = ignored.
    pos_mask = score_tgt == 1
    pos_ind = paddle.nonzero(pos_mask)

    valid_mask = score_tgt >= 0
    valid_ind = paddle.nonzero(valid_mask)

    # cls loss over all valid (positive + negative) samples
    if valid_ind.shape[0] == 0:
        loss_rpn_cls = paddle.zeros([1], dtype='float32')
    else:
        score_pred = paddle.gather(scores, valid_ind)
        score_label = paddle.gather(score_tgt, valid_ind).cast('float32')
        score_label.stop_gradient = True
        loss_rpn_cls = F.binary_cross_entropy_with_logits(
            logit=score_pred, label=score_label, reduction="sum")

    # reg loss (L1) over positive samples only
    if pos_ind.shape[0] == 0:
        loss_rpn_reg = paddle.zeros([1], dtype='float32')
    else:
        loc_pred = paddle.gather(deltas, pos_ind)
        loc_tgt = paddle.concat(loc_tgt)
        loc_tgt = paddle.gather(loc_tgt, pos_ind)
        loc_tgt.stop_gradient = True
        loss_rpn_reg = paddle.abs(loc_pred - loc_tgt).sum()
    return {
        'loss_rpn_cls': loss_rpn_cls / norm,
        'loss_rpn_reg': loss_rpn_reg / norm
    }
def forward(self, pred, batch):
    """Weighted node/edge classification loss plus per-branch accuracy."""
    node_preds, edge_preds = pred
    processed = self.pre_process(batch[4], batch[5])
    # Column 0 of each item holds node labels; the rest are edge labels.
    node_gts = paddle.concat([item[:, 0] for item in processed])
    edge_gts = paddle.concat(
        [item[:, 1:].reshape([-1]) for item in processed])
    node_valids = paddle.nonzero(node_gts != self.ignore).reshape([-1])
    edge_valids = paddle.nonzero(edge_gts != -1).reshape([-1])
    loss_node = self.loss_node(node_preds, node_gts)
    loss_edge = self.loss_edge(edge_preds, edge_gts)
    total = self.node_weight * loss_node + self.edge_weight * loss_edge
    # Accuracy is measured only on valid (non-ignored) entries.
    acc_node = self.accuracy(
        paddle.gather(node_preds, node_valids),
        paddle.gather(node_gts, node_valids))
    acc_edge = self.accuracy(
        paddle.gather(edge_preds, edge_valids),
        paddle.gather(edge_gts, edge_valids))
    return dict(
        loss=total,
        loss_node=loss_node,
        loss_edge=loss_edge,
        acc_node=acc_node,
        acc_edge=acc_edge)
def subsample_labels(labels,
                     num_samples,
                     fg_fraction,
                     bg_label=0,
                     use_random=True):
    """Subsample up to ``num_samples`` foreground/background indices.

    Args:
        labels (Tensor): per-anchor labels; -1 means ignore, ``bg_label``
            means background, anything else is foreground.
        num_samples (int): total number of samples to draw.
        fg_fraction (float): desired fraction of foreground samples.
        bg_label (int): label value treated as background.
        use_random (bool): randomly permute candidates before taking the
            first fg_num/bg_num; otherwise take them in order.

    Returns:
        tuple(Tensor, Tensor): (fg_inds, bg_inds) int32 indices into
        ``labels``.
    """
    positive = paddle.nonzero(
        paddle.logical_and(labels != -1, labels != bg_label))
    negative = paddle.nonzero(labels == bg_label)

    positive = positive.cast('int32').flatten()
    negative = negative.cast('int32').flatten()

    # Cap the requested counts by what is actually available.
    fg_num = int(num_samples * fg_fraction)
    fg_num = min(positive.numel(), fg_num)
    bg_num = num_samples - fg_num
    bg_num = min(negative.numel(), bg_num)

    if use_random:
        # Randomly select positive and negative examples. The
        # permutations are generated only when actually used (the
        # original built them unconditionally, wasting work and
        # advancing the RNG even for the deterministic path).
        fg_perm = paddle.randperm(positive.numel(), dtype='int32')
        fg_perm = paddle.slice(fg_perm, axes=[0], starts=[0], ends=[fg_num])
        bg_perm = paddle.randperm(negative.numel(), dtype='int32')
        bg_perm = paddle.slice(bg_perm, axes=[0], starts=[0], ends=[bg_num])
        fg_inds = paddle.gather(positive, fg_perm)
        bg_inds = paddle.gather(negative, bg_perm)
    else:
        fg_inds = paddle.slice(positive, axes=[0], starts=[0], ends=[fg_num])
        bg_inds = paddle.slice(negative, axes=[0], starts=[0], ends=[bg_num])
    return fg_inds, bg_inds
def emb_loss(self, p_ide, t_conf, t_ide, emb_scale, classifier):
    """Re-ID embedding loss: cross-entropy over identity classes.

    Args:
        p_ide: embedding map, NCHW with C == embedding dim.
        t_conf: confidence target; > 0 marks positive anchors.
        t_ide: identity target; -1 marks invalid ids.
        emb_scale: scale applied to the L2-normalized embedding.
        classifier: callable mapping embeddings to identity logits.
    """
    emb_dim = p_ide.shape[1]
    # NCHW -> NHWC, then flatten to (num_anchors, emb_dim).
    p_ide = p_ide.transpose((0, 2, 3, 1))
    p_ide_flatten = paddle.reshape(p_ide, [-1, emb_dim])

    mask = t_conf > 0
    mask = paddle.cast(mask, dtype="int64")
    mask.stop_gradient = True
    emb_mask = mask.max(1).flatten()
    emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
    emb_mask_inds.stop_gradient = True
    # use max(1) to decide the id, TODO: more reasonable strategy
    t_ide_flatten = t_ide.max(1).flatten()
    t_ide_flatten = paddle.cast(t_ide_flatten, dtype="int64")
    valid_inds = paddle.nonzero(t_ide_flatten != -1).flatten()

    if emb_mask_inds.numel() == 0 or valid_inds.numel() == 0:
        # loss_ide = paddle.to_tensor([0]) # will be error in gradient backward
        # keep the graph connected via a zero-valued phony term
        loss_ide = self.phony * 0  # todo
    else:
        embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
        embedding = emb_scale * F.normalize(embedding)
        logits = classifier(embedding)

        ide_target = paddle.gather(t_ide_flatten, emb_mask_inds)

        loss_ide = F.cross_entropy(
            logits, ide_target, ignore_index=-1, reduction='mean')
    loss_ide.stop_gradient = False
    return loss_ide
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """Advance beam search by one step.

    logits.shape == [B*W, V]. Returns (BeamSearchOutput, BeamSearchState)
    for the expanded beams.
    """
    _, vocab_size = logits.shape

    bsz, beam_width = state.log_probs.shape
    # one-hot eos row used to freeze finished hypotheses  [1, V]
    onehot_eos = P.cast(
        F.one_hot(P.ones([1], 'int64') * eos_id, vocab_size), 'int64')

    probs = P.log(F.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = P.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    # hypothesis length grows by one, except for finished beams and eos
    not_finished = 1 - P.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = P.reshape(state.lengths, [-1, 1]) + length_to_add

    # fold beams into the batch dim so topk picks across all W*V choices
    allprobs = P.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = P.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = P.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # first step only consider beam 0
    scores, idx = P.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    # (batch_row, flat_index) pairs for gather_nd on the flattened beams;
    # nonzero(idx != -1) is an always-true trick enumerating the rows
    gather_idx = P.concat(
        [P.nonzero(idx != -1)[:, :1], P.reshape(idx, [-1, 1])], 1)
    next_probs = P.reshape(P.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = P.reshape(P.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = P.concat([
        P.nonzero(next_beam_id != -1)[:, :1],
        P.reshape(next_beam_id, [-1, 1])
    ], 1)
    # gather new beam state according to new beam id
    next_finished = P.reshape(
        P.gather_nd(state.finished, gather_idx), state.finished.shape)

    # a beam is finished once it has ever emitted eos
    next_finished += P.cast(next_word_id == eos_id, 'int64')
    next_finished = P.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(
        log_probs=next_probs, lengths=next_len, finished=next_finished)
    output = BeamSearchOutput(
        scores=scores,
        predicted_ids=next_word_id,
        beam_parent_ids=next_beam_id)

    return output, next_state
def forward(self, features, im_info, boxes=None):
    """RPN forward: predict per-level scores/offsets, sample proposals,
    and (in training) compute RPN cls/reg losses.

    NOTE(review): this function uses a `torch`-named API with paddle
    semantics (`.cast(...)`, two-arg `torch.gather(x, index)`) —
    presumably a ported-code alias; confirm before refactoring.
    """
    # prediction
    pred_cls_score_list = []
    pred_bbox_offsets_list = []
    for x in features:
        t = F.relu(self.rpn_conv(x))
        pred_cls_score_list.append(self.rpn_cls_score(t))
        pred_bbox_offsets_list.append(self.rpn_bbox_offsets(t))
    # get anchors
    all_anchors_list = []
    # stride: 64,32,16,8,4 p6->p2
    base_stride = 4
    off_stride = 2**(len(features) - 1)  # 16
    for fm in features:
        layer_anchors = self.anchors_generator(fm, base_stride, off_stride)
        off_stride = off_stride // 2
        all_anchors_list.append(layer_anchors)
    # sample from the predictions
    rpn_rois = find_top_rpn_proposals(self.training,
                                      pred_bbox_offsets_list,
                                      pred_cls_score_list,
                                      all_anchors_list, im_info)
    rpn_rois = rpn_rois.cast('float32')
    if self.training:
        rpn_labels, rpn_bbox_targets = fpn_anchor_target(
            boxes, im_info, all_anchors_list)
        #rpn_labels = rpn_labels.astype(np.int32)
        pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
            pred_cls_score_list, pred_bbox_offsets_list)

        # rpn loss; labels: -1 ignore / 0 background / 1 foreground
        valid_masks = rpn_labels >= 0
        # objectness_loss = softmax_loss(
        #     torch.gather(pred_cls_score,torch.nonzero(valid_masks)),
        #     torch.gather(rpn_labels,torch.nonzero(valid_masks)))
        # BCE of softmaxed 2-class scores against one-hot rows of eye(2)
        objectness_loss = F.binary_cross_entropy(
            F.softmax(
                torch.gather(pred_cls_score, torch.nonzero(valid_masks))),
            torch.gather(
                torch.eye(2),
                torch.gather(rpn_labels, torch.nonzero(valid_masks))))

        pos_masks = rpn_labels > 0
        # localization_loss = smooth_l1_loss(
        #     pred_bbox_offsets[pos_masks],
        #     rpn_bbox_targets[pos_masks],
        #     config.rpn_smooth_l1_beta)
        # NOTE(review): uses config.rcnn_smooth_l1_beta although this is
        # the RPN loss (the commented-out original used
        # rpn_smooth_l1_beta) — confirm which beta is intended.
        localization_loss = \
            F.smooth_l1_loss(torch.gather(pred_bbox_offsets, torch.nonzero(pos_masks)),
            torch.gather(rpn_bbox_targets, torch.nonzero(pos_masks)),delta=config.rcnn_smooth_l1_beta)
        normalizer = 1 / valid_masks.cast('float32').sum()
        loss_rpn_cls = objectness_loss.sum() * normalizer
        loss_rpn_loc = localization_loss.sum() * normalizer
        loss_dict = {}
        loss_dict['loss_rpn_cls'] = loss_rpn_cls
        loss_dict['loss_rpn_loc'] = loss_rpn_loc
        return rpn_rois, loss_dict
    else:
        return rpn_rois
def libra_sample_bbox(matches,
                      match_labels,
                      matched_vals,
                      gt_classes,
                      batch_size_per_im,
                      num_classes,
                      fg_fraction,
                      fg_thresh,
                      bg_thresh,
                      num_bins,
                      use_random=True,
                      is_cascade_rcnn=False):
    """Libra R-CNN RoI sampling: balanced positive sampling plus
    IoU-balanced negative sampling.

    Returns:
        sampled_inds: indices of sampled RoIs (foreground first).
        sampled_gt_classes: class targets for the sampled RoIs
            (``num_classes`` for background, -1 for ignored).
    """
    rois_per_image = int(batch_size_per_im)
    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
    bg_rois_per_im = rois_per_image - fg_rois_per_im

    if is_cascade_rcnn:
        # Cascade stages keep every candidate past the thresholds.
        fg_inds = paddle.nonzero(matched_vals >= fg_thresh)
        bg_inds = paddle.nonzero(matched_vals < bg_thresh)
    else:
        matched_vals_np = matched_vals.numpy()
        match_labels_np = match_labels.numpy()

        # sample fg
        fg_inds = paddle.nonzero(matched_vals >= fg_thresh).flatten()
        fg_nums = int(np.minimum(fg_rois_per_im, fg_inds.shape[0]))
        if (fg_inds.shape[0] > fg_nums) and use_random:
            # balanced positive sampling (see libra_sample_pos)
            fg_inds = libra_sample_pos(matched_vals_np, match_labels_np,
                                       fg_inds.numpy(), fg_rois_per_im)
        fg_inds = fg_inds[:fg_nums]

        # sample bg
        bg_inds = paddle.nonzero(matched_vals < bg_thresh).flatten()
        bg_nums = int(
            np.minimum(rois_per_image - fg_nums, bg_inds.shape[0]))
        if (bg_inds.shape[0] > bg_nums) and use_random:
            # IoU-balanced negative sampling over num_bins buckets
            bg_inds = libra_sample_neg(
                matched_vals_np,
                match_labels_np,
                bg_inds.numpy(),
                bg_rois_per_im,
                num_bins=num_bins,
                bg_thresh=bg_thresh)
        bg_inds = bg_inds[:bg_nums]

    sampled_inds = paddle.concat([fg_inds, bg_inds])

    gt_classes = paddle.gather(gt_classes, matches)
    # background -> num_classes, ignored -> -1
    gt_classes = paddle.where(match_labels == 0,
                              paddle.ones_like(gt_classes) * num_classes,
                              gt_classes)
    gt_classes = paddle.where(match_labels == -1,
                              paddle.ones_like(gt_classes) * -1,
                              gt_classes)
    sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)

    return sampled_inds, sampled_gt_classes
def fpn_anchor_target_opr_core_impl(gt_boxes, im_info, anchors, allow_low_quality_matches=True): ignore_label = config.ignore_label # get the gt boxes valid_gt_boxes = gt_boxes[:int(im_info[5]), :] valid_gt_boxes = torch.gather( valid_gt_boxes, torch.nonzero( gt(valid_gt_boxes[:, -1], torch.zeros(valid_gt_boxes[:, -1].shape)))) # compute the iou matrix anchors = anchors.cast('float32') overlaps = box_overlap_opr(anchors, valid_gt_boxes[:, :4]) # match the dtboxes max_overlaps = torch.max(overlaps, axis=1) argmax_overlaps = torch.argmax(overlaps, axis=1) # _, gt_argmax_overlaps = torch.max(overlaps, axis=0) gt_argmax_overlaps = my_gt_argmax(overlaps) del overlaps # all ignore labels = torch.ones(torch.to_tensor( anchors.shape[0])).cast('long') * ignore_label # set negative ones labels = labels * (max_overlaps >= config.rpn_negative_overlap) # set positive ones fg_mask = (max_overlaps >= config.rpn_positive_overlap) if allow_low_quality_matches: gt_id = torch.arange(valid_gt_boxes.shape[0]).cast('float32') #argmax_overlaps[gt_argmax_overlaps] = gt_id for i, j in zip(gt_argmax_overlaps, range(gt_argmax_overlaps.shape[0])): argmax_overlaps[i] = gt_id[j] max_overlaps[i] = 1 #max_overlaps[gt_argmax_overlaps] = 1 fg_mask = (max_overlaps >= config.rpn_positive_overlap) # set positive ones fg_mask_ind = torch.nonzero(fg_mask, as_tuple=False).flatten() #labels[fg_mask_ind] = 1 for i in fg_mask_ind: labels[i] = 1 # bbox targets bbox_targets = bbox_transform_opr( anchors, torch.gather(valid_gt_boxes, argmax_overlaps)[:, :4]) if config.rpn_bbox_normalize_targets: std_opr = torch.to_tensor( config.bbox_normalize_stds[None, :]).type_as(bbox_targets) mean_opr = torch.to_tensor( config.bbox_normalize_means[None, :]).type_as(bbox_targets) minus_opr = mean_opr / std_opr bbox_targets = bbox_targets / std_opr - minus_opr return labels, bbox_targets
def get_emb_and_gt_outs(self, ide_outs, targets):
    """Collect [embedding | track-id] rows for all positive anchors."""
    collected = []
    for level, head_out in enumerate(ide_outs):
        conf_target = targets['tconf{}'.format(level)]
        id_target = targets['tide{}'.format(level)]
        # NCHW -> NHWC, then flatten to (num_anchors, embedding_dim).
        flat_emb = paddle.reshape(
            head_out.transpose((0, 2, 3, 1)), [-1, self.embedding_dim])
        pos_mask = paddle.cast(conf_target > 0, dtype="int64")
        pos_inds = paddle.nonzero(pos_mask.max(1).flatten() > 0).flatten()
        if len(pos_inds) == 0:
            continue
        tids = paddle.gather(
            paddle.reshape(id_target.max(1), [-1, 1]), pos_inds)
        emb = self.emb_scale * F.normalize(
            paddle.gather(flat_emb, pos_inds))
        collected.append(paddle.concat([emb, tids], axis=1))
    if len(collected) > 0:
        return paddle.concat(collected, axis=0)
    # No positives at any level: return a single zero placeholder row.
    return paddle.zeros((1, self.embedding_dim + 1))
def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
                         num_classes, resolution):
    """Build per-image mask training targets from sampled RoIs.

    For each image: select foreground RoIs, gather their matched gt
    polygons, and rasterize each polygon inside its RoI at
    ``resolution`` x ``resolution``. When an image has no foreground a
    fake RoI is inserted and its loss weight is zeroed.

    Returns:
        mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index,
        tgt_weights
    """
    mask_rois = []
    mask_rois_num = []
    tgt_masks = []
    tgt_classes = []
    mask_index = []
    tgt_weights = []
    for k in range(len(rois)):
        has_fg = True
        rois_per_im = rois[k]
        gt_segms_per_im = gt_segms[k]
        labels_per_im = labels_int32[k]
        # select rois labeled with foreground
        fg_inds = paddle.nonzero(
            paddle.logical_and(labels_per_im != -1,
                               labels_per_im != num_classes))

        # generate fake roi if foreground is empty
        if fg_inds.numel() == 0:
            has_fg = False
            fg_inds = paddle.ones([1], dtype='int32')

        inds_per_im = sampled_gt_inds[k]
        inds_per_im = paddle.gather(inds_per_im, fg_inds)

        gt_segms_per_im = paddle.gather(gt_segms_per_im, inds_per_im)

        fg_rois = paddle.gather(rois_per_im, fg_inds)
        fg_classes = paddle.gather(labels_per_im, fg_inds)
        fg_segms = paddle.gather(gt_segms_per_im, fg_inds)
        weight = paddle.ones([fg_rois.shape[0]], dtype='float32')
        if not has_fg:
            # zero the weight so the fake roi contributes no loss
            weight = weight - 1
        # remove padding
        gt_polys = fg_segms.numpy()
        boxes = fg_rois.numpy()
        new_gt_polys = _strip_pad(gt_polys)
        results = [
            rasterize_polygons_within_box(poly, box, resolution)
            for poly, box in zip(new_gt_polys, boxes)
        ]
        tgt_mask = paddle.stack(results)
        tgt_mask.stop_gradient = True
        fg_rois.stop_gradient = True

        mask_index.append(fg_inds)
        mask_rois.append(fg_rois)
        mask_rois_num.append(paddle.shape(fg_rois)[0])
        tgt_classes.append(fg_classes)
        tgt_masks.append(tgt_mask)
        tgt_weights.append(weight)

    mask_index = paddle.concat(mask_index)
    mask_rois_num = paddle.concat(mask_rois_num)
    tgt_classes = paddle.concat(tgt_classes, axis=0)
    tgt_masks = paddle.concat(tgt_masks, axis=0)
    tgt_weights = paddle.concat(tgt_weights, axis=0)

    return mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights
def reorder_(t, parent_id):
    """Reorder cache rows so each beam follows its parent beam id."""
    # Batch-row offset per element (nonzero(parent_id != -1)[:, 0] is an
    # always-true trick enumerating the rows) plus the parent beam index.
    batch_offset = paddle.nonzero(parent_id != -1)[:, 0] * beam_width
    flat_parent = paddle.reshape(parent_id, [-1])
    return paddle.gather(t, batch_offset + flat_parent)
def forward(self, inp):
    """Adaptive-embedding lookup (Transformer-XL style).

    With ``div_val == 1`` a single embedding table is used (optionally
    projected to ``d_proj``). Otherwise tokens are bucketed by the
    cutoff ranges, each bucket embedded with its own table/projection,
    and the results scattered back into one flat tensor.

    Args:
        inp: integer id tensor of arbitrary shape.
    Returns:
        Tensor with shape ``inp.shape + [d_proj]`` scaled by emb_scale.
    """
    if self.div_val == 1:
        embed = self.emb_layers[0](inp)
        if self.d_proj != self.d_embed:
            embed = F.linear(embed, self.emb_projs[0])
    else:
        inp_flat = paddle.reshape(inp, shape=[-1])
        emb_flat = paddle.zeros(
            [inp_flat.shape[0], self.d_proj], dtype=global_dtype)
        for i in range(len(self.cutoffs)):
            l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1]
            # tokens falling into this cutoff bucket
            mask_i = (inp_flat >= l_idx) & (inp_flat < r_idx)
            indices_i = paddle.nonzero(mask_i).squeeze([1])
            if indices_i.numel() == 0:
                continue
            inp_i = paddle.gather(inp_flat, indices_i, axis=0) - l_idx
            emb_i = self.emb_layers[i](inp_i)
            emb_i = F.linear(emb_i, self.emb_projs[i])
            emb_flat = paddle.scatter(emb_flat, indices_i, emb_i)
        # BUG FIX: list.append returns None, so the original
        # `shape=inp.shape.append(self.d_proj)` passed shape=None.
        # Build the target shape by concatenation instead.
        embed = paddle.reshape(
            emb_flat, shape=list(inp.shape) + [self.d_proj])

    embed = embed * self.emb_scale

    return embed
def forward(self, loss_confs, loss_boxes, loss_ides, loss_params_cls,
            loss_params_reg, loss_params_ide, targets):
    """Combine per-level conf/box/id losses through learnable weights."""
    assert len(loss_confs) == len(loss_boxes) == len(loss_ides)
    assert len(loss_params_cls) == len(loss_params_reg) == len(
        loss_params_ide)
    assert len(loss_confs) == len(loss_params_cls)
    batchsize = targets['gt_bbox'].shape[0]
    # Average count of non-empty gt boxes per image in the batch.
    nTargets = paddle.nonzero(
        paddle.sum(targets['gt_bbox'], axis=2)).shape[0] / batchsize
    nTargets = paddle.to_tensor(nTargets, dtype='float32')
    nTargets.stop_gradient = True
    # Each level's total is its three losses passed through the
    # corresponding learnable loss-weight modules.
    jde_losses = [
        cls_w(conf) + reg_w(box) + ide_w(ide)
        for conf, box, ide, cls_w, reg_w, ide_w in zip(
            loss_confs, loss_boxes, loss_ides, loss_params_cls,
            loss_params_reg, loss_params_ide)
    ]
    return {
        "loss_conf": sum(loss_confs),
        "loss_box": sum(loss_boxes),
        "loss_ide": sum(loss_ides),
        "loss": sum(jde_losses),
        "nTargets": nTargets,
    }
def test_dygraph_api(self):
    """paddle.nonzero in dygraph mode returns the coordinates of the
    True entries; the result must equal the expected index array."""
    data_x = np.array([[True, False], [False, True]])
    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(data_x)
        z = paddle.nonzero(x)
        np_z = z.numpy()
    expect_out = np.array([[0, 0], [1, 1]])
    # BUG FIX: the expected output was computed but never asserted,
    # so the test could not fail.
    self.assertTrue(np.allclose(expect_out, np_z))
def forward(self):
    """Distribute FPN RoIs to pyramid levels by their scale.

    Each RoI is routed to ``floor(refer_level + log2(scale/refer_scale))``
    clipped to [min_level, max_level]; a restore index is returned so the
    per-level outputs can be re-assembled in the original order.

    Returns:
        dict: 'MultiFpnRois' (list of per-level RoI tensors) and
        'RestoreIndex' (argsort of the concatenated selection order).
    """
    fpn_rois = self.input('FpnRois', 0)
    areas = self.bbox_area(fpn_rois)
    scale = paddle.sqrt(areas)
    # (removed unused local `num_level`)
    # target_lvl = floor(refer_level + log2(scale / refer_scale))
    target_level = paddle.log(scale / self.refer_scale + 1e-06) / np.log(2)
    target_level = paddle.floor(self.refer_level + target_level)
    target_level = paddle.clip(
        target_level, min=self.min_level, max=self.max_level)

    rois = list()
    rois_idx_order = list()

    for level in range(self.min_level, self.max_level + 1):
        level_tensor = paddle.full_like(target_level, fill_value=level)
        res = paddle.equal(target_level, level_tensor)
        res = paddle.squeeze(res, axis=1)
        res = paddle.cast(res, dtype='int32')
        index = paddle.nonzero(res)
        roi = paddle.gather(fpn_rois, index, axis=0)
        rois.append(roi)
        rois_idx_order.append(index)
    rois_idx_order = paddle.concat(rois_idx_order, axis=0)
    size = paddle.shape(rois_idx_order)[0]
    # argsort via topk(largest=False) to build the restore index
    _, rois_idx_restore = paddle.topk(
        rois_idx_order, axis=0, sorted=True, largest=False, k=size)
    #rois_idx_restore = paddle.cast(rois_idx_restore, dtype='int32')
    return {'MultiFpnRois': rois, 'RestoreIndex': [rois_idx_restore]}
def forward(self, logit, label):
    """OHEM-style cross-entropy: per image, keep either every pixel loss
    above ``self.threshold`` or the top-K losses, then average.

    Args:
        logit: (N, C, H, W) class scores.
        label: (N, H, W) or (N, 1, H, W) integer labels.
    """
    n, c, h, w = logit.shape
    total_loss = 0.0
    if len(label.shape) != len(logit.shape):
        label = paddle.unsqueeze(label, 1)
    for i in range(n):
        # flatten one image to (H*W, C) scores and (H*W,) labels
        x = paddle.unsqueeze(logit[i], 0)
        y = paddle.unsqueeze(label[i], 0)
        x = paddle.transpose(x, (0, 2, 3, 1))
        y = paddle.transpose(y, (0, 2, 3, 1))
        x = paddle.reshape(x, shape=(-1, c))
        y = paddle.reshape(y, shape=(-1, ))
        loss = F.cross_entropy(
            x,
            y,
            weight=self.weight,
            ignore_index=self.ignore_index,
            reduction="none")
        sorted_loss = paddle.sort(loss, descending=True)
        # NOTE(review): assumes at least K+1 pixel losses per image,
        # otherwise sorted_loss[self.K] indexes out of range — confirm
        # that upstream guarantees this.
        if sorted_loss[self.K] > self.threshold:
            new_indices = paddle.nonzero(sorted_loss > self.threshold)
            loss = paddle.gather(sorted_loss, new_indices)
        else:
            loss = sorted_loss[:self.K]
        total_loss += paddle.mean(loss)
    return total_loss / float(n)
def forward(self, body_feats=None, rois=None, rois_num=None, inputs=None):
    """Cascade bbox head forward pass.

    Args:
        body_feats (list[Tensor]): Feature maps from backbone
        rois (Tensor): RoIs generated from RPN module
        rois_num (Tensor): The number of RoIs in each image
        inputs (dict{Tensor}): The ground-truth of image

    Returns:
        Training: (loss dict keyed by '<loss>_stage{i}', bbox_feat).
        Inference: ((deltas, scores), head).
    """
    targets = []
    if self.training:
        rois, rois_num, targets = self.bbox_assigner(rois, rois_num,
                                                     inputs)
        targets_list = [targets]
        self.assigned_rois = (rois, rois_num)
        self.assigned_targets = targets

    pred_bbox = None
    head_out_list = []
    for i in range(self.num_cascade_stages):
        if i > 0:
            # later stages refine the previous stage's predicted boxes
            rois, rois_num = self._get_rois_from_boxes(
                pred_bbox, inputs['im_shape'])
            if self.training:
                rois, rois_num, targets = self.bbox_assigner(
                    rois, rois_num, inputs, i, is_cascade=True)
                tgt_labels = targets[0]
                tgt_labels = paddle.concat(tgt_labels) if len(
                    tgt_labels) > 1 else tgt_labels[0]
                tgt_labels.stop_gradient = True
                fg_inds = paddle.nonzero(
                    paddle.logical_and(
                        tgt_labels >= 0,
                        tgt_labels < self.num_classes)).flatten()
                if fg_inds.numel() == 0:
                    # no foreground this stage: reuse previous targets
                    targets_list.append(targets_list[-1])
                else:
                    targets_list.append(targets)

        rois_feat = self.roi_extractor(body_feats, rois, rois_num)
        bbox_feat = self.head(rois_feat, i)
        scores = self.bbox_score_list[i](bbox_feat)
        deltas = self.bbox_delta_list[i](bbox_feat)
        head_out_list.append([scores, deltas, rois])
        pred_bbox = self._get_pred_bbox(deltas, rois, self.bbox_weight[i])

    if self.training:
        loss = {}
        for stage, value in enumerate(zip(head_out_list, targets_list)):
            (scores, deltas, rois), targets = value
            loss_stage = self.get_loss(scores, deltas, targets, rois,
                                       self.bbox_weight[stage])
            for k, v in loss_stage.items():
                # average the per-stage losses over the cascade
                loss[k + "_stage{}".format(
                    stage)] = v / self.num_cascade_stages

        return loss, bbox_feat
    else:
        scores, deltas, self.refined_rois = self.get_prediction(
            head_out_list)
        return (deltas, scores), self.head
def filter_box_by_weight(self, pred, target, weight):
    """Keep only the entries whose weight is positive."""
    keep = paddle.nonzero(weight > 0)
    keep.stop_gradient = True  # indices are not differentiable
    kept_pred = paddle.gather_nd(pred, keep)
    kept_target = paddle.gather_nd(target, keep)
    kept_weight = paddle.gather_nd(weight, keep)
    return kept_pred, kept_target, kept_weight
def filter_roi(rois, max_overlap):
    """Keep RoIs with positive width/height and max_overlap < 1.

    BUG FIXES:
    - ``paddle.logical_and`` takes two operands; the third positional
      argument is the ``out`` tensor, so the original call used the
      ``max_overlap < 1`` condition as an output buffer instead of
      ANDing it in. The conditions are now chained.
    - ``paddle.nonzero`` on a 1-D mask returns shape [N, 1]; the row
      index lives in column 0, not column 1.

    Returns:
        Tensor: kept rows of ``rois``, or a single zero row when no RoI
        survives the filter.
    """
    ws = rois[:, 2] - rois[:, 0]
    hs = rois[:, 3] - rois[:, 1]
    valid_mask = paddle.logical_and(
        paddle.logical_and(ws > 0, hs > 0), max_overlap < 1)
    keep = paddle.nonzero(valid_mask)
    if keep.numel() > 0:
        return rois[keep[:, 0]]
    return paddle.zeros((1, 4), dtype='float32')
def nonempty_bbox(boxes, min_size=0, return_mask=False):
    """Return indices (or the mask) of boxes whose width AND height
    exceed ``min_size``.

    BUG FIX: the second condition tested ``w > min_size`` twice, so the
    height was never checked; it now tests ``h``.

    Args:
        boxes (Tensor): (N, 4) as [x1, y1, x2, y2].
        min_size (float): minimum side length.
        return_mask (bool): return the boolean mask instead of indices.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    mask = paddle.logical_and(w > min_size, h > min_size)
    if return_mask:
        return mask
    keep = paddle.nonzero(mask).flatten()
    return keep
def get_loss(self, scores, deltas, targets, rois, bbox_weight): """ scores (Tensor): scores from bbox head outputs deltas (Tensor): deltas from bbox head outputs targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes and tgt_gt_inds rois (List[Tensor]): RoIs generated in each batch """ # TODO: better pass args tgt_labels, tgt_bboxes, tgt_gt_inds = targets tgt_labels = paddle.concat( tgt_labels) if len(tgt_labels) > 1 else tgt_labels[0] tgt_labels = tgt_labels.cast('int64') tgt_labels.stop_gradient = True loss_bbox_cls = F.cross_entropy(input=scores, label=tgt_labels, reduction='mean') # bbox reg cls_agnostic_bbox_reg = deltas.shape[1] == 4 fg_inds = paddle.nonzero( paddle.logical_and(tgt_labels >= 0, tgt_labels < self.num_classes)).flatten() cls_name = 'loss_bbox_cls' reg_name = 'loss_bbox_reg' loss_bbox = {} if cls_agnostic_bbox_reg: reg_delta = paddle.gather(deltas, fg_inds) else: fg_gt_classes = paddle.gather(tgt_labels, fg_inds) reg_row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1) reg_row_inds = paddle.tile(reg_row_inds, [1, 4]).reshape([-1, 1]) reg_col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4) reg_col_inds = reg_col_inds.reshape([-1, 1]) reg_inds = paddle.concat([reg_row_inds, reg_col_inds], axis=1) reg_delta = paddle.gather(deltas, fg_inds) reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4]) rois = paddle.concat(rois) if len(rois) > 1 else rois[0] tgt_bboxes = paddle.concat( tgt_bboxes) if len(tgt_bboxes) > 1 else tgt_bboxes[0] reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight) reg_target = paddle.gather(reg_target, fg_inds) reg_target.stop_gradient = True loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum() / tgt_labels.shape[0] loss_bbox[cls_name] = loss_bbox_cls loss_bbox[reg_name] = loss_bbox_reg return loss_bbox
def update(self, arc_preds, rel_preds, arcs, rels, mask):
    """Accumulate attachment counts over the tokens selected by mask."""
    valid = paddle.nonzero(mask)
    arc_hits = paddle.gather_nd(arc_preds == arcs, valid)
    # A relation counts as correct only if its arc is also correct.
    rel_hits = paddle.logical_and(
        paddle.gather_nd(rel_preds == rels, valid), arc_hits)
    self.total += len(arc_hits)
    self.correct_arcs += np.sum(arc_hits.numpy()).item()
    self.correct_rels += np.sum(rel_hits.numpy()).item()
def label_box(anchors, gt_boxes, positive_overlap, negative_overlap,
              allow_low_quality, ignore_thresh, is_crowd=None):
    """Match anchors to gt boxes by IoU.

    Returns:
        matches (Tensor): index of the best gt per anchor.
        match_labels (Tensor): 1 foreground, 0 background, -1 ignored.
    """
    iou = bbox_overlaps(gt_boxes, anchors)
    n_gt = gt_boxes.shape[0]
    if n_gt == 0 or is_crowd is None:
        n_gt_crowd = 0
    else:
        n_gt_crowd = paddle.nonzero(is_crowd).shape[0]
    if iou.shape[0] == 0 or n_gt_crowd == n_gt:
        # No truth, assign everything to background
        default_matches = paddle.full((iou.shape[1], ), 0, dtype='int64')
        default_match_labels = paddle.full(
            (iou.shape[1], ), 0, dtype='int32')
        return default_matches, default_match_labels
    # if ignore_thresh > 0, remove anchor if it is closed to
    # one of the crowded ground-truth
    if n_gt_crowd > 0:
        N_a = anchors.shape[0]
        ones = paddle.ones([N_a])
        mask = is_crowd * ones

        if ignore_thresh > 0:
            crowd_iou = iou * mask
            valid = (paddle.sum((crowd_iou > ignore_thresh).cast('int32'),
                                axis=0) > 0).cast('float32')
            # force ignored anchors below every threshold (iou = -1)
            iou = iou * (1 - valid) - valid

        # ignore the iou between anchor and crowded ground-truth
        iou = iou * (1 - mask) - mask

    matched_vals, matches = paddle.topk(iou, k=1, axis=0)
    match_labels = paddle.full(matches.shape, -1, dtype='int32')
    # set ignored anchor with iou = -1
    neg_cond = paddle.logical_and(matched_vals > -1,
                                  matched_vals < negative_overlap)
    match_labels = paddle.where(neg_cond, paddle.zeros_like(match_labels),
                                match_labels)
    match_labels = paddle.where(matched_vals >= positive_overlap,
                                paddle.ones_like(match_labels),
                                match_labels)
    if allow_low_quality:
        # each gt also keeps its highest-IoU anchor(s) as foreground
        highest_quality_foreach_gt = iou.max(axis=1, keepdim=True)
        pred_inds_with_highest_quality = paddle.logical_and(
            iou > 0, iou == highest_quality_foreach_gt).cast('int32').sum(
                0, keepdim=True)
        match_labels = paddle.where(pred_inds_with_highest_quality > 0,
                                    paddle.ones_like(match_labels),
                                    match_labels)

    matches = matches.flatten()
    match_labels = match_labels.flatten()

    return matches, match_labels
def get_in_gt_and_in_center_info(self, flatten_center_and_stride,
                                 gt_bboxes):
    """Prior filtering: which priors lie inside a gt box and/or inside a
    radius (``self.center_radius`` strides) around a gt center.

    Args:
        flatten_center_and_stride: per-prior [cx, cy, stride_x, stride_y].
        gt_bboxes: (n_gt, 4) boxes as [x1, y1, x2, y2].

    Returns:
        is_in_gts_or_centers_all: bool mask over all priors.
        is_in_gts_or_centers_all_inds: indices of those priors.
        is_in_gts_and_centers: [num_fg, num_gt] mask where both hold.
    """
    num_gt = gt_bboxes.shape[0]

    # broadcast each prior's center/stride to one column per gt
    flatten_x = flatten_center_and_stride[:, 0].unsqueeze(1).tile(
        [1, num_gt])
    flatten_y = flatten_center_and_stride[:, 1].unsqueeze(1).tile(
        [1, num_gt])
    flatten_stride_x = flatten_center_and_stride[:, 2].unsqueeze(1).tile(
        [1, num_gt])
    flatten_stride_y = flatten_center_and_stride[:, 3].unsqueeze(1).tile(
        [1, num_gt])

    # is prior centers in gt bboxes, shape: [n_center, n_gt]
    l_ = flatten_x - gt_bboxes[:, 0]
    t_ = flatten_y - gt_bboxes[:, 1]
    r_ = gt_bboxes[:, 2] - flatten_x
    b_ = gt_bboxes[:, 3] - flatten_y

    deltas = paddle.stack([l_, t_, r_, b_], axis=1)
    is_in_gts = deltas.min(axis=1) > 0
    is_in_gts_all = is_in_gts.sum(axis=1) > 0

    # is prior centers in gt centers
    gt_center_xs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
    gt_center_ys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
    ct_bound_l = gt_center_xs - self.center_radius * flatten_stride_x
    ct_bound_t = gt_center_ys - self.center_radius * flatten_stride_y
    ct_bound_r = gt_center_xs + self.center_radius * flatten_stride_x
    ct_bound_b = gt_center_ys + self.center_radius * flatten_stride_y

    cl_ = flatten_x - ct_bound_l
    ct_ = flatten_y - ct_bound_t
    cr_ = ct_bound_r - flatten_x
    cb_ = ct_bound_b - flatten_y

    ct_deltas = paddle.stack([cl_, ct_, cr_, cb_], axis=1)
    is_in_cts = ct_deltas.min(axis=1) > 0
    is_in_cts_all = is_in_cts.sum(axis=1) > 0

    # in any of gts or gt centers, shape: [n_center]
    is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
                                                 is_in_cts_all)

    is_in_gts_or_centers_all_inds = paddle.nonzero(
        is_in_gts_or_centers_all).squeeze(1)

    # both in gts and gt centers, shape: [num_fg, num_gt]
    is_in_gts_and_centers = paddle.logical_and(
        paddle.gather(
            is_in_gts.cast('int'), is_in_gts_or_centers_all_inds,
            axis=0).cast('bool'),
        paddle.gather(
            is_in_cts.cast('int'), is_in_gts_or_centers_all_inds,
            axis=0).cast('bool'))
    return is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_gts_and_centers
def filter_box_by_weight(self, pred, target, weight):
    """
    Filter out boxes where ttf_reg_weight is 0, only keep positive samples.
    """
    # nonzero on the mask yields (N, d) coordinates for gather_nd
    index = paddle.nonzero(weight > 0)
    index.stop_gradient = True  # indices carry no gradient
    weight = paddle.gather_nd(weight, index)
    pred = paddle.gather_nd(pred, index)
    target = paddle.gather_nd(target, index)
    return pred, target, weight
def apply_single(self, pred, tagmap):
    """Associative-embedding grouping loss (pull/push) for one image.

    ``tagmap[..., 3] > 0`` marks valid keypoints; ``tagmap[..., :3]``
    holds the indices used to gather predicted tags from ``pred``.
    Returns a (pull, push) pair of loss values.
    """
    if tagmap.numpy()[:, :, 3].sum() == 0:
        return (paddle.zeros([1]), paddle.zeros([1]))
    nonzero = paddle.nonzero(tagmap[:, :, 3] > 0)
    if nonzero.shape[0] == 0:
        return (paddle.zeros([1]), paddle.zeros([1]))
    p_inds = paddle.unique(nonzero[:, 0])
    num_person = p_inds.shape[0]
    if num_person == 0:
        return (paddle.zeros([1]), paddle.zeros([1]))

    pull = 0
    tagpull_num = 0
    embs_all = []
    person_unvalid = 0
    for person_idx in p_inds.numpy():
        valid_single = tagmap[person_idx.item()]
        validkpts = paddle.nonzero(valid_single[:, 3] > 0)
        valid_single = paddle.index_select(valid_single, validkpts)
        emb = paddle.gather_nd(pred, valid_single[:, :3])
        if emb.shape[0] == 1:
            # a single keypoint has zero spread; excluded from the
            # pull normalizer below
            person_unvalid += 1
        mean = paddle.mean(emb, axis=0)
        embs_all.append(mean)
        pull += paddle.mean(paddle.pow(emb - mean, 2), axis=0)
        tagpull_num += emb.shape[0]
    pull /= max(num_person - person_unvalid, 1)
    if num_person < 2:
        # push loss needs at least two people
        return pull, paddle.zeros([1])

    embs_all = paddle.stack(embs_all)
    A = embs_all.expand([num_person, num_person])
    B = A.transpose([1, 0])
    diff = A - B

    diff = paddle.pow(diff, 2)
    push = paddle.exp(-diff)
    push = paddle.sum(push) - num_person

    push /= 2 * num_person * (num_person - 1)
    return pull, push
def flat_words(words, pad_index=0):
    """Flatten padded word-piece ids and track end-of-word positions.

    # assumes `words` is 3-D (batch, word, piece) — TODO confirm

    Returns:
        words: non-pad pieces per batch row, re-padded to the longest.
        position: per-word index of its last piece in the flat sequence,
            clamped to the padded length.
    """
    mask = words != pad_index
    lens = paddle.sum(paddle.cast(mask, "int64"), axis=-1)
    # cumulative end position of each word; empty words count as 1
    position = paddle.cumsum(
        lens + paddle.cast((lens == 0), "int64"), axis=1) - 1
    select = paddle.nonzero(mask)
    words = paddle.gather_nd(words, select)
    lens = paddle.sum(lens, axis=-1)
    words = pad_sequence_paddle(words, lens, pad_index)
    max_len = words.shape[1]
    # clamp positions that fall past the re-padded length
    position = mask_fill(position, position >= max_len, max_len - 1)
    return words, position
def my_gt_argmax(overlaps):
    """For each gt column pick one anchor achieving the column max IoU,
    breaking ties randomly.

    NOTE(review): `torch` here exposes a paddle-like API (`.cast`,
    two-arg `gather`) — presumably a ported-code alias; confirm.
    """
    gt_max_overlaps = torch.max(overlaps, axis=0)
    gt_max_mask = overlaps == gt_max_overlaps
    gt_argmax_overlaps = []
    for i in range(overlaps.shape[-1]):
        gt_max_inds = torch.nonzero(
            gt_max_mask.cast('int')[:, i], as_tuple=False).flatten()
        # random tie-break: take one random element of the argmax set
        gt_max_ind = torch.gather(gt_max_inds,
                                  torch.randperm(gt_max_inds.numel())[0])
        gt_argmax_overlaps.append(gt_max_ind)
    gt_argmax_overlaps = cat(gt_argmax_overlaps)
    return gt_argmax_overlaps
def subsample_labels(labels, num_samples, positive_fraction):
    """Randomly subsample positive/negative anchor indices.

    ``labels``: config.ignore_label = ignore, 0 = background, anything
    else = foreground. Returns (pos_idx, neg_idx) index tensors.

    NOTE(review): `torch` here exposes a paddle-like API (`.cast`,
    two-arg `gather`) — presumably a ported-code alias; confirm.
    """
    positive = torch.nonzero(
        mul((labels != config.ignore_label).cast('int'),
            (labels != 0).cast('int')).cast('bool'),
        as_tuple=False).squeeze(1)
    negative = torch.nonzero(labels == 0, as_tuple=False).squeeze(1)

    # cap requested counts by availability
    num_pos = int(num_samples * positive_fraction)
    num_pos = min(positive.numel(), num_pos)
    num_neg = num_samples - num_pos
    num_neg = min(negative.numel(), num_neg)

    # randomly select positive and negative examples
    if type(num_pos) == torch.Tensor:
        num_pos = num_pos.numpy().item()
    if type(num_neg) == torch.Tensor:
        num_neg = num_neg.numpy().item()
    perm1 = torch.randperm(positive.numel())[:num_pos]
    perm2 = torch.randperm(negative.numel())[:num_neg]

    pos_idx = torch.gather(positive, perm1)
    neg_idx = torch.gather(negative, perm2)
    return pos_idx, neg_idx