def mask_loss(self, mask_pred, mask_eoc, mask_target, matches, bt_target):
    """Per-image mask loss for a YOLACT-style head.

    For each image, takes the positive (matched) anchors, assembles their mask
    predictions as a linear combination of prototype masks (``mask_pred``)
    weighted by per-anchor coefficients (``mask_eoc``), crops each predicted
    mask to its ground-truth box, and computes a box-area-weighted sigmoid BCE
    against the ground-truth masks.

    Parameters (shapes assumed from usage — TODO confirm against caller):
        mask_pred   -- per-image prototype masks, indexed as mask_pred[i].
        mask_eoc    -- per-anchor mask coefficients, indexed as mask_eoc[i].
        mask_target -- ground-truth instance masks per image.
        matches     -- anchor-to-gt match indices; >= 0 marks a positive anchor.
        bt_target   -- per-anchor ground-truth boxes (x0, y0, x1, y1 order
                       presumed from the area computation below — verify).

    Returns a (batch,) NDArray: one mean loss scalar per image.
    """
    samples = matches >= 0
    # Number of positive anchors per image (host-side ints for slicing).
    pos_num = samples.sum(axis=-1).asnumpy().astype('int')
    # Sorting -matches descending puts the positive (largest match id) first.
    rank = (-matches).argsort(axis=-1)
    losses = []
    for i in range(mask_pred.shape[0]):
        if pos_num[i] == 0:
            # No positives in this image: contribute a zero loss placeholder.
            losses.append(nd.zeros(shape=(1, ), ctx=mask_pred.context))
            continue
        idx = rank[i, :pos_num[i]]
        pos_bboxe = nd.take(bt_target[i], idx)
        area = (pos_bboxe[:, 3] - pos_bboxe[:, 1]) * (pos_bboxe[:, 2] - pos_bboxe[:, 0])
        # Weight small boxes more: full-image area divided by box area.
        weight = self.gt_weidth * self.gt_height / area
        mask_gt = mask_target[i, matches[i, idx], :, :]
        # Linear combination of prototypes, then sigmoid to get probabilities.
        mask_preds = nd.sigmoid(nd.dot(nd.take(mask_eoc[i], idx), mask_pred[i]))
        _, h, w = mask_preds.shape
        # Zero out everything outside each ground-truth box.
        mask_preds = self.crop(pos_bboxe, h, w, mask_preds)
        loss = self.SBCELoss(mask_preds, mask_gt) * weight
        losses.append(nd.mean(loss))
    return nd.concat(*losses, dim=0)
def ssd_calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    """SSD training loss: softmax cross-entropy over foreground anchors plus
    Huber loss over masked box regressions.

    Returns a 3-tuple: (combined loss scaled by batch size, mean cls loss,
    mean bbox loss).
    """
    softmax_ce = gluon.loss.SoftmaxCrossEntropyLoss()
    huber = gluon.loss.HuberLoss()
    batch_size, _anchor_size, _cls_num = cls_preds.shape

    # Flatten predictions/labels across batch and anchors.
    flat_cls_preds = cls_preds.reshape((-1, cls_preds.shape[-1]))
    flat_cls_labels = cls_labels.reshape((-1, 1))
    # Keep only foreground anchors (label > 0); background is excluded here.
    fg_mask = (flat_cls_labels[:, 0] > 0).reshape(flat_cls_labels.shape)
    fg_idx = nd.array(np.where(fg_mask.asnumpy() > 0)[0], ctx=cls_preds.context)
    cls = softmax_ce(nd.take(flat_cls_preds, fg_idx), nd.take(flat_cls_labels, fg_idx))

    # Box branch: select rows whose mask sum is positive.
    flat_box_labels = bbox_labels.reshape((-1, 4))
    box_mask_sum = nd.sum(bbox_masks.reshape((-1, 4)), axis=-1)
    flat_box_preds = bbox_preds.reshape((-1, 4))
    box_idx = nd.array(np.where(box_mask_sum.asnumpy() > 0)[0], ctx=bbox_preds.context)
    bbox = huber(nd.take(flat_box_preds, box_idx), nd.take(flat_box_labels, box_idx))

    return (cls.mean() + bbox.mean()) * batch_size, cls.mean(), bbox.mean()
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of (features, labels).

    Parameters:
        batch_size -- number of examples per batch (last batch may be smaller).
        features   -- NDArray of examples, indexed along axis 0.
        labels     -- NDArray of targets, same leading length as features.
    """
    num_examples = len(features)
    # BUG FIX: mxnet's nd.random.shuffle returns a shuffled COPY; the original
    # discarded the return value, so batches were emitted in sequential order.
    indices = nd.random.shuffle(nd.array(list(range(num_examples))))
    for i in range(0, num_examples, batch_size):
        # Slicing an NDArray already yields an NDArray; no re-wrap needed.
        j = indices[i: min(i + batch_size, num_examples)]
        yield nd.take(features, j, axis=0), nd.take(labels, j, axis=0)
def __iter__(self):
    """Yield (X, y) minibatches from the wrapped dataset, shuffled when
    self.shuffle is set."""
    raw = self.dataset[:]
    features = nd.array(raw[0])
    targets = nd.array(raw[1])
    total = len(features)
    order = np.arange(total)
    if self.shuffle:
        np.random.shuffle(order)
    for start in range(0, total, self.batch_size):
        stop = min(start + self.batch_size, total)
        batch_idx = nd.array(order[start:stop])
        yield nd.take(features, batch_idx), nd.take(targets, batch_idx)
def hybrid_forward(self, F, feat, label, center_features):
    """Center-loss style penalty: squared distance between each feature and
    its class center, normalised by the class frequency in the batch.

    NOTE(review): the asnumpy()/np.bincount/nd.* calls below force imperative
    execution; this block cannot run in symbolic mode despite the
    hybrid_forward signature — confirm the block is never hybridized.
    """
    # Per-class sample count in this batch (label assumed to be integer ids).
    hist = nd.array(np.bincount(label.asnumpy()), ctx=feat.context)
    cls_count = nd.take(hist, label)
    # Look up the center vector for each sample's class.
    centers_selected = F.take(center_features, label)
    diff = feat - centers_selected
    # 0.5 * ||feat - center||^2, scaled by beta and divided by class count so
    # frequent classes do not dominate.
    loss = self.beta * 0.5 * F.sum(F.square(diff), 1) / cls_count
    return F.mean(loss, axis=0, exclude=True)
def _rnn_train(self, X, NX, NX_rep, graph_to_rnn, rnn_to_graph, NX_cum):
    """Training-time RNN pass over per-graph node features.

    Builds a per-graph summary by concatenating the mean node feature
    (segment sum divided by node count) with the feature of the last node
    (NX_cum - 1 indexes the final node of each segment — presumably NX_cum is
    the cumulative node count; verify against caller), reorders it into RNN
    layout, runs the RNN, and scatters the outputs back to graph layout.
    """
    # Mean node feature per segment: segment-sum / segment length.
    X_avg = fn.SegmentSumFn(NX_rep, NX.shape[0])(X) / nd.cast(
        fn.unsqueeze(NX, 1), 'float32')
    # Feature of the last node in each segment.
    X_curr = nd.take(X, indices=NX_cum - 1)
    X = nd.concat(X_avg, X_curr, dim=1)
    # rnn: gather into (batch_size, iw_size, length, num_features) layout.
    X = nd.take(X, indices=graph_to_rnn)
    batch_size, iw_size, length, num_features = X.shape
    # Fold batch and importance-weight dims together for the RNN call.
    X = X.reshape([batch_size * iw_size, length, num_features])
    X = self.rnn(X)
    X = X.reshape([batch_size, iw_size, length, -1])
    # Scatter RNN outputs back into graph order.
    X = nd.gather_nd(X, indices=rnn_to_graph)
    return X
def mask_loss(self, mask_pred, mask_eoc, mask_target, matches, bt_target):
    """Per-image mask loss (unweighted variant).

    Same structure as the weighted sibling implementation elsewhere in this
    file: positive anchors' prototype coefficients are combined with the
    prototype masks, cropped to the ground-truth box, and scored with sigmoid
    BCE; here the per-mask loss is a plain mean (sum / count) without the
    box-area weighting.

    NOTE(review): unlike the sibling, there is no guard for images with zero
    positive anchors — pos_num[i] == 0 makes idx empty, which likely fails
    downstream; confirm callers guarantee at least one positive per image.
    """
    samples = matches >= 0
    # Positive-anchor count per image, moved to host for Python slicing.
    pos_num = samples.sum(axis=-1).asnumpy().astype('int')
    # Descending sort of match ids puts positives first.
    rank = (-matches).argsort(axis=-1)
    losses = []
    for i in range(mask_pred.shape[0]):
        idx = rank[i, :pos_num[i]]
        pos_bboxe = nd.take(bt_target[i], idx)
        mask_gt = mask_target[i, matches[i, idx], :, :]
        # Assemble per-anchor masks from prototypes and apply sigmoid.
        mask_preds = nd.sigmoid(nd.dot(nd.take(mask_eoc[i], idx), mask_pred[i]))
        _, h, w = mask_preds.shape
        # Zero out predictions outside each ground-truth box.
        mask_preds = self.crop(pos_bboxe, h, w, mask_preds)
        loss = self.SBCELoss(mask_preds, mask_gt)
        losses.append(nd.sum(loss) / len(loss))
    return nd.concat(*losses, dim=0)
def _rnn_test(self, X, NX, NX_rep, NX_cum, h):
    """Single-step RNN pass at inference time; one partition per molecule.

    Concatenates the segment-mean feature with the last node's feature,
    feeds it through the RNN as a length-1 sequence, and returns the output
    together with the updated hidden state.
    """
    segment_mean = fn.SegmentSumFn(NX_rep, NX.shape[0])(X) / nd.cast(
        fn.unsqueeze(NX, 1), 'float32')
    last_node = nd.take(X, indices=NX_cum - 1)
    combined = nd.concat(segment_mean, last_node, dim=1)  # size: [NX, F_in * 2]
    # Run as a sequence of length one.
    combined = fn.unsqueeze(combined, axis=1)
    out, h = self.rnn(combined, h)
    out = fn.squeeze(out, axis=1)
    return out, h
def calIOU(anchor, gt):
    """Pairwise IoU between anchors and ground-truth boxes.

    Boxes are in (x0, y0, x1, y1) corner format (established by the [0,1] /
    [2,3] index splits below). ``anchor`` is flattened to (A, 4); ``gt`` is
    promoted to a 3-D (B, N, 4) layout, and both get a broadcast dimension
    inserted, so the result broadcasts anchors against every gt box.
    """
    assert len(anchor.shape) in (1,2,3)
    assert len(gt.shape) in (1,2,3)
    anchor = anchor.reshape((-1,4))
    # Promote gt to 3-D: (1,1,4) for a single box, add a leading batch dim
    # for a 2-D list of boxes.
    if len(gt.shape) < 3:
        gt = gt.reshape((1,1,4)) if len(gt.shape) == 1 else nd.expand_dims(gt, axis=0)
    anchor = nd.expand_dims(anchor, axis=1)
    gt = nd.expand_dims(gt, axis=1)
    # Intersection corners: max of top-lefts, min of bottom-rights.
    max_tl = nd.maximum(nd.take(anchor, nd.array([0,1]), axis=-1),
                        nd.take(gt, nd.array([0,1]), axis=-1))
    min_br = nd.minimum(nd.take(anchor, nd.array([2,3]), axis=-1),
                        nd.take(gt, nd.array([2,3]), axis=-1))
    area = nd.prod(min_br-max_tl, axis=-1)
    # Zero the intersection wherever the boxes do not overlap on some axis.
    i = nd.where((max_tl >= min_br).sum(axis=-1), nd.zeros_like(area), area)
    anchor_area = nd.prod(anchor[:,:,2:]-anchor[:,:,:2], axis=-1)
    gt_area = nd.prod(gt[:,:,:,2:]-gt[:,:,:,:2], axis=-1)
    total_area = anchor_area + gt_area - i
    iou = i / total_area
    return iou
def forward(self, feature, label): ''' :param feature: output feature matrix, mini-batch的输出特征矩阵 :param label: input label matrix, mini-batch的输入标签 ''' # embedding to get label related features 通过嵌入层得到label对应的feature label_count = nd.array(np.bincount(label.asnumpy().astype(int)), self.ctx) count = nd.take(label_count, label) embeddings= self.embedding(label) # reshape from (batch, 1, feature_size) to (batch, feature_size) reshape矩阵 embeddings = embeddings.reshape((-1, self.feature_size)) diff = feature - embeddings # calculate diff 计算差值 loss = self.lmbd * 0.5 * nd.sum(nd.square(diff), axis=1) / count return nd.mean(loss, axis=0, exclude=True) # 这一步似乎是完全没有必要的
def validate(net, val_data, val_items, val_shapes, ctx, size, classes):
    """Test on validation dataset.

    Runs the detector over ``val_data``, rescales boxes back to the original
    image scale, drops padded detections (class id -1), and accumulates
    per-class results as {class_id: [[item, score, xmin, ymin, xmax, ymax], ...]}.
    """
    clipper = gcv.nn.bbox.BBoxClipToImage()
    net.hybridize(static_alloc=True)
    print("---Detect Total {:d} Image Start.---".format(len(val_items)))
    result_dict = {}
    for ib, (batch, item) in enumerate(zip(val_data, val_items)):
        batch = split_and_load(batch, ctx_list=ctx)
        for x, y, im_scale in zip(*batch):
            ids, scores, bboxes = net(x)
            bboxes = clipper(bboxes, x)
            # Undo the resize applied during preprocessing.
            im_scale = im_scale.reshape((-1)).asscalar()
            bboxes *= im_scale
            inds = nd.argsort(nd.squeeze(ids, axis=(0, 2)), is_ascend=False)
            # NOTE(review): int8 overflows for class ids > 127 — widen if the
            # dataset has more classes.
            ids = nd.squeeze(ids, axis=(0, 2)).asnumpy().astype(np.int8).tolist()
            # BUG FIX: was `id is not -1`, an identity comparison with an int
            # literal that only worked via CPython's small-int cache (and is a
            # SyntaxWarning on Python 3.8+); use value inequality.
            valid_ids = [id for id in ids if id != -1]
            valid_len = len(valid_ids)
            if valid_len > 0:  # valid_len must > 0
                inds = nd.slice_axis(inds, begin=0, end=valid_len, axis=0)
                scores = nd.take(scores, inds, axis=1)
                bboxes = nd.take(bboxes, inds, axis=1)
                scores = scores.asnumpy()
                bboxes = bboxes.asnumpy()
                for i, id in enumerate(valid_ids):
                    score = scores[:, i, 0][0]
                    xmin, ymin, xmax, ymax = bboxes[:, i, 0][0], bboxes[:, i, 1][
                        0], bboxes[:, i, 2][0], bboxes[:, i, 3][0]
                    result_dict[id] = result_dict.get(
                        id, []) + [[item, score, xmin, ymin, xmax, ymax]]
                print("Detect Image {:s} Done.".format(item))
    print("---Detect Total {:d} Image Done.---".format(len(val_items)))
    return result_dict
def forward(self, x, sampled_values, label):
    """Forward computation."""
    sampled_candidates, _, _ = sampled_values
    # (batch_size,)
    flat_label = label.reshape(shape=(-1, ))
    # (num_sampled+batch_size,)
    ids = nd.concat(sampled_candidates, flat_label, dim=0)
    # Fetch only the rows of the (row-sparse) weight that we actually need,
    # plus the dense bias on the right context.
    weight = self.weight.row_sparse_data(ids)
    bias = self.bias.data(ids.context)
    # (num_sampled+batch_size, dim)
    w_all = nd.Embedding(data=ids, weight=weight, **self._kwargs)
    # (num_sampled+batch_size,)
    b_all = nd.take(bias, indices=ids)
    out, new_targets = self._logits(x, sampled_values, flat_label, w_all, b_all)
    return out, new_targets
def hybrid_forward(self, F, x, sampled_candidates, expected_count_sampled,
                   expected_count_true, label, weight, bias):
    """Forward computation.

    Gathers the output-layer weights and biases for the sampled negative
    candidates plus the true labels, then delegates to ``self._logits``.
    """
    # (batch_size,)
    label = F.reshape(label, shape=(-1,))
    # (num_sampled+batch_size,)
    ids = F.concat(sampled_candidates, label, dim=0)
    # lookup weights and biases
    # (num_sampled+batch_size, dim)
    w_all = F.Embedding(data=ids, weight=weight,
                        input_dim=self._num_classes, output_dim=self._in_unit,
                        sparse_grad=True)
    # (num_sampled+batch_size, 1)
    # BUG FIX: was nd.take, which fails when F is the symbol API under
    # hybridization; every op in hybrid_forward must go through F.
    b_all = F.take(bias, indices=ids)
    return self._logits(x, sampled_candidates, expected_count_sampled,
                        expected_count_true, label, w_all, b_all)
def forward(self, x, sampled_values, label):
    """Forward computation."""
    sampled_candidates, _, _ = sampled_values
    # (batch_size,)
    flat_label = label.reshape(shape=(-1,))
    # (num_sampled+batch_size,)
    ids = nd.concat(sampled_candidates, flat_label, dim=0)
    # Pull only the needed rows of the row-sparse weight; bias stays dense.
    weight = self.weight.row_sparse_data(ids)
    bias = self.bias.data(ids.context)
    # (num_sampled+batch_size, dim)
    w_all = nd.Embedding(data=ids, weight=weight, **self._kwargs)
    # (num_sampled+batch_size,)
    b_all = nd.take(bias, indices=ids)
    out, new_targets = self._dense(x, sampled_values, flat_label, w_all, b_all)
    return out, new_targets
def _gather_nd(index, src):
    """Similar to PyTorch's gather_nd on the first dimension.

    Converts the per-axis index into a flat index over ``src`` by accumulating
    per-axis strides, then gathers with a single flat take.
    """
    ctx = context(src)
    shp = index.shape
    ndim = src.ndim
    offsets = []
    stride = 1
    for i in reversed(range(1, ndim)):
        di = shp[i]
        offset_i = nd.arange(di, dtype=index.dtype)
        offsets.append((stride * offset_i).reshape((1, ) * i + (di, ) +
                                                   (1, ) * (ndim - 1 - i)))
        stride *= di
    if ndim > 1:
        new_idx = index * stride + copy_to(sum(offsets), ctx)
    else:
        # BUG FIX (consistent with the guarded sibling implementation): for a
        # 1-D src the loop never runs, offsets is empty, and sum([]) is the
        # Python int 0, which copy_to cannot handle — the index is already flat.
        new_idx = index
    src = src.reshape(-1)
    new_idx = new_idx.reshape(-1)
    rst = nd.take(src, new_idx).reshape(shp)
    return rst
def _gather_nd(index, src):
    """Similar to PyTorch's gather nd on first dimension."""
    dev = context(src)
    out_shape = index.shape
    rank = src.ndim
    # Build per-axis offset tensors so the multi-axis index can be collapsed
    # into one flat index over src.
    axis_offsets = []
    step = 1
    for axis in range(rank - 1, 0, -1):
        size = out_shape[axis]
        base = nd.arange(size, dtype=index.dtype)
        bcast_shape = (1,) * axis + (size,) + (1,) * (rank - 1 - axis)
        axis_offsets.append((step * base).reshape(bcast_shape))
        step *= size
    if rank > 1:
        flat_idx = index * step + copy_to(sum(axis_offsets), dev)
    else:
        # 1-D source: the index is already flat.
        flat_idx = index
    gathered = nd.take(src.reshape(-1), flat_idx.reshape(-1))
    return gathered.reshape(out_shape)
def matmul_maybe_select(A, B):
    """Perform Matrix multiplication C = A * B but A could be an integer id vector.

    If A is an integer vector, we treat it as multiplying a one-hot encoded
    tensor — the dense matmul collapses to a cheap row lookup. For example::

        A = [2, 0, 1], B = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]

    then matmul_maybe_select(A, B) returns rows 2, 0 and 1 of B.
    In all other cases, perform a normal matmul.

    Parameters
    ----------
    A : torch.Tensor
        lhs tensor
    B : torch.Tensor
        rhs tensor

    Returns
    -------
    C : torch.Tensor
        result tensor
    """
    is_index_vector = A.dtype in (np.int32, np.int64) and len(A.shape) == 1
    if is_index_vector:
        # One-hot matmul == row selection.
        return nd.take(B, A, axis=0)
    return nd.dot(A, B)
def bmm_maybe_select(A, B, index):
    """Slice submatrices of B by the given index and perform bmm.

    B is a 3D tensor of shape (N, D1, D2), viewed as a stack of N matrices.
    ``index`` is an integer vector of length M. A is either:

    (1) a dense tensor of shape (M, D1)  -> C[i, :] = matmul(A[i, :], B[index[i]])
    (2) an integer vector of length M    -> C[i, :] = B[index[i], A[i], :]

    The result C is a 2D matrix of shape (M, D2).

    Parameters
    ----------
    A : torch.Tensor
        lhs tensor
    B : torch.Tensor
        rhs tensor
    index : torch.Tensor
        index tensor

    Returns
    -------
    C : torch.Tensor
        return tensor
    """
    a_is_ids = A.dtype in (np.int32, np.int64) and len(A.shape) == 1
    if a_is_ids:
        # Case (2): pure index select — no matmul needed.
        return B[index, A, :]
    # Case (1): gather the relevant matrices, then batched matmul.
    selected = nd.take(B, index, axis=0)
    return nd.batch_dot(A.expand_dims(1), selected).squeeze()
def forward(self, cls_targets, ctr_targets, box_targets, mask_targets, matches,
            cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds):
    """Compute loss in entire batch across devices.

    Computes four losses per device slice and their weighted sum:
      * focal classification loss over the one-hot (background-stripped)
        class targets;
      * binary cross-entropy centerness loss over positive locations;
      * -log(IoU) box loss over foreground locations;
      * prototype-coefficient mask loss (YOLACT-style), computed on the two
        halves of the batch separately (the nd.split(..., 2, axis=0) calls)
        using the top-200 ranked matches per half.

    Returns (sum_losses, cls_losses, ctr_losses, box_losses, mask_losses),
    each a list with one entry per device slice.
    """
    scale = 4
    # require results across different devices at this time
    cls_targets, ctr_targets, box_targets, mask_targets, matches, cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds = \
        [_as_list(x) for x in (cls_targets, ctr_targets, box_targets, mask_targets, matches,
                               cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds)]
    cls_losses = []
    ctr_losses = []
    box_losses = []
    mask_losses = []
    sum_losses = []
    for clst, ctrt, boxt, maskt, matche, clsp, ctrp, boxp, maskp, maskcoep in zip(*[
            cls_targets, ctr_targets, box_targets, mask_targets, matches,
            cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds]):
        pos_gt_mask = clst > 0
        # cls loss: focal loss; class 0 (background) column is sliced off.
        if not self._from_logits:
            clsp = nd.sigmoid(clsp)
        one_hot = nd.one_hot(clst, self._num_class)
        one_hot = nd.slice_axis(one_hot, begin=1, end=None, axis=-1)
        pt = nd.where(one_hot, clsp, 1 - clsp)
        t = nd.ones_like(one_hot)
        alpha = nd.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
        cls_loss = -alpha * ((1 - pt)**self._gamma) * nd.log(nd.minimum(pt + self._eps, 1))
        cls_loss = nd.sum(cls_loss) / nd.maximum(nd.sum(pos_gt_mask), 1)
        cls_losses.append(cls_loss)
        # ctr loss: numerically-stable sigmoid BCE, restricted to positives.
        ctrp = nd.squeeze(ctrp, axis=-1)
        pos_pred_mask = ctrp >= 0
        ctr_loss = (ctrp * pos_pred_mask - ctrp * ctrt +
                    nd.log(1 + nd.exp(-nd.abs(ctrp)))) * pos_gt_mask
        ctr_loss = nd.sum(ctr_loss) / nd.maximum(nd.sum(pos_gt_mask), 1)
        ctr_losses.append(ctr_loss)
        # box loss // iou loss: -log(IoU) over foreground locations.
        px1, py1, px2, py2 = nd.split(boxp, num_outputs=4, axis=-1, squeeze_axis=True)
        gx1, gy1, gx2, gy2 = nd.split(boxt, num_outputs=4, axis=-1, squeeze_axis=True)
        apd = nd.abs(px2 - px1 + 1) * nd.abs(py2 - py1 + 1)
        agt = nd.abs(gx2 - gx1 + 1) * nd.abs(gy2 - gy1 + 1)
        iw = nd.maximum(nd.minimum(px2, gx2) - nd.maximum(px1, gx1) + 1., 0.)
        ih = nd.maximum(nd.minimum(py2, gy2) - nd.maximum(py1, gy1) + 1., 0.)
        ain = iw * ih + 1.
        union = apd + agt - ain + 1
        ious = nd.maximum(ain / union, 0.)
        fg_mask = nd.where(clst > 0, nd.ones_like(clst), nd.zeros_like(clst))
        box_loss = -nd.log(nd.minimum(ious + self._eps, 1.)) * fg_mask
        if self._return_iou:
            box_loss = nd.sum(box_loss) / nd.maximum(nd.sum(fg_mask), 1), ious
        else:
            box_loss = nd.sum(box_loss) / nd.maximum(nd.sum(fg_mask), 1)
        box_losses.append(box_loss)
        # mask loss: process the two halves of the batch separately to bound
        # memory; only the top-200 ranked matches per half are used.
        rank = (-matche).argsort(axis=-1)
        rank = nd.split(rank, 2, axis=0, squeeze_axis=True)
        matche = nd.split(matche, 2, axis=0, squeeze_axis=True)
        maskp = nd.split(maskp, 2, axis=0, squeeze_axis=True)
        maskt = nd.split(maskt, 2, axis=0, squeeze_axis=True)
        boxt = nd.split(boxt, 2, axis=0, squeeze_axis=True)
        maskcoep = nd.split(maskcoep, 2, axis=0, squeeze_axis=True)
        agt = nd.split(agt, 2, axis=0, squeeze_axis=True)
        mask_loss = []
        for ranki, matchei, maskpi, maskti, boxti, maskcoepi, agti in zip(
                rank, matche, maskp, maskt, boxt, maskcoep, agt):
            idx = nd.slice(ranki, 0, 200)
            pos_mask = nd.take(matchei >= 0, idx)
            pos_box = nd.take(boxti, idx)
            area = nd.take(agti, idx)
            # Small boxes weighted up; negatives zeroed out by pos_mask.
            weight = (self.gt_weidth * self.gt_height / (area + self._eps)) * pos_mask
            mask_idx = nd.take(matchei, idx)
            maskti = nd.take(maskti, mask_idx)
            # Assemble predicted masks from prototype coefficients.
            maskpi = nd.dot(nd.take(maskcoepi, idx), maskpi)
            maskpi = nd.sigmoid(maskpi)
            # Build a constant box crop mask (no gradients needed).
            # NOTE(review): 186 looks like a hard-coded prototype resolution —
            # confirm it matches mask_preds' spatial size.
            with autograd.pause():
                _h = nd.arange(186, ctx=maskpi.context)
                _w = nd.arange(186, ctx=maskpi.context)
                _h = nd.tile(_h, reps=(pos_box.shape[0], 1))
                _w = nd.tile(_w, reps=(pos_box.shape[0], 1))
                x1, y1, x2, y2 = nd.split(nd.round(pos_box / scale),
                                          num_outputs=4, axis=-1)
                _w = (_w >= x1) * (_w <= x2)
                _h = (_h >= y1) * (_h <= y2)
                _mask = nd.batch_dot(_h.expand_dims(axis=-1),
                                     _w.expand_dims(axis=-1), transpose_b=True)
            maskpi = maskpi * _mask
            mask_loss.append(nd.sum(self.SBCELoss(maskpi, maskti) * weight) /
                             nd.sum(pos_mask + self._eps))
        # (A long commented-out per-image alternate implementation of the mask
        # loss was removed here; see version control history if needed.)
        mask_loss = nd.mean(nd.concat(*mask_loss, dim=0))
        mask_losses.append(mask_loss)
        sum_losses.append(self._cls_lambd * cls_losses[-1] +
                          self._ctr_lambd * ctr_losses[-1] +
                          self._box_lambd * box_losses[-1] +
                          self._mask_lambd * mask_losses[-1])
    return sum_losses, cls_losses, ctr_losses, box_losses, mask_losses
def forward(self, is_train, req, in_data, out_data, aux):
    """Learn-NMS custom operator forward pass (Relation-Networks style).

    Pipeline: refine per-ROI boxes with the regression branch, take the
    top ``first_n`` scoring ROIs per foreground class, run a relation /
    attention module over their appearance and geometric features, and emit a
    per-threshold "keep" score multiplied into the class score.

    Outputs: nms_multi_score, sorted_bbox, sorted_score.
    Also maintains a module-global running average of wall-clock time.
    """
    nms_start_time = time.time()
    # inputs
    cls_score = in_data[0]
    bbox_pred = in_data[1]
    rois = in_data[2]
    im_info = in_data[3]
    fc_all_2_relu = in_data[4]
    nms_rank_weight = in_data[5]
    nms_rank_bias = in_data[6]
    roi_feat_embedding_weight = in_data[7]
    roi_feat_embedding_bias = in_data[8]
    nms_pair_pos_fc1_1_weight = in_data[9]
    nms_pair_pos_fc1_1_bias = in_data[10]
    nms_query_1_weight = in_data[11]
    nms_query_1_bias = in_data[12]
    nms_key_1_weight = in_data[13]
    nms_key_1_bias = in_data[14]
    nms_linear_out_1_weight = in_data[15]
    nms_linear_out_1_bias = in_data[16]
    nms_logit_weight = in_data[17]
    nms_logit_bias = in_data[18]
    if self.has_non_gt_index:
        non_gt_index = in_data[19]
    else:
        non_gt_index = None
    # Drop ground-truth ROIs: either keep the first nongt_dim rows, or select
    # by an explicit non-gt index, or keep everything.
    if self.nongt_dim is not None:
        cls_score_nongt = nd.slice_axis(data=cls_score, axis=0, begin=0,
                                        end=self.nongt_dim)
        bbox_pred_nongt = nd.slice_axis(data=bbox_pred, axis=0, begin=0,
                                        end=self.nongt_dim)
    elif non_gt_index is not None:
        cls_score_nongt = nd.take(a=cls_score, indices=non_gt_index)
        bbox_pred_nongt = nd.take(a=bbox_pred, indices=non_gt_index)
    else:
        cls_score_nongt = cls_score
        bbox_pred_nongt = bbox_pred
    # The regression branch is not trained through this op.
    bbox_pred_nongt = nd.BlockGrad(bbox_pred_nongt)

    # remove batch idx and gt roi
    sliced_rois = nd.slice_axis(data=rois, axis=1, begin=1, end=None)
    if self.nongt_dim is not None:
        sliced_rois = nd.slice_axis(data=sliced_rois, axis=0, begin=0,
                                    end=self.nongt_dim)
    elif non_gt_index is not None:
        sliced_rois = nd.take(a=sliced_rois, indices=non_gt_index)
    # bbox_pred_nobg, [num_rois, 4*(num_reg_classes-1)] — drop background regs.
    bbox_pred_nobg = nd.slice_axis(data=bbox_pred_nongt, axis=1, begin=4, end=None)
    # [num_boxes, 4, num_reg_classes-1]
    refined_bbox = refine_bbox_nd(sliced_rois, bbox_pred_nobg, im_info,
                                  means=self.bbox_means, stds=self.bbox_stds)
    # softmax cls_score to cls_prob, [num_rois, num_classes]
    cls_prob = nd.softmax(data=cls_score_nongt, axis=-1)
    cls_prob_nobg = nd.slice_axis(cls_prob, axis=1, begin=1, end=None)
    sorted_cls_prob_nobg = nd.sort(data=cls_prob_nobg, axis=0, is_ascend=False)
    # sorted_score, [first_n, num_fg_classes]
    sorted_score = nd.slice_axis(sorted_cls_prob_nobg, axis=0, begin=0,
                                 end=self.first_n, name='sorted_score')
    # Classes whose best score clears the (possibly lowered) threshold are
    # processed; the rest get zero conditional scores later.
    max_score_per_class = sorted_score.max(axis=0)
    max_score_per_class_numpy = max_score_per_class.asnumpy()
    valid_class_thresh = self.class_thresh
    valid_class_thresh = np.minimum(valid_class_thresh,
                                    max_score_per_class_numpy.max())
    valid_class_indices = np.where(max_score_per_class_numpy >= valid_class_thresh)[0]
    invalid_class_indices = np.where(max_score_per_class_numpy < valid_class_thresh)[0]
    num_valid_classes = len(valid_class_indices)
    valid_class_indices_nd = nd.array(valid_class_indices, ctx=sorted_score.context)

    # sort by score
    rank_indices = nd.argsort(data=cls_prob_nobg, axis=0, is_ascend=False)
    # first_rank_indices, [first_n, num_fg_classes]
    first_rank_indices = nd.slice_axis(rank_indices, axis=0, begin=0, end=self.first_n)
    valid_first_rank_indices = first_rank_indices.transpose().take(
        valid_class_indices_nd).transpose()

    # sorted_bbox, [first_n, num_fg_classes, 4, num_reg_classes-1]
    sorted_bbox = nd.take(a=refined_bbox, indices=first_rank_indices)
    if self.class_agnostic:
        # sorted_bbox, [first_n, num_fg_classes, 4]
        sorted_bbox = nd.Reshape(sorted_bbox, shape=(0, 0, 0), name='sorted_bbox')
    else:
        # Pick each class's own regression output along the last axis.
        cls_mask = nd.arange(0, self.num_fg_classes)
        cls_mask = nd.Reshape(cls_mask, shape=(1, -1, 1))
        cls_mask = nd.broadcast_to(cls_mask, shape=(self.first_n, 0, 4))
        # sorted_bbox, [first_n, num_fg_classes, 4]
        sorted_bbox = nd.pick(data=sorted_bbox, name='sorted_bbox',
                              index=cls_mask, axis=3)
    valid_sorted_bbox = sorted_bbox.transpose((1, 0, 2)).take(
        valid_class_indices_nd).transpose((1, 0, 2))

    # nms_rank_embedding, [first_n, 1024]
    nms_rank_embedding = extract_rank_embedding_nd(self.first_n, 1024)
    # nms_rank_feat, [first_n, 128]
    nms_rank_feat = nd.FullyConnected(name='nms_rank', data=nms_rank_embedding,
                                      num_hidden=128, weight=nms_rank_weight,
                                      bias=nms_rank_bias)
    # nms_position_matrix, [num_valid_classes, first_n, first_n, 4]
    nms_position_matrix = extract_multi_position_matrix_nd(valid_sorted_bbox)
    # roi_feature_embedding, [num_rois, 128]
    roi_feat_embedding = nd.FullyConnected(
        name='roi_feat_embedding',
        data=fc_all_2_relu,
        num_hidden=128,
        weight=roi_feat_embedding_weight,
        bias=roi_feat_embedding_bias)
    # sorted_roi_feat, [first_n, num_valid_classes, 128]
    sorted_roi_feat = nd.take(a=roi_feat_embedding, indices=valid_first_rank_indices)

    # vectorized nms: rank embedding added to every class's ROI features.
    # nms_embedding_feat, [first_n, num_valid_classes, 128]
    nms_embedding_feat = nd.broadcast_add(
        lhs=sorted_roi_feat, rhs=nd.expand_dims(nms_rank_feat, axis=1))
    # nms_attention_1, [first_n, num_valid_classes, 1024]
    nms_attention_1 = nms_attention_nd(
        nms_embedding_feat, nms_position_matrix, nms_pair_pos_fc1_1_weight,
        nms_pair_pos_fc1_1_bias, nms_query_1_weight, nms_query_1_bias,
        nms_key_1_weight, nms_key_1_bias, nms_linear_out_1_weight,
        nms_linear_out_1_bias, num_rois=self.first_n, index=1,
        group=self.nms_attention_group, dim=self.nms_attention_dim,
        fc_dim=self.nms_attention_fc_dim, feat_dim=self.nms_attention_feat_dim)
    # Residual connection around the attention block.
    nms_all_feat_1 = nms_embedding_feat + nms_attention_1
    nms_all_feat_1_relu = nd.Activation(data=nms_all_feat_1, act_type='relu',
                                        name='nms_all_feat_1_relu')
    # [first_n * num_valid_classes, 128]
    nms_all_feat_1_relu_reshape = nd.Reshape(nms_all_feat_1_relu, shape=(-3, -2))
    # logit, [first_n * num_valid_classes, num_thresh]
    nms_conditional_logit = nd.FullyConnected(
        name='nms_logit',
        data=nms_all_feat_1_relu_reshape,
        num_hidden=self.num_thresh,
        weight=nms_logit_weight,
        bias=nms_logit_bias)
    # logit_reshape, [first_n, num_valid_classes, num_thresh]
    nms_conditional_logit_reshape = nd.Reshape(
        nms_conditional_logit,
        shape=(self.first_n, num_valid_classes, self.num_thresh))
    nms_conditional_score = nd.Activation(data=nms_conditional_logit_reshape,
                                          act_type='sigmoid',
                                          name='nms_conditional_score')
    # Pad skipped classes with zeros, then restore original class order.
    if num_valid_classes == self.num_fg_classes:
        full_nms_conditional_score = nms_conditional_score
    else:
        full_nms_conditional_score = nd.concat(
            nms_conditional_score,
            nd.zeros((self.first_n,
                      self.num_fg_classes - num_valid_classes,
                      self.num_thresh),
                     ctx=nms_conditional_score.context),
            dim=1)
    all_indexes = np.concatenate((valid_class_indices, invalid_class_indices))
    restore_indexes = np.zeros((self.num_fg_classes))
    restore_indexes[all_indexes] = np.arange(self.num_fg_classes)
    restore_indexes = nd.array(restore_indexes, ctx=nms_conditional_score.context)
    full_nms_conditional_score = full_nms_conditional_score.transpose(
        (1, 0, 2)).take(restore_indexes).transpose((1, 0, 2))

    sorted_score_reshape = nd.expand_dims(sorted_score, axis=2)
    # sorted_score_reshape = nd.BlockGrad(sorted_score_reshape)
    nms_multi_score = nd.broadcast_mul(lhs=sorted_score_reshape,
                                       rhs=full_nms_conditional_score)
    # Force synchronization so the timing below is meaningful.
    _ = nms_multi_score.mean().asnumpy()
    all_time = time.time() - nms_start_time

    # Keep a bounded (last 1000 samples) running average of forward time in
    # module globals and report it every 250 calls.
    if 'learn_nms_time' not in globals().keys(
    ) or 'learn_nms_count' not in globals().keys():
        globals()['learn_nms_time'] = []
        globals()['learn_nms_count'] = 0
    if globals()['learn_nms_count'] >= 1000:
        globals()['learn_nms_time'].pop(0)
        globals()['learn_nms_time'].append(all_time)
    else:
        globals()['learn_nms_time'].append(all_time)
    globals()['learn_nms_count'] += 1
    if globals()['learn_nms_count'] % 250 == 0:
        print("--->> learn nms running average time cost: {}".format(
            float(sum(globals()['learn_nms_time'])) /
            (1000 if globals()['learn_nms_count'] > 1000 else
             globals()['learn_nms_count'])))

    self.assign(out_data[0], req[0], nms_multi_score)
    self.assign(out_data[1], req[1], sorted_bbox)
    self.assign(out_data[2], req[2], sorted_score)
def test_take():
    """2-D take: rows gathered from the tail of an all-ones matrix are ones."""
    matrix = nd.ones(shape=(LARGE_X, SMALL_Y))
    tail_idx = nd.arange(LARGE_X-1000, LARGE_X)
    taken = nd.take(matrix, tail_idx)
    # Every element of the last gathered row must equal 1.
    assert np.sum(taken[-1].asnumpy() == 1) == taken.shape[1]
def test_take():
    """Gathering the last 1000 rows of a large ones matrix keeps all ones."""
    source = nd.ones(shape=(LARGE_X, SMALL_Y))
    indices = nd.arange(LARGE_X - 1000, LARGE_X)
    gathered = nd.take(source, indices)
    row_width = gathered.shape[1]
    assert np.sum(gathered[-1].asnumpy() == 1) == row_width
def test_take():
    """1-D take: elements gathered from a large ones vector are all ones."""
    vector = nd.ones(shape=LARGE_X)
    tail = nd.arange(LARGE_X - 1000, LARGE_X)
    picked = nd.take(vector, tail)
    assert np.sum(picked.asnumpy() == 1) == picked.shape[0]
def generate_targets(self, img, boxes):
    """Build FCOS-style training targets for every pyramid level.

    img : [H, W, 3]
    boxes : [N, 5]  (x0, y0, x1, y1, class — order established by the splits
                     below)

    Boxes are sorted by area (ascending) so that when several boxes cover the
    same location, argmax over the validity map prefers the smallest box; a
    dummy all-zero box with id -1 is prepended as the "background" slot.
    Returns (cls_targets, ctr_targets, box_targets, match_targets), each
    concatenated over pyramid levels.
    """
    rh, rw, _ = img.shape
    # Work at 1/4 resolution for the dense offset/centerness maps.
    rh, rw = int(rh/4), int(rw/4)
    rx = nd.arange(0, rw).reshape((1, -1))
    ry = nd.arange(0, rh).reshape((-1, 1))
    sx = nd.tile(rx, reps=(rh, 1))
    sy = nd.tile(ry, reps=(1, rw))
    x0, y0, x1, y1, _ = nd.split(boxes, 5, axis=-1, squeeze_axis=True)
    areas = (x1 - x0) * (y1 - y0)
    boxes_id = nd.argsort(areas)
    boxes_id = nd.concat(nd.array([-1]), boxes_id, dim=0)
    boxes = nd.take(boxes, nd.argsort(areas))  # min -> max
    boxes = nd.concat(nd.zeros((1, 5)), boxes, dim=0)  # for gt assign confusion
    x0, y0, x1, y1, cls = nd.split(boxes, num_outputs=5, axis=-1, squeeze_axis=True)
    n = boxes.shape[0]
    # Per-location offsets to each box's four sides, [H, W, N].
    of_l = sx.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(x0/4, axis=0), axis=0)
    of_t = sy.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(y0/4, axis=0), axis=0)
    of_r = -(sx.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(x1/4, axis=0), axis=0))
    of_b = -(sy.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(y1/4, axis=0), axis=0))
    # [H, W, N]
    eps = 1e-5
    # Clamp negative offsets (location outside the box) to zero.
    of_l = of_l * (of_l > 0)
    of_r = of_r * (of_r > 0)
    of_t = of_t * (of_t > 0)
    of_b = of_b * (of_b > 0)
    # Centerness: geometric mean of side-offset ratios (FCOS formulation).
    ctr = nd.minimum(of_l, of_r) / (nd.maximum(of_l, of_r) + eps) * \
        nd.minimum(of_t, of_b) / (nd.maximum(of_t, of_b) + eps)
    ctr = nd.sqrt(nd.abs(ctr))
    ctr[:, :, 0] = 0
    # [H, W, N, 4] — offsets rescaled back to input-image pixels.
    offsets = nd.concat(of_l.reshape(-2, 1), of_t.reshape(-2, 1),
                        of_r.reshape(-2, 1), of_b.reshape(-2, 1), dim=-1) * 4.
    # Feature-map sizes per stage (computed by repeated halving, then
    # reversed so the coarsest level comes first).
    fh = int(np.ceil(rh / 2))
    fw = int(np.ceil(rw / 2))
    fm_list = []
    for i in range(self._stages):
        fm_list.append((fh, fw))
        fh = int(np.ceil(fh / 2))
        fw = int(np.ceil(fw / 2))
    fm_list = fm_list[::-1]
    cls_targets = []
    ctr_targets = []
    box_targets = []
    match_targets = []
    stride = int(self._stride/4)
    for i in range(self._stages):
        fh, fw = fm_list[i]
        cx = nd.arange(0, fw).reshape((1, -1))
        cy = nd.arange(0, fh).reshape((-1, 1))
        sx = nd.tile(cx, reps=(fh, 1))
        sy = nd.tile(cy, reps=(1, fw))
        # (y, x) grid coordinates; int32 avoids a dtype bug noted by the
        # original author.
        syx = nd.stack(sy.reshape(-1), sx.reshape(-1)).transpose().astype('int32')
        by, bx = nd.split(syx*stride, 2, axis=-1, squeeze_axis=True)
        # [FH*FW, N, 4]
        # NOTE(review): 740 looks like a hard-coded input width used to
        # flatten (y, x) into a row index — this breaks for other image
        # sizes; confirm and derive from rw instead.
        of_byx = nd.take(offsets.reshape((-1, n, 4)), by*740/4+bx)
        of_ctr = nd.take(ctr.reshape((-1, n)), by*740/4 + bx)
        min_vr, max_vr = self._valid_range[i]
        # [FH*FW, N] — location inside the box AND box size within this
        # level's valid range.
        is_in_box = nd.prod(of_byx > 0, axis=-1)
        is_valid_area = (of_byx.max(axis=-1) >= min_vr) * (of_byx.max(axis=-1) <= max_vr)
        # [FH*FW, N] — validity scaled by centerness.
        valid_pos = nd.elemwise_mul(is_in_box, is_valid_area) * of_ctr
        of_valid = valid_pos.reshape((fh, fw, n))
        of_valid[:, :, 0] = 0  # background slot never wins the argmax
        # [FH*FW] — best (smallest, most-centered) box per location.
        gt_inds = nd.argmax(of_valid, axis=-1).reshape(-1)
        # box targets
        box_target = nd.take(boxes, gt_inds).slice_axis(begin=0, end=4, axis=-1)
        # cls targets
        cls_target = nd.take(cls, gt_inds)
        # match targets: original (pre-sort) box ids; -1 for background.
        match_target = nd.take(boxes_id, gt_inds)
        # ctr targets
        ctr_target = nd.pick(of_ctr, gt_inds)
        box_targets.append(box_target)
        cls_targets.append(cls_target)
        ctr_targets.append(ctr_target)
        stride = int(stride / 2)
        match_targets.append(match_target)
    box_targets = nd.concat(*box_targets, dim=0)
    cls_targets = nd.concat(*cls_targets, dim=0)
    ctr_targets = nd.concat(*ctr_targets, dim=0)
    match_targets = nd.concat(*match_targets, dim=0)
    return cls_targets, ctr_targets, box_targets, match_targets
def data_iter(X, y, batch_size):
    """Yield shuffled (X, y) minibatches; the last batch may be short."""
    total = len(X)
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        picked = nd.array(order[start:min(start + batch_size, total)])
        yield nd.take(X, picked), nd.take(y, picked)
def takeT(X, T=0):
    """Pick 16 frames at even temporal positions offset by T along axis 3,
    casting the input to opt.dtype first."""
    frame_positions = nd.array([T + 2 * k for k in range(16)], ctx=ctx[0])
    return nd.take(X.astype(opt.dtype, copy=False), frame_positions, axis=3)
def takeT(X, T=0):
    """Pick 16 frames at even temporal positions offset by T along axis 3
    (no dtype cast)."""
    frame_positions = nd.array([T + 2 * k for k in range(16)], ctx=ctx[0])
    return nd.take(X, frame_positions, axis=3)