def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1, ctx=None):
    """Build the gather indices used by an im2col transform.

    Parameters
    ----------
    x_shape : tuple
        Input shape, unpacked as ``(N, C, H, W)``.
    field_height, field_width : int
        Height and width of the sliding window.
    padding : int
        Padding assumed on each side of H and W when sizing the output.
    stride : int
        Step between consecutive windows.
    ctx : optional
        Forwarded unchanged to every ``nd.arange`` call.

    Returns
    -------
    tuple
        ``(k, i, j)`` as int32 arrays. ``k`` has shape
        ``(C * field_height * field_width, 1)``; ``i`` and ``j`` have shape
        ``(C * field_height * field_width, out_height * out_width)``.

    Raises
    ------
    AssertionError
        If the padded extent minus the window size is not a multiple of
        ``stride`` along either axis.
    """
    _, C, H, W = x_shape
    span_h = H + 2 * padding - field_height
    span_w = W + 2 * padding - field_width
    # The width check must use field_width; checking it against field_height
    # is only correct for square windows.
    assert span_h % stride == 0, "window height/stride does not tile the padded input"
    assert span_w % stride == 0, "window width/stride does not tile the padded input"
    out_height = int(span_h // stride) + 1
    out_width = int(span_w // stride) + 1

    # Row offsets inside one window (i0) and window start rows (i1).
    i0 = nd.repeat(nd.arange(field_height, ctx=ctx), field_width)
    i0 = nd.tile(i0, C)
    i1 = stride * nd.repeat(nd.arange(out_height, ctx=ctx), out_width)
    # Column offsets inside one window (j0) and window start columns (j1).
    j0 = nd.tile(nd.arange(field_width, ctx=ctx), field_height * C)
    j1 = stride * nd.tile(nd.arange(out_width, ctx=ctx), out_height)

    i = i0.reshape((-1, 1)) + i1.reshape((1, -1))
    j = j0.reshape((-1, 1)) + j1.reshape((1, -1))
    # Channel index for every entry of a flattened window.
    k = nd.repeat(nd.arange(C, ctx=ctx), field_height * field_width).reshape((-1, 1))

    return (k.astype('int32'), i.astype('int32'), j.astype('int32'))
def resize_contain(src, size, fill=0):
    """Resize the image to fit in the given area while keeping aspect ratio.

    If both the height and the width in `size` are larger than the height
    and the width of the input image, the image is placed on the center with
    an appropriate padding to match `size`. Otherwise, the input image is
    scaled down to fit in a canvas whose size is `size` while preserving
    aspect ratio.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    size : tuple
        Tuple of length 2 as (width, height).
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, all
        channels will be padded with that single value. Otherwise `fill`
        must have the same length as image channels, which results in
        padding with per-channel values.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image of shape (height, width, channels).
    tuple
        Tuple of (offset_x, offset_y, scaled_x, scaled_y)

    Raises
    ------
    ValueError
        If `fill` is array-like and its size differs from the channel count.
    """
    h, w, c = src.shape
    ow, oh = size
    scale_h = oh / h
    scale_w = ow / w
    # Never upscale: the factor is capped at 1.
    scale = min(min(scale_h, scale_w), 1)
    scaled_x = int(w * scale)
    scaled_y = int(h * scale)
    if scale < 1:
        src = mx.image.imresize(src, scaled_x, scaled_y)

    off_y = (oh - scaled_y) // 2 if scaled_y < oh else 0
    off_x = (ow - scaled_x) // 2 if scaled_x < ow else 0

    # make canvas
    if isinstance(fill, numeric_types):
        # Allocate next to the image so both branches return on src.context.
        dst = nd.full(shape=(oh, ow, c), val=fill, ctx=src.context, dtype=src.dtype)
    else:
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        # Tile the channel vector once per pixel. Repeating each element and
        # reshaping would group values by channel and scramble the HWC layout.
        dst = nd.tile(fill.reshape((1, c)), reps=(oh * ow, 1)).reshape((oh, ow, c))

    dst[off_y:off_y+scaled_y, off_x:off_x+scaled_x, :] = src
    return dst, (off_x, off_y, scaled_x, scaled_y)
def hybrid_forward(
    self, F, score_gt, kernel_gt, score_pred, training_masks, *args, **kwargs
):
    """Combine a dice loss on channel 0 with a dice loss on the kernel channels.

    Kernel maps are ordered [1, ..., 0.5].

    Channel 0 of ``score_pred`` is scored against ``score_gt`` under
    ``training_masks``; channels 1 and up are scored against ``kernel_gt``
    under a mask of pixels where the masked channel-0 prediction exceeds 0.5.

    Side effects: stores ``self.pixel_acc``, ``self.kernel_acc``,
    ``self.C_loss`` and ``self.kernel_loss``.

    Returns
    -------
    ``self.lam * C_loss + (1 - self.lam) * kernel_loss``, one value per
    batch item.
    """
    eps = 1e-5
    text_pred = score_pred[:, 0, :, :]
    self.pixel_acc = batch_pix_accuracy(text_pred, score_gt)

    # Dice loss for channel 0, reduced over the two spatial axes.
    overlap = F.sum(score_gt * text_pred * training_masks, axis=(1, 2))
    gt_energy = F.sum(training_masks * score_gt * score_gt, axis=(1, 2))
    pred_energy = F.sum(training_masks * text_pred * text_pred, axis=(1, 2))
    text_loss = 1.0 - (2 * overlap) / (gt_energy + pred_energy + eps)

    # Kernel channels are only scored where the masked text prediction fires.
    selected = F.where(
        training_masks * text_pred > 0.5,
        F.ones_like(text_pred),
        F.zeros_like(text_pred),
    )
    selected = F.repeat(
        F.expand_dims(selected, axis=1), repeats=self.num_kernels - 1, axis=1
    )

    self.kernel_acc = batch_pix_accuracy(
        score_pred[:, 1, :, :] * score_gt, kernel_gt[:, 0, :, :]
    )

    kernels_pred = score_pred[:, 1:, :, :]
    k_overlap = F.sum(kernel_gt * kernels_pred * selected, axis=(2, 3))
    k_gt_energy = F.sum(kernel_gt * kernel_gt * selected, axis=(2, 3))
    k_pred_energy = F.sum(kernels_pred * kernels_pred * selected, axis=(2, 3))
    per_kernel = 1.0 - (2 * k_overlap) / (k_gt_energy + k_pred_energy + eps)
    kernel_loss = F.mean(per_kernel, axis=1)

    self.C_loss = text_loss
    self.kernel_loss = kernel_loss
    return self.lam * text_loss + (1.0 - self.lam) * kernel_loss
def Route(self, x):
    """Run ``self.route_num`` routing rounds and return the last squashed output.

    The logits start at zero with shape
    ``(batch, 1, self.num_cap, self.num_locations)``. Each round applies a
    softmax over axis 2, squashes the weighted sum of the votes, and adds
    the vote/output agreement back to the logits.

    NOTE(review): ``v`` is only bound inside the loop, so ``route_num`` must
    be at least 1.
    """
    batch = x.shape[0]
    device = x.context

    logits = nd.zeros((batch, 1, self.num_cap, self.num_locations), ctx=device)

    # Two singleton axes are inserted at position 2 so x broadcasts against
    # the weights.
    inputs = nd.expand_dims(nd.expand_dims(x, axis=2), 2)
    weights = nd.expand_dims(self.w_ij.data(device), axis=0)
    weights = nd.repeat(weights, repeats=batch, axis=0)
    votes = nd.sum(weights * inputs, axis=1)

    for _ in range(self.route_num):
        coupling = nd.softmax(logits, axis=2)
        weighted = nd.sum(votes * coupling, axis=-1)
        v = squash(weighted, 1)
        agreement = nd.sum(votes * nd.expand_dims(v, axis=-1), axis=1, keepdims=True)
        logits = logits + agreement
    return v
def forward(self, samples, matches, anchors, refs):
    """Encode matched reference boxes as normalized offsets from the anchors.

    For every anchor, the reference box selected by ``matches`` is picked,
    both boxes go through ``self.corner_to_center``, and four targets are
    built: centre offsets divided by anchor size, and log size ratios. Each
    is shifted by ``self._means`` and divided by ``self._stds``.

    Returns
    -------
    targets
        The encoded values where ``samples > 0.5``, zero elsewhere.
    masks
        1 where ``samples > 0.5``, 0 elsewhere, repeated over the 4 coords.
    """
    F = nd
    # TODO(zhreshold): batch_pick, take multiple elements?
    per_anchor = F.repeat(refs.reshape((0, 1, -1, 4)), axis=1, repeats=matches.shape[1])
    coords = F.split(per_anchor, axis=-1, num_outputs=4, squeeze_axis=True)
    picked = []
    for idx in range(4):
        picked.append(F.pick(coords[idx], matches, axis=2).reshape((0, -1, 1)))
    matched = F.concat(*picked, dim=2)

    g = self.corner_to_center(matched)
    a = self.corner_to_center(anchors)
    raw = (
        (g[0] - a[0]) / a[2],
        (g[1] - a[1]) / a[3],
        F.log(g[2] / a[2]),
        F.log(g[3] / a[3]),
    )
    normalized = [(raw[n] - self._means[n]) / self._stds[n] for n in range(4)]
    codecs = F.concat(*normalized, dim=2)

    selected = F.tile(samples.reshape((0, -1, 1)), reps=(1, 1, 4)) > 0.5
    targets = F.where(selected, codecs, F.zeros_like(codecs))
    masks = F.where(selected, F.ones_like(selected), F.zeros_like(selected))
    return targets, masks
def forward(self, scores, offsets, anchors, img):
    """Turn anchor scores and offsets into a fixed number of proposals.

    The boxes are decoded with ``self._bbox_decoder`` and clipped with
    ``self._cliper``. Boxes narrower or shorter than ``self._min_size`` are
    marked with -1, NMS is applied, and the first ``post_nms`` rows are kept.

    Returns
    -------
    rpn_score
        Column 0 of the kept rows.
    rpn_bbox
        The remaining columns of the kept rows.
    """
    # Training and inference use different proposal budgets.
    if autograd.is_training():
        pre_nms, post_nms = self._train_pre_nms, self._train_post_nms
    else:
        pre_nms, post_nms = self._test_pre_nms, self._test_post_nms

    with autograd.pause():
        # Apply the predicted offsets to the anchors, then clip.
        decoded = self._bbox_decoder(offsets, self._bbox_tocenter(anchors))
        rois = self._cliper(decoded, img)

        # Flag every ROI whose width or height is below the minimum size.
        x_min, y_min, x_max, y_max = nd.split(rois, num_outputs=4, axis=-1)
        too_narrow = (x_max - x_min) < self._min_size
        too_short = (y_max - y_min) < self._min_size
        invalid_mask = too_narrow + too_short

        # Flagged entries get a score of -1 and all four coordinates set to -1.
        scores = nd.where(invalid_mask, nd.ones_like(scores) * -1, scores)
        coord_mask = nd.repeat(invalid_mask, repeats=4, axis=-1)
        rois = nd.where(coord_mask, nd.ones_like(rois) * -1, rois)

        # NMS over rows laid out as [score, 4 box coordinates].
        candidates = nd.concat(scores, rois, dim=-1)
        suppressed = nd.contrib.box_nms(
            candidates,
            overlap_thresh=self._nms_thresh,
            topk=pre_nms,
            coord_start=1,
            score_index=0,
            id_index=-1,
            force_suppress=True,
        )

        # Keep the first post_nms rows and split score from box.
        kept = nd.slice_axis(suppressed, axis=1, begin=0, end=post_nms)
        rpn_score = nd.slice_axis(kept, axis=-1, begin=0, end=1)
        rpn_bbox = nd.slice_axis(kept, axis=-1, begin=1, end=None)

        return rpn_score, rpn_bbox
def sample_neighbours(self, data, query_network):
    """Fetch the ``self.k`` stored samples whose keys are nearest to each query.

    Parameters
    ----------
    data : sequence
        Unpacked into ``query_network``; ``data[0].shape[0]`` is the batch
        size.
    query_network : callable
        Maps ``*data`` to one query vector per batch item. Its output is
        moved to CPU before comparison.

    Returns
    -------
    list
        ``[values, labels]``, where ``values`` holds every entry of
        ``self.value_memory`` indexed by the neighbour indices and
        ``labels`` does the same for ``self.label_memory``.
    """
    num_stored_samples = self.key_memory.shape[0]
    batch_size = data[0].shape[0]

    query = query_network(*data).as_in_context(mx.cpu())

    # Pair every query row with every stored key: row q * N + n compares
    # query q with key n.
    vec1 = nd.repeat(query, repeats=num_stored_samples, axis=0)
    vec2 = nd.tile(self.key_memory, reps=(batch_size, 1))

    # Euclidean distance of each pair (sum over every axis except 0).
    sq = nd.square(nd.subtract(vec1, vec2))
    pair_dist = nd.sqrt(nd.sum(sq, exclude=1, axis=0))
    dist = nd.reshape(pair_dist, shape=(batch_size, num_stored_samples))

    # topk returns the largest entries by default, i.e. the farthest keys.
    # Neighbours are the smallest distances, so request ascending order.
    sample_ind = nd.topk(dist, k=self.k, axis=1, ret_typ="indices", is_ascend=True)

    sample_labels = [labels[sample_ind] for labels in self.label_memory]
    sample_values = [values[sample_ind] for values in self.value_memory]
    return [sample_values, sample_labels]
def render(gfunc, stepsize=0.1, momentum=0.9, maxstep=24000):
    """Yield generated image grids while a noise batch drifts toward targets.

    A batch of 30 noise vectors is moved with momentum toward a batch of
    target vectors. When any vector gets within 0.5 of its target, the whole
    target batch is redrawn. After every step the generator output is
    rescaled to uint8 using ``config.data.bbox`` and passed through
    ``align_images``.

    Parameters
    ----------
    gfunc : callable
        Called as ``gfunc(noise=..., cond=...)``.
    stepsize : float
        Weight of the unit direction in each velocity update.
    momentum : float
        Weight of the previous velocity.
    maxstep : int
        Number of frames to yield.
    """
    K = 10
    num = 30
    bbox = config.data.bbox

    # Conditioning labels: each of the K classes repeated, cut to num rows.
    class_ids = nd.repeat(nd.arange(K, ctx=ctx), (num - 1) // K + 1)[:num]
    cond = nd.one_hot(class_ids, K).reshape((num, K, 1, 1))

    target = nd.random.normal(shape=(num, 100, 1, 1), ctx=ctx)
    current = nd.random.normal(shape=(num, 100, 1, 1), ctx=ctx)
    velocity = 0.
    for _ in range(maxstep):
        direction = target - current
        distance = direction.norm(axis=1, keepdims=True)
        if distance.min().asscalar() < .5:
            # The redraw takes effect from the next step; this step still
            # uses the direction computed above.
            target = nd.random.normal(shape=(num, 100, 1, 1), ctx=ctx)
        direction /= distance
        velocity = stepsize * direction + momentum * velocity
        current += velocity

        gen = gfunc(noise=current, cond=cond)
        scaled = (gen - bbox[0]) * 255 / (bbox[1] - bbox[0])
        indat = scaled.asnumpy().clip(0, 255).astype(np.uint8)
        indat = align_images(indat, 5, 6, 32, 32, 3)
        yield indat
def getwh(scales, ratios, fw, fh, srmode):
    """Compute box widths and heights from scales and aspect ratios.

    Each box has ``width = scale * sqrt(ratio)`` and
    ``height = width / ratio``.

    Parameters
    ----------
    scales, ratios : NDArray
        1-D arrays of scales and aspect ratios.
    fw, fh : number
        Final multipliers for the widths and heights.
    srmode : str
        ``'few'`` yields ``scales.size + ratios.size - 1`` boxes: the first
        scale with every ratio, then the remaining scales with the first
        ratio. Any other value yields every scale/ratio combination.

    Returns
    -------
    tuple
        ``(width, height)`` arrays, already multiplied by ``fw`` and ``fh``.
    """
    if srmode != 'few':
        all_scales = nd.repeat(scales, ratios.size)
        all_ratios = nd.tile(ratios, scales.size)
        width = all_scales * nd.sqrt(all_ratios)
        height = width / all_ratios
    else:
        n_ratios = ratios.size
        total = scales.size + n_ratios - 1
        width = nd.zeros((total,))
        height = nd.zeros((total,))
        root = nd.sqrt(ratios)
        # First scale with every ratio.
        width[:n_ratios] = scales[0] * root
        height[:n_ratios] = width[:n_ratios] / ratios
        # Remaining scales with the first ratio.
        width[n_ratios:] = scales[1:] * root[0]
        height[n_ratios:] = width[n_ratios:] / ratios[0]
    return width * fw, height * fh
def repeat_emb(param, emb):
    """Repeat `emb` along a new leading axis and cache the result on `param`.

    The array is repeated ``param.n_repeats`` times, stored as
    ``param.repeated``, given a gradient buffer, and returned.
    """
    with_leading_axis = nd.expand_dims(emb, 0)
    param.repeated = nd.repeat(with_leading_axis, repeats=param.n_repeats, axis=0)
    repeated = param.repeated
    repeated.attach_grad()
    return repeated
def test_repeat():
    """Repeating a LARGE_X // 2 vector twice along axis 0 yields LARGE_X items."""
    half = LARGE_X // 2
    source = create_vector(size=half)
    doubled = nd.repeat(source, repeats=2, axis=0)
    assert doubled.shape[0] == LARGE_X
    # Index 1 is the second copy of the first element.
    assert doubled[1] == 0
    # The last index is the second copy of the last element.
    assert doubled[LARGE_X - 1] == half - 1
def hybrid_forward(self, F, x, a, b):
    """Normalize `x` over its last axis, then scale by `a` and shift by `b`.

    Computes ``(x - mean) * a / (std + self.eps) + b``, where ``mean`` and
    ``std`` are taken over the last axis of `x`.

    Two defects of the earlier version are fixed:

    * the squared deviations were summed over the last axis but divided by
      ``x.shape[1]``, which is the wrong count unless those axes happen to
      be the same length; the mean over the last axis is used instead;
    * it called ``nd.*`` and read ``x.shape``, which fails when `F` is the
      symbol module; every op now goes through `F`.

    Parameters
    ----------
    F : module
        ``mxnet.ndarray`` or ``mxnet.symbol``, supplied by Gluon.
    x : NDArray or Symbol
        Input. Presumably (batch, seq_len, embedding_dim) -- TODO confirm.
    a, b : NDArray or Symbol
        Scale and shift; must broadcast against `x`.
    """
    mean = F.mean(x, axis=-1, keepdims=True)
    centered = F.broadcast_sub(x, mean)
    # Population standard deviation over the normalized axis.
    std = F.sqrt(F.mean(F.square(centered), axis=-1, keepdims=True))
    scaled = F.broadcast_mul(centered, a)
    normalized = F.broadcast_div(scaled, std + self.eps)
    return F.broadcast_add(normalized, b)
def predict_transform(prediction, input_dim, anchors):
    """Convert raw grid-relative predictions to input-image coordinates.

    `prediction` is modified in place and returned. For each of the three
    grid sizes (13, 26, 52), the matching row range of axis 1 is updated:
    columns 0:2 get the cell offset added and are scaled by
    ``input_dim / grid_size``; columns 2:4 become ``exp(value) * anchor``.

    Parameters
    ----------
    prediction : NDArray
        Indexed as ``[batch, row, column]``; rows 0..10646 are touched.
    input_dim : number
        Side length of the network input.
    anchors : NDArray or array-like
        Anchor sizes; converted to an NDArray on `prediction`'s context if
        needed. Rows 0..8 are referenced.
    """
    ctx = prediction.context
    if not isinstance(anchors, nd.NDArray):
        anchors = nd.array(anchors, ctx=ctx)

    batch_size = prediction.shape[0]
    anchors_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    grid_sizes = [13, 26, 52]
    # TODO this can automatically be calculated
    row_ranges = [(0, 507), (507, 2535), (2535, 10647)]

    for mask, grid_size, (start, stop) in zip(anchors_masks, grid_sizes, row_ranges):
        # Column and row index of every grid cell.
        cells = np.arange(grid_size)
        cols, rows = np.meshgrid(cells, cells)
        x_offset = nd.array(cols.reshape((-1, 1)), ctx=ctx)
        y_offset = nd.array(rows.reshape((-1, 1)), ctx=ctx)

        # One (x, y) pair per cell, repeated three times (once per anchor),
        # then repeated across the batch.
        cell_xy = nd.concat(x_offset, y_offset, dim=1)
        cell_xy = nd.repeat(cell_xy, repeats=3, axis=0).reshape((-1, 2))
        x_y_offset = nd.repeat(nd.expand_dims(cell_xy, 0), repeats=batch_size, axis=0)

        # The three anchors of this level, repeated for every cell and then
        # across the batch so they line up with x_y_offset.
        level_anchors = nd.expand_dims(anchors[mask], 0)
        level_anchors = nd.repeat(
            level_anchors, repeats=grid_size * grid_size, axis=0
        ).reshape((-1, 2))
        tmp_anchors = nd.repeat(
            nd.expand_dims(level_anchors, 0), repeats=batch_size, axis=0
        )

        # Make xy relative to the feature-map origin instead of the cell.
        prediction[:, start:stop, :2] += x_y_offset
        # Scale from feature-map units to input-image units.
        prediction[:, start:stop, :2] *= (float(input_dim) / grid_size)
        # Width/height are exponentiated and scaled by the anchor size.
        prediction[:, start:stop, 2:4] = \
            nd.exp(prediction[:, start:stop, 2:4]) * tmp_anchors

    return prediction
def prep_final_label(labels, num_classes, input_dim=416):
    """Scatter ground-truth rows onto the 13/26/52 grids.

    Expected format for `labels`, one box per row::

        [x, y, w, h, objectivity, class0, class1, ...]

    Processing stops at the first row whose objectivity is 0.
    TODO: the caller hardcodes 30 rows per image; check that this is the
    real maximum for the dataset.

    Returns
    -------
    t_y : NDArray, shape (10647, num_classes + 5)
        Network targets: xy as offsets inside the cell, wh as log ratio to
        the chosen anchor, the rest copied from the label row.
    t_xywhs : NDArray, shape (10647, 5)
        The first four label columns multiplied by `input_dim`, with
        column 4 set to 1.0.
    """
    ctx = labels.context

    # These anchors are borrowed from those calculated on the COCO dataset
    # in the YOLO v3 paper.
    anchors = nd.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       ctx=ctx)

    # Which anchors each pyramid level uses: the larger anchors go with the
    # smaller feature maps.
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # Size of the feature map at each pyramid level.
    grid_sizes = [2 ** level * 13 for level in range(3)]

    # Targets relative to the feature-map anchor boxes.
    label_list = [
        nd.zeros(shape=(g, g, 3, num_classes + 5), dtype="float32", ctx=ctx)
        for g in grid_sizes
    ]
    # Targets relative to the source input image.
    true_label_list = [
        nd.zeros(shape=(g, g, 3, 5), dtype="float32", ctx=ctx)
        for g in grid_sizes
    ]

    for x_box in range(labels.shape[0]):
        # An objectivity of 0 marks the end of the real labels.
        if labels[x_box, 4] == 0.0:
            break

        for level, grid in enumerate(grid_sizes):
            level_anchors = anchors[anchors_mask[level]]

            # Scale xywh to feature-map units and repeat once per anchor so
            # the IoU against every anchor can be computed in one call.
            box_xywh = nd.repeat(
                nd.expand_dims(labels[x_box, :4] * grid, axis=0),
                repeats=level_anchors.shape[0],
                axis=0)

            # Same centres, but with each anchor's width/height (also scaled
            # to feature-map units).
            anchor_xywh = box_xywh.copy()
            anchor_xywh[:, 2:4] = level_anchors / input_dim * grid

            # The anchor with the highest IoU is the best fit for this label.
            best_anchor = nd.argmax(bbox_iou(box_xywh, anchor_xywh), axis=0)

            target = labels[x_box].copy()
            # Integer cell of the box centre; what remains after subtracting
            # it is the 0-1 offset inside that cell.
            cell = nd.floor(target[:2] * grid)
            target[:2] = target[:2] * grid
            target[:2] -= cell
            cell = cell.astype("int")

            # Log ratio of the box size to the chosen anchor's size.
            target[2:4] = nd.log(
                target[2:4] * input_dim /
                level_anchors[best_anchor].reshape(-1) + 1e-12)

            # Grids are indexed [row (y), column (x), anchor].
            # TODO: a second object in the same cell with the same anchor
            # overwrites the first.
            label_list[level][cell[1], cell[0], best_anchor] = target

            # Same box in input-image units, with objectivity forced to 1.
            true_xywhs = labels[x_box, :5] * input_dim
            true_xywhs[4] = 1.0
            true_label_list[level][cell[1], cell[0], best_anchor] = true_xywhs

    # Flatten every grid to (cells * anchors, columns) and stack the levels.
    t_y = nd.concat(
        *[grid_labels.reshape((-1, num_classes + 5)) for grid_labels in label_list],
        dim=0)
    t_xywhs = nd.concat(
        *[grid_true.reshape((-1, 5)) for grid_true in true_label_list],
        dim=0)

    return t_y, t_xywhs
def prep_final_label(labels, num_classes, input_dim=416):
    """Build per-anchor training targets for the 13, 26 and 52 grids.

    Each row of `labels` is read as
    ``[x, y, w, h, objectivity, class scores...]``. Rows are consumed until
    one with objectivity 0 is found. For every grid, the anchor with the
    best IoU against the box is chosen and the target is written at
    ``[cell_y, cell_x, anchor]``.

    Returns
    -------
    tuple
        ``(t_y, t_xywhs)``: the flattened network targets with
        ``num_classes + 5`` columns, and the flattened input-scale boxes
        with 5 columns (column 4 is always 1.0 for assigned entries).
    """
    ctx = labels.context
    anchors = nd.array(
        [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
         (59, 119), (116, 90), (156, 198), (373, 326)],
        ctx=ctx,
    )
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    width = num_classes + 5

    label_1 = nd.zeros(shape=(13, 13, 3, width), dtype="float32", ctx=ctx)
    label_2 = nd.zeros(shape=(26, 26, 3, width), dtype="float32", ctx=ctx)
    label_3 = nd.zeros(shape=(52, 52, 3, width), dtype="float32", ctx=ctx)
    true_label_1 = nd.zeros(shape=(13, 13, 3, 5), dtype="float32", ctx=ctx)
    true_label_2 = nd.zeros(shape=(26, 26, 3, 5), dtype="float32", ctx=ctx)
    true_label_3 = nd.zeros(shape=(52, 52, 3, 5), dtype="float32", ctx=ctx)

    levels = list(zip(
        anchors_mask,
        (label_1, label_2, label_3),
        (true_label_1, true_label_2, true_label_3),
    ))

    for row in range(labels.shape[0]):
        if labels[row, 4] == 0.0:
            # First padding row: nothing real follows.
            break
        for level, (mask, net_grid, true_grid) in enumerate(levels):
            grid = 2 ** level * 13
            candidates = anchors[mask]

            # The box in grid units, duplicated once per candidate anchor.
            scaled_box = nd.expand_dims(labels[row, :4] * grid, axis=0)
            box_copies = nd.repeat(scaled_box, repeats=candidates.shape[0], axis=0)
            # Same centres with the anchors' sizes swapped in.
            anchor_boxes = box_copies.copy()
            anchor_boxes[:, 2:4] = candidates / input_dim * grid
            best_anchor = nd.argmax(bbox_iou(box_copies, anchor_boxes), axis=0)

            entry = labels[row].copy()
            cell = nd.floor(entry[:2] * grid)
            # Keep only the fractional position inside the cell.
            entry[:2] = entry[:2] * grid
            entry[:2] -= cell
            cell = cell.astype("int")
            entry[2:4] = nd.log(
                entry[2:4] * input_dim / candidates[best_anchor].reshape(-1) + 1e-12
            )
            net_grid[cell[1], cell[0], best_anchor] = entry

            true_box = labels[row, :5] * input_dim
            true_box[4] = 1.0
            true_grid[cell[1], cell[0], best_anchor] = true_box

    t_y = nd.concat(
        label_1.reshape((-1, width)),
        label_2.reshape((-1, width)),
        label_3.reshape((-1, width)),
        dim=0,
    )
    t_xywhs = nd.concat(
        true_label_1.reshape((-1, 5)),
        true_label_2.reshape((-1, 5)),
        true_label_3.reshape((-1, 5)),
        dim=0,
    )
    return t_y, t_xywhs
def prep_final_label(labels, num_classes, input_dim=416):
    '''
    Convert one image's label rows into grid targets.

    Input:
        labels      : 2-D array, one row per object, laid out as
                      [x, y, w, h, pc, class scores...]
        num_classes : number of class columns
        input_dim   : side length of the network input
    Output:
        t_y     : shape (13x13x3 + 26x26x3 + 52x52x3, num_classes + 5),
                  rows laid out as [tx, ty, tw, th, pc, class scores...]
        t_xywhs : shape (13x13x3 + 26x26x3 + 52x52x3, 5),
                  rows laid out as [x, y, w, h, pc]
    '''
    ctx = labels.context
    # The 9 anchor boxes.
    anchors = nd.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       ctx=ctx)
    # The 3 groups of anchors, one group per grid size.
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    grid_sizes = (13, 26, 52)

    # Per grid: (size, size, 3 anchors, num_classes + 5) network targets
    # and (size, size, 3, 5) input-scale boxes.
    label_list = []
    for size in grid_sizes:
        label_list.append(
            nd.zeros(shape=(size, size, 3, num_classes + 5), dtype="float32", ctx=ctx))
    true_label_list = []
    for size in grid_sizes:
        true_label_list.append(
            nd.zeros(shape=(size, size, 3, 5), dtype="float32", ctx=ctx))

    # Process the label rows one by one.
    num_rows, _ = labels.shape
    for x_box in range(num_rows):
        if labels[x_box, 4].asscalar() == 0.0:
            # pc == 0: no more objects for this image.
            break

        # Visit the 13x13, 26x26 and 52x52 grids in turn.
        for i in range(3):
            stride = 2**i * 13
            group = anchors[anchors_mask[i]]  # the 3 anchor sizes, shape [3, 2]

            # [3, 4]: the box [x, y, w, h] * grid size, once per anchor.
            box = nd.repeat(
                nd.expand_dims(labels[x_box, :4] * stride, axis=0),
                repeats=group.shape[0],
                axis=0)

            # [3, 4]: same x, y with each anchor's w, h scaled to the grid.
            anchor_box = box.copy()
            anchor_box[:, 2:4] = group / input_dim * stride

            # Index of the closest anchor.
            best_anchor = nd.argmax(bbox_iou(box, anchor_box), axis=0)

            # Work on a copy of the row.
            label = labels[x_box].copy()
            whole = nd.floor(label[:2] * stride)
            # [x, y] * grid minus its floor leaves the fractional part.
            label[:2] = label[:2] * stride - whole
            # The floored value is the cell index.
            tmp_idx = whole.astype("int")

            label[2:4] = nd.log(label[2:4] * input_dim /
                                group[best_anchor].reshape(-1) + 1e-12)

            true_xywhs = labels[x_box, :5] * input_dim
            true_xywhs[4] = 1.0

            label_list[i][tmp_idx[1], tmp_idx[0], best_anchor] = label
            true_label_list[i][tmp_idx[1], tmp_idx[0], best_anchor] = true_xywhs

    flat_labels = [grid.reshape((-1, num_classes + 5)) for grid in label_list]
    flat_true = [grid.reshape((-1, 5)) for grid in true_label_list]
    t_y = nd.concat(flat_labels[0], flat_labels[1], flat_labels[2], dim=0)
    t_xywhs = nd.concat(flat_true[0], flat_true[1], flat_true[2], dim=0)

    return t_y, t_xywhs
def write_results(prediction, num_classes, confidence=0.5, nms_conf=0.4):
    """Threshold class scores and run per-class NMS on every image.

    `prediction` is modified in place: class scores are multiplied by
    column 4, scores not above `confidence` are zeroed, and columns 0:4 are
    converted from (center x, center y, width, height) to corner form.

    Parameters
    ----------
    prediction : NDArray
        Indexed as ``[batch, box, column]``; columns 0:4 are the box,
        column 4 a box confidence, columns 5:5+num_classes class scores.
    num_classes : int
        Number of class-score columns.
    confidence : float
        Class scores (after weighting) must exceed this to be kept.
    nms_conf : float
        A lower-ranked box is kept only while its IoU with the current box
        is below this value.

    Returns
    -------
    NDArray or None
        One row per surviving detection:
        ``[batch index, 4 corner coords, column 4, best class score,
        best class index]``. ``None`` when nothing survives.
    """
    # Weight every class score by the box confidence in column 4.
    box_confidence = nd.repeat(nd.expand_dims(prediction[:,:,4],2),num_classes,axis=2)
    prediction[:,:,5:5+num_classes] = prediction[:,:,5:5+num_classes] * box_confidence
    # Thresholding is done on the weighted class scores, not on column 4:
    #conf_mask = (prediction[:, :, 4] > confidence).expand_dims(2)
    conf_mask = prediction[:, :, 5:5+num_classes] > confidence
    prediction[:, :, 5:5+num_classes] = prediction[:, :, 5:5+num_classes] * conf_mask

    batch_size = prediction.shape[0]

    # Convert (cx, cy, w, h) to two opposite corners.
    box_corner = nd.zeros(prediction.shape, dtype="float32")
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]
    #pdb.set_trace()

    output = None  # stays None until the first detection is appended
    for ind in range(batch_size):
        image_pred = prediction[ind]

        # Reduce the class columns to (best score, best class index).
        max_conf = nd.max(image_pred[:, 5:5 + num_classes], axis=1)
        max_conf_score = nd.argmax(image_pred[:, 5:5 + num_classes], axis=1)
        max_conf = max_conf.astype("float32").expand_dims(1)
        max_conf_score = max_conf_score.astype("float32").expand_dims(1)
        # From here on the work is done in NumPy; rows have 7 columns.
        image_pred = nd.concat(image_pred[:, :5], max_conf, max_conf_score, dim=1).asnumpy()

        # Keep only rows whose best class score survived the threshold.
        non_zero_ind = np.nonzero(image_pred[:, 5])
        try:
            image_pred_ = image_pred[non_zero_ind, :].reshape((-1, 7))
        except Exception as e:
            # Best-effort: report the problem and skip this image.
            print(e)
            continue
        if image_pred_.shape[0] == 0:
            continue

        # Get the various classes detected in the image
        img_classes = np.unique(image_pred_[:, -1])  # -1 index holds the class index

        for cls in img_classes:
            # get the detections with one particular class
            cls_mask = image_pred_ * np.expand_dims(image_pred_[:, -1] == cls, axis=1)
            class_mask_ind = np.nonzero(cls_mask[:, -2])
            image_pred_class = image_pred_[class_mask_ind].reshape((-1, 7))

            # sort the detections such that the entry with the maximum
            # class score (column 5) is at the top
            conf_sort_index = np.argsort(image_pred_class[:, 5])[::-1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.shape[0]
            pdb_num = 0  # debugging counter; never read
            # The array shrinks inside the loop, so `i` can run past its
            # end; the break/except paths below end the loop in that case.
            for i in range(idx):
                # Get the IOUs of all boxes that come after the one we are
                # looking at in the loop
                try:
                    box1 = np.expand_dims(image_pred_class[i], 0)
                    box2 = image_pred_class[i + 1:]
                    if len(box2) == 0:
                        break
                    box1 = np.repeat(box1, repeats=box2.shape[0], axis=0)
                    ious = bbox_iou(box1, box2, transform=False)
                except ValueError:
                    break
                except IndexError:
                    break

                # Zero out all the detections that have IoU >= threshold
                iou_mask = np.expand_dims(ious < nms_conf, 1).astype(np.float32)
                image_pred_class[i + 1:] *= iou_mask

                # Drop the zeroed-out rows, keeping the non-zero entries
                non_zero_ind = np.nonzero(image_pred_class[:, 5])
                image_pred_class = image_pred_class[non_zero_ind].reshape((-1, 7))
                pdb_num +=1

            # Prefix every surviving row with the image's batch index.
            batch_ind = np.ones((image_pred_class.shape[0], 1)) * ind
            seq = nd.concat(nd.array(batch_ind), nd.array(image_pred_class), dim=1)
            if output is None:
                output = seq
            else:
                output = nd.concat(output, seq, dim=0)
    return output