def postprocess(bbox_pred, iou_pred, prob_pred, im_shape, cfg, thresh=0.05, size_index=0):
    """Convert raw network outputs for a single image into final detections.

    Steps: decode boxes with ``yolo_to_bbox``, scale them by ``im_shape``,
    score each box as ``iou * best class probability``, drop boxes scoring
    below ``thresh``, run per-class NMS (overlap threshold 0.3) and clip
    the survivors with ``clip_boxes``.

    Args:
        bbox_pred: (bsize, HxW, num_anchors, 4) ndarray of float
            (sig(tx), sig(ty), exp(tw), exp(th)); ``bsize`` must be 1.
        iou_pred: (bsize, HxW, num_anchors, 1)
        prob_pred: (bsize, HxW, num_anchors, num_classes)
        im_shape: indexable image shape; ``im_shape[1]`` scales the even
            box columns and ``im_shape[0]`` the odd ones, i.e. it is read
            as (height, width, ...).
        cfg: object exposing ``num_classes``, ``anchors`` and
            ``multi_scale_out_size``.
        thresh: minimum score a box needs to be kept.
        size_index: index into ``cfg.multi_scale_out_size`` selecting the
            (W, H) output grid size.

    Returns:
        Tuple ``(bbox_pred, scores, cls_inds)`` for the kept detections:
        the clipped integer boxes, their scores and their class indices.

    Raises:
        AssertionError: if the batch holds more than one image, or if the
            number of scores and boxes differ.
    """
    num_classes = cfg.num_classes
    anchors = cfg.anchors
    W, H = cfg.multi_scale_out_size[size_index]
    assert bbox_pred.shape[0] == 1, 'postprocess only support one image per batch'  # noqa

    # The builtin `float` / `int` are what the removed `np.float` / `np.int`
    # aliases pointed to, so the resulting dtypes are unchanged.
    bbox_pred = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred, dtype=float),
        np.ascontiguousarray(anchors, dtype=float),
        H, W)
    bbox_pred = np.reshape(bbox_pred, [-1, 4])
    # presumably yolo_to_bbox yields normalised (x1, y1, x2, y2) - confirm.
    bbox_pred[:, 0::2] *= float(im_shape[1])
    bbox_pred[:, 1::2] *= float(im_shape[0])
    bbox_pred = bbox_pred.astype(int)

    iou_pred = np.reshape(iou_pred, [-1])
    prob_pred = np.reshape(prob_pred, [-1, num_classes])

    # Keep only the most likely class of every box.
    cls_inds = np.argmax(prob_pred, axis=1)
    prob_pred = prob_pred[(np.arange(prob_pred.shape[0]), cls_inds)]
    scores = iou_pred * prob_pred
    assert len(scores) == len(bbox_pred), '{}, {}'.format(scores.shape, bbox_pred.shape)

    # Score threshold.
    keep = np.where(scores >= thresh)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # Per-class NMS: boxes only suppress boxes of the same class.
    keep = np.zeros(len(bbox_pred), dtype=int)
    for i in range(num_classes):
        inds = np.where(cls_inds == i)[0]
        if len(inds) == 0:
            continue
        c_bboxes = bbox_pred[inds]
        c_scores = scores[inds]
        c_keep = nms_detections(c_bboxes, c_scores, 0.3)
        keep[inds[c_keep]] = 1

    keep = np.where(keep > 0)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # Clip.
    bbox_pred = clip_boxes(bbox_pred, im_shape)

    return bbox_pred, scores, cls_inds
def postprocess(bbox_pred, iou_pred, prob_pred, im_shape, cfg, thresh=0.001):
    """Convert raw network outputs for a single image into final detections.

    Boxes are decoded with ``yolo_to_bbox``, scaled by ``im_shape``, scored
    as ``iou * best class probability``, filtered by ``thresh``, reduced by
    per-class NMS (overlap threshold 0.5) and clipped with ``clip_boxes``.

    Args:
        bbox_pred: (bsize, HxW, num_anchors, 4) ndarray of float
            (sig(tx), sig(ty), exp(tw), exp(th)); ``bsize`` must be 1.
        iou_pred: (bsize, HxW, num_anchors, 1)
        prob_pred: (bsize, HxW, num_anchors, num_classes)
        im_shape: indexable image shape read as (height, width, ...).
        cfg: mapping with the keys ``'num_classes'``, ``'anchors'`` and
            ``'out_size'`` (unpacked as ``W, H``).
        thresh: minimum score a box needs to be kept.

    Returns:
        Tuple ``(bbox_pred, scores, cls_inds)`` for the kept detections.

    Raises:
        AssertionError: if the batch holds more than one image.
    """
    num_classes = cfg['num_classes']
    anchors = cfg['anchors']
    W, H = cfg['out_size']
    assert bbox_pred.shape[0] == 1, 'postprocess only support one image per batch'

    # The builtin `float` / `int` are what the removed `np.float` / `np.int`
    # aliases pointed to, so the resulting dtypes are unchanged.
    bbox_pred = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred, dtype=float),
        np.ascontiguousarray(anchors, dtype=float),
        H, W)
    bbox_pred = np.reshape(bbox_pred, [-1, 4])
    bbox_pred[:, 0::2] *= float(im_shape[1])  # w
    bbox_pred[:, 1::2] *= float(im_shape[0])  # h
    bbox_pred = bbox_pred.astype(int)

    iou_pred = np.reshape(iou_pred, [-1])
    prob_pred = np.reshape(prob_pred, [-1, num_classes])

    # Keep only the most likely class of every box.
    cls_inds = np.argmax(prob_pred, axis=1)
    prob_pred = prob_pred[(np.arange(prob_pred.shape[0]), cls_inds)]
    scores = iou_pred * prob_pred

    # Score threshold.
    keep = np.where(scores >= thresh)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # Per-class NMS: boxes only suppress boxes of the same class.
    keep = np.zeros(len(bbox_pred), dtype=int)
    for i in range(num_classes):
        inds = np.where(cls_inds == i)[0]
        if len(inds) == 0:
            continue
        c_bboxes = bbox_pred[inds]
        c_scores = scores[inds]
        c_keep = nms_detections(c_bboxes, c_scores, 0.5)
        keep[inds[c_keep]] = 1

    keep = np.where(keep > 0)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # Clip.
    bbox_pred = clip_boxes(bbox_pred, im_shape)

    return bbox_pred, scores, cls_inds
def _process_batch(data, size_index):
    """Build training targets and loss weights for one image.

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``. ``bbox_pred_np`` is unpacked as
            ``(hw, num_anchors, 4)``, ``iou_pred_np`` is indexed as
            ``[cell, anchor, :]``, ``gt_boxes`` rows are read as
            ``(x1, y1, x2, y2)`` and ``dontcares`` is not used.
        size_index: index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size`` (``cfg`` is a module-level name).

    Returns:
        ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``,
        all with leading shape ``(hw, num_anchors)``:

        * ``_boxes``: (0.5, 0.5, 1, 1) by default; for the matched
          (cell, anchor) of a gt box, its in-cell centre offset and its
          width/height in grid cells divided by the anchor size.
        * ``_ious``: IoU between the matched prediction and its gt box.
        * ``_classes``: one-hot class of the matched gt box.
        * ``_box_mask``: 0.01 by default, ``cfg.coord_scale`` when matched.
        * ``_iou_mask``: ``noobject_scale * -iou_pred`` where the best IoU
          does not exceed ``cfg.iou_thresh``,
          ``object_scale * (1 - iou_pred)`` when matched, 0 otherwise.
        * ``_class_mask``: ``cfg.class_scale`` when matched, else 0.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and weights. Builtin `float` / `int` are what the removed
    # `np.float` / `np.int` aliases pointed to, so dtypes are unchanged.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float), anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # Use ONE mask for both the gather and the scatter. The original read
    # the penalty with `<` but wrote it with `<=`, which breaks whenever a
    # best IoU equals the threshold exactly.
    noobj = best_ious <= cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt_box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # centre offset inside the cell
    target_boxes[:, 1] = cy - np.floor(cy)
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # width in cells
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # height in cells

    # for each gt box, match the best anchor (in output-grid units)
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # gt centre falls outside the grid: report and skip it
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # express width/height relative to the responsible anchor
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _build_target(self, bbox_pred_np, gt_boxes, gt_classes, dontcare):
    """Build box / IoU / class targets and a positive mask for a batch.

    For every grid cell that contains the centre of at least one gt box,
    each of those gt boxes is assigned to the anchor whose predicted box
    overlaps it most; an anchor keeps the gt box with the highest IoU.

    :param bbox_pred_np: shape: (bsize, h x w, num_anchors, 4) :
        (sig(tx), sig(ty), exp(tw), exp(th))
    :param gt_boxes: per-image sequences of boxes read as (x1, y1, x2, y2)
    :param gt_classes: per-image sequences of class indices, aligned with
        ``gt_boxes``
    :param dontcare: unused (dontcare areas are still a TODO)
    :return: ``(_boxes, _ious, _classes, _mask)`` with leading shape
        ``(bsize, hw, num_anchors)``; ``_mask`` is 1 for matched anchors
    """
    W, H = cfg.out_size
    inp_size = cfg.inp_size
    out_size = cfg.out_size

    # net output
    bsize, hw, num_anchors, _ = bbox_pred_np.shape

    # Builtin `float` / `int` are what the removed `np.float` / `np.int`
    # aliases pointed to, so dtypes are unchanged.
    _boxes = np.zeros([bsize, hw, num_anchors, 4], dtype=float)
    _ious = np.zeros([bsize, hw, num_anchors, 1], dtype=float)
    _classes = np.zeros([bsize, hw, num_anchors, cfg.num_classes], dtype=int)
    _mask = np.zeros([bsize, hw, num_anchors, 1], dtype=int)

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float), anchors, H, W)
    bbox_np[:, :, :, 0::2] *= float(inp_size[0])
    bbox_np[:, :, :, 1::2] *= float(inp_size[1])

    # Cell size does not depend on the image, so compute it once.
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H

    # assign each box to cells
    for b in range(bsize):
        gt_boxes_b = np.asarray(gt_boxes[b], dtype=float)

        # locate the cell of each gt_box
        cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
        cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
        cell_inds = np.floor(cy) * W + np.floor(cx)
        cell_inds = cell_inds.astype(int)

        target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
        target_boxes[:, 0] = cx - np.floor(cx)  # centre offset inside the cell
        target_boxes[:, 1] = cy - np.floor(cy)
        target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # width in cells
        target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # height in cells

        # Group gt indices by the cell their centre falls in.
        # NOTE(review): a centre outside the grid gives an index >= hw
        # (IndexError) or a negative one (wraps around) - confirm callers
        # always pass boxes inside the input image.
        cell_boxes = [[] for _ in range(hw)]
        for i, ind in enumerate(cell_inds):
            cell_boxes[ind].append(i)

        for i in range(hw):
            gt_in_cell = cell_boxes[i]
            if not gt_in_cell:
                continue
            bboxes = [gt_boxes_b[j] for j in gt_in_cell]
            targets_b = np.array([target_boxes[j] for j in gt_in_cell], dtype=float)

            ious = bbox_ious(
                np.ascontiguousarray(bbox_np[b, i], dtype=float),
                np.ascontiguousarray(bboxes, dtype=float))
            # best anchor for every gt box of this cell
            argmax = np.argmax(ious, axis=0)
            for j, a in enumerate(argmax):
                if _ious[b, i, a, 0] <= ious[a, j]:
                    _mask[b, i, a, :] = 1
                    _ious[b, i, a, 0] = ious[a, j]
                    targets_b[j, 2:4] /= anchors[a]
                    _boxes[b, i, a, :] = targets_b[j]
                    # `j` indexes the boxes of THIS cell; map it back to the
                    # image-level gt index before reading the class, and
                    # clear any class left by a previously assigned box.
                    _classes[b, i, a, :] = 0
                    _classes[b, i, a, gt_classes[b][gt_in_cell[j]]] = 1

    return _boxes, _ious, _classes, _mask
def _process_batch(data, size_index):
    """Build training targets and loss weights for one image.

    "Multi-scale output" here means the prediction grid size is the input
    size divided by the network stride (per the original author's note the
    stride is usually 32).

    Outline:

    1. The ``*_mask`` arrays hold the per-element loss coefficients taken
       from ``cfg`` (``object_scale``, ``noobject_scale``, ``class_scale``,
       ``coord_scale``).
    2. Predictions are decoded with ``yolo_to_bbox`` and compared with
       every gt box, giving an IoU matrix of shape
       ``(h*w*num_anchors, num_gt)``. Predictions whose best IoU is below
       ``cfg.iou_thresh`` receive the no-object penalty in ``_iou_mask``.
    3. For each gt box the in-cell centre offset, the width/height in grid
       cells and the index of the cell holding its centre are computed.
    4. Each gt box is matched to one anchor via ``anchor_intersections``
       in feature-map space; that anchor is responsible for the box.

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``; ``bbox_pred_np`` is unpacked as
            ``(hw, num_anchors, 4)``, ``gt_boxes`` rows are read as
            ``(x1, y1, x2, y2)`` and ``dontcares`` is not used.
        size_index: index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size`` (``cfg`` is a module-level name).

    Returns:
        ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``.
        ``_boxes`` rows are (x offset, y offset, w / anchor_w,
        h / anchor_h); ``_ious`` holds prediction-vs-gt IoU; an all-zero
        ``_classes`` row means no gt box was assigned (background).
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and weights. Builtin `float` / `int` are what the removed
    # `np.float` / `np.int` aliases pointed to, so dtypes are unchanged.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    # Add a leading batch axis: (1, w*h, num_anchors, 4).
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    # Presumably the YOLOv2 decoding (verify against yolo_to_bbox):
    #   bx = sigma(tx) + cx,  by = sigma(ty) + cy
    #   bw = pw * exp(tw),    bh = ph * exp(th)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float), anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted_bbox and gt_bbox
    # (hw, num_anchors, 4) -> (hw * num_anchors, 4)
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    # Overlap between every prediction and every gt box:
    # shape (hw * num_anchors, num_gt).
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # _iou_mask stores the penalty term for predictions without an object.
    noobj = best_ious < cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg.noobject_scale * iou_penalty

    # Size, in input pixels, covered by one cell of the prediction grid.
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    # Centre of every gt box expressed in grid-cell coordinates.
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    # Key step: flat index (row-major, 0 .. hw-1) of the grid cell that
    # contains each gt centre.
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    # Offsets of the centre inside its cell; they are the targets for
    # sig(tx), sig(ty) rather than cx, cy themselves.
    target_boxes[:, 0] = cx - np.floor(cx)
    target_boxes[:, 1] = cy - np.floor(cy)
    # Width / height of the gt box measured in grid cells (bw, bh).
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]

    # for each gt box, match the best anchor: overlap between gt boxes and
    # anchors in grid units decides which anchor is responsible for which gt
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # len(cell_inds) is the number of gt objects; the reshaped array is
    # (h*w, num_anchors, num_gt), so its first axis can be indexed with the
    # cell that holds an object's centre.
    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # gt centre falls outside the grid: report and skip it
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        # anchor responsible for this object
        a = anchor_inds[i]

        # predicted IoU confidence: 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa
        # IoU between the prediction and the gt box
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # Since bw = pw * exp(tw), dividing by the anchor size leaves
        # bw / pw = exp(tw); _boxes therefore holds
        # (sig(tx), sig(ty), exp(tw), exp(th)).
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data):
    """Build training targets and loss weights for one image.

    Args:
        data: 4-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares)``.
            ``bbox_pred_np`` is unpacked as ``(hw, num_anchors, 4)``,
            ``gt_boxes`` rows are read as ``(x1, y1, x2, y2)`` and
            ``dontcares`` is not used. Sizes and scales come from the
            module-level ``cfg``.

    Returns:
        ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``,
        all with leading shape ``(hw, num_anchors)``:

        * ``_boxes``: in-cell centre offset plus the natural log of
          (gt size in grid cells / anchor size), clamped below at
          ``log(0.001)``; unmatched entries are (0.5, 0.5, 0, 0).
        * ``_ious``: the ``anchor_intersections`` value of the matched
          anchor / gt pair, 0 elsewhere.
        * ``_classes``: one-hot class of the matched gt box.
        * ``_box_mask``: 0.01 by default, ``cfg.coord_scale`` when matched.
        * ``_iou_mask``: ``cfg.noobject_scale`` where the best IoU does not
          exceed ``cfg.iou_thresh``, ``cfg.object_scale`` when matched.
        * ``_class_mask``: ``cfg.class_scale`` when matched, else 0.
    """
    W, H = cfg.out_size
    inp_size = cfg.inp_size
    out_size = cfg.out_size

    bbox_pred_np, gt_boxes, gt_classes, dontcares = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and weights. Builtin `float` / `int` are what the removed
    # `np.float` / `np.int` aliases pointed to, so dtypes are unchanged.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float), anchors, H, W)
    # presumably normalised (x1, y1, x2, y2) per cell/anchor - confirm.
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])
    bbox_np[:, :, 1::2] *= float(inp_size[1])

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted boxes with every gt box
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    _iou_mask[best_ious <= cfg.iou_thresh] = cfg.noobject_scale

    # locate the cell of each gt_box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # centre offset inside the cell
    target_boxes[:, 1] = cy - np.floor(cy)
    target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # width in cells
    target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # height in cells

    # for each gt box, match the best anchor (in output-grid units)
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # gt centre falls outside the grid: report and skip it.
            # The call form prints the same text on Python 2 and 3; the
            # original `print cell_ind` statement is a SyntaxError on 3.
            print(cell_ind)
            continue
        a = anchor_inds[i]

        _iou_mask[cell_ind, a, :] = cfg.object_scale
        _ious[cell_ind, a, :] = anchor_ious[a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # express width/height relative to the responsible anchor
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # Width/height targets are regressed in log space; clamp first so the
    # logarithm never sees a non-positive value.
    _boxes[:, :, 2:4] = np.maximum(_boxes[:, :, 2:4], 0.001)
    _boxes[:, :, 2:4] = np.log(_boxes[:, :, 2:4])

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data):
    """Build training targets and loss weights for one image.

    Args:
        data: 6-tuple ``(bbox_pred_np, gt_boxes, gt_classes, iou_pred_np,
            inp_size, cfg)``. ``bbox_pred_np`` is unpacked as
            ``(hw, num_anchors, 4)``; ``gt_boxes`` must expose ``.shape``
            and its rows are read as ``(x1, y1, x2, y2)``; a class of -1
            marks a box that is skipped; ``inp_size`` is divided by 32
            element-wise, so it is presumably an ndarray ``(w, h)`` -
            confirm; ``cfg`` is a mapping with the keys ``'num_classes'``,
            ``'anchors'``, ``'iou_thresh'``, ``'noobject_scale'``,
            ``'object_scale'``, ``'coord_scale'`` and ``'class_scale'``.

    Returns:
        ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``,
        all with leading shape ``(hw, num_anchors)`` and zero where no gt
        box was assigned (except the no-object entries of ``_iou_mask``):

        * ``_boxes``: (sig(tx), sig(ty), exp(tw), exp(th)) targets.
        * ``_ious``: IoU between the matched prediction and its gt box.
        * ``_classes``: one-hot class of the matched gt box.
        * ``_box_mask``: ``coord_scale * (2 - w * h)`` with ``w``, ``h``
          the gt size as a fraction of the input size.
        * ``_iou_mask``: ``noobject_scale * -iou_pred`` where the best IoU
          is below ``iou_thresh``; ``object_scale * (iou_truth -
          iou_pred)`` when matched.
        * ``_class_mask``: ``class_scale`` when matched.
    """
    bbox_pred_np, gt_boxes, gt_classes, iou_pred_np, inp_size, cfg = data

    # The grid is hard-wired to a stride of 32 input pixels per cell.
    out_size = inp_size / 32
    num_gt = gt_boxes.shape[0]
    cell_w = 32
    cell_h = 32

    # net output; hw = number of cells
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and weights. Builtin `float` / `int` are what the removed
    # `np.float` / `np.int` aliases pointed to, so dtypes are unchanged.
    _classes = np.zeros([hw, num_anchors, cfg['num_classes']], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg['anchors'], dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float),
        anchors, out_size[1], out_size[0])
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x by w
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y by h

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # Predictions that overlap no gt box well enough are pushed towards 0.
    noobj = best_ious < cfg['iou_thresh']
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg['noobject_scale'] * iou_penalty

    ious_reshaped = np.reshape(ious, [hw, num_anchors, num_gt])

    # locate the cell of each gt_box
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * out_size[0] + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # cx (0 ~ 1)
    target_boxes[:, 1] = cy - np.floor(cy)  # cy (0 ~ 1)
    target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / cell_w  # width in cells
    target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / cell_h  # height in cells

    # for each gt box, match the best anchor
    # gt_boxes_resize = [(xmin, ymin, xmax, ymax)] in grid-cell units
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] /= cell_w
    gt_boxes_resize[:, 1::2] /= cell_h
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # for every gt cell
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            print('warning: invalid cell_ind, cx, cy, W, H', cell_ind, cx[i],
                  cy[i], out_size[0], out_size[1])
            continue
        a = anchor_inds[i]

        # do not evaluate for dontcare / unknown class
        if gt_classes[i] == -1:
            continue

        # 0 ~ 1, should be close to iou_truth
        iou_pred = iou_pred_np[cell_ind, a, :]
        iou_truth = ious_reshaped[cell_ind, a, i]
        _iou_mask[cell_ind, a, :] = cfg['object_scale'] * (iou_truth - iou_pred)
        _ious[cell_ind, a, :] = iou_truth

        # (2 - w * h) gives small boxes a larger coordinate weight.
        truth_w = (gt_boxes_b[i, 2] - gt_boxes_b[i, 0]) / inp_size[0]
        truth_h = (gt_boxes_b[i, 3] - gt_boxes_b[i, 1]) / inp_size[1]
        _box_mask[cell_ind, a, :] = cfg['coord_scale'] * (2 - truth_w * truth_h)
        # express width/height relative to the responsible anchor
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg['class_scale']
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # _boxes = (sig(tx), sig(ty), exp(tw), exp(th))
    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data, size_index):
    """Build training targets and loss weights for one image.

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``. ``bbox_pred_np`` is unpacked as
            ``(hw, num_anchors, 4)``, ``iou_pred_np`` is indexed as
            ``[cell, anchor, :]``, ``gt_boxes`` rows are read as
            ``(x1, y1, x2, y2)`` and ``dontcares`` is not used.
        size_index: selects the input / output size pair from
            ``cfg.multi_scale_inp_size`` and ``cfg.multi_scale_out_size``
            (``cfg`` is a module-level name).

    Returns:
        ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``,
        each shaped ``(hw, num_anchors, k)``. Box targets default to
        (0.5, 0.5, 1, 1) with weight 0.01; entries for the (cell, anchor)
        matched to a gt box carry the real target and the corresponding
        ``cfg`` scale. ``_iou_mask`` additionally holds
        ``noobject_scale * -iou_pred`` for every prediction whose best IoU
        does not exceed ``cfg.iou_thresh``.
    """
    grid_w, grid_h = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and weights. Builtin `float` / `int` are what the removed
    # `np.float` / `np.int` aliases pointed to, so dtypes are unchanged.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float),
        anchors,
        grid_h, grid_w)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float)
    )
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # Select and assign with the SAME mask. The original gathered with `<`
    # and scattered with `<=`; when a best IoU equals the threshold the two
    # selections differ in size and the assignment fails (or broadcasts).
    noobj = best_ious <= cfg.iou_thresh
    _iou_mask[noobj] = cfg.noobject_scale * (0 - iou_pred_np[noobj])

    # locate the cell of each gt_box
    cell_w = float(inp_size[0]) / grid_w
    cell_h = float(inp_size[1]) / grid_h
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = (np.floor(cy) * grid_w + np.floor(cx)).astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # centre offset inside the cell
    target_boxes[:, 1] = cy - np.floor(cy)
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # width in cells
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # height in cells

    # for each gt box, match the best anchor (in output-grid units)
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float)
    )
    anchor_inds = np.argmax(anchor_ious, axis=0)

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # gt centre falls outside the grid: report and skip it
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # express width/height relative to the responsible anchor
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask
def _process_batch(data, size_index):
    """Build training targets and constant loss weights for one image.

    Unlike variants that weight the confidence loss by the predicted IoU,
    this one writes the plain ``cfg.noobject_scale`` / ``cfg.object_scale``
    constants into ``_iou_mask``; ``iou_pred_np`` is unpacked but unused.

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``. ``bbox_pred_np`` is unpacked as
            ``(hw, num_anchors, 4)``, ``gt_boxes`` is an ``[R, 4]`` array
            of ``(x1, y1, x2, y2)`` rows and ``dontcares`` is not used.
        size_index: index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size`` (``cfg`` is a module-level name).

    Returns:
        ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``,
        all with leading shape ``(hw, num_anchors)``. Everything is zero
        except: ``_iou_mask`` is ``noobject_scale`` where the best IoU does
        not exceed ``cfg.iou_thresh``, and the (cell, anchor) matched to a
        gt box gets its box target, IoU, one-hot class and the
        ``coord`` / ``object`` / ``class`` scales.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and weights. Builtin `float` / `int` are what the removed
    # `np.float` / `np.int` aliases pointed to, so dtypes are unchanged.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    # Map the raw predictions to actual box positions.
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float), anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x to the input size
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)  # [R, 4]

    # for each cell, compare predicted_bbox and gt_bbox; the comparison is
    # done at the scale of the network input image
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # At or below the threshold the prediction is treated as "no object".
    _iou_mask[best_ious <= cfg.iou_thresh] = cfg.noobject_scale

    # locate the cell of each gt_box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # bx - cx = sig(tx)
    target_boxes[:, 1] = cy - np.floor(cy)  # by - cy = sig(ty)
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # bw
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # bh

    # for each gt box, match the best anchor: gt boxes are rescaled to the
    # feature-map size so they can be compared with the anchors
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    # presumably anchor and gt box are compared as if their centres
    # coincide (original author's note) - confirm in anchor_intersections
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float))
    anchor_inds = np.argmax(anchor_ious, axis=0)  # best anchor per gt box

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    # Only the cell containing a gt centre, and the best anchor chosen for
    # that gt box, get labelled.
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # gt centre falls outside the grid: report and skip it
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        _iou_mask[cell_ind, a, :] = cfg.object_scale
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        target_boxes[i, 2:4] /= anchors[a]  # bw / pw, bh / ph
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask