def get_ground_truth_predictors(self, ground_truth, label_map, im=None):
    """Choose one label-map row (predictor) for every ground-truth box.

    For each detection map, the cell containing the ground-truth centre is
    located and the label-map rows of all anchors in that cell become the
    candidates for that ground truth.  Candidates are then assigned greedily
    by descending IoU so that a predictor is used by at most one ground truth.

    Args:
        ground_truth: array with one row per box; columns 0 and 1 are read
            as the centre used to find the cell, and the first four columns
            (after ``center_to_corner``) as the box.
            NOTE(review): presumably (cx, cy, w, h, ...) in input-image
            pixels -- confirm against the caller.
        label_map: array indexed by predictor row; its first four columns
            are used as the candidate boxes.
        im: unused.

    Returns:
        Integer array of shape ``(num_ground_truth, 1)`` holding the chosen
        label-map row for each ground-truth box.

    Raises:
        AssertionError: if the centre cells cannot be computed (the offending
            ``ground_truth`` is printed first).
    """
    total_boxes_per_gt = sum(self.anchor_nums)
    num_ground_truth_in_im = ground_truth.shape[0]

    # inds[g, k] is the label-map row of the k-th candidate for ground truth g.
    # The builtin int replaces the np.int alias removed from NumPy.
    inds = np.zeros((num_ground_truth_in_im, total_boxes_per_gt), dtype=int)

    # n indexes the detection maps
    for n in range(len(self.anchor_nums)):
        offset = sum(self.num_pred_boxes[:n])
        try:
            center_cells = (ground_truth[:, [0, 1]]) // self.strides[n]
        except Exception as err:
            print(ground_truth)
            # Explicit raise so the failure is not stripped under ``python -O``.
            raise AssertionError(
                "could not compute centre cells for ground truth") from err

        a = offset + self.anchor_nums[n] * (
            self.inp_dim // self.strides[n] * center_cells[:, 1]
            + center_cells[:, 0])

        first_col = sum(self.anchor_nums[:n])
        inds[:, first_col] = a
        for x in range(1, self.anchor_nums[n]):
            inds[:, first_col + x] = a + x

    candidate_boxes = label_map[inds][:, :, :4]
    candidate_boxes = center_to_corner(candidate_boxes)

    ground_truth_boxes = center_to_corner(
        ground_truth.copy()[np.newaxis]).squeeze(0)[:, :4]

    candidate_boxes = candidate_boxes.transpose(0, 2, 1)
    ground_truth_boxes = ground_truth_boxes[:, :, np.newaxis]

    candidate_ious = bbox_iou(candidate_boxes, ground_truth_boxes, lib="numpy")

    prediction_boxes = np.zeros((num_ground_truth_in_im, 1), dtype=int)

    for _ in range(num_ground_truth_in_im):
        # row / column of the highest remaining IoU
        max_iou_ind = np.argmax(candidate_ious)
        max_iou_row = max_iou_ind // total_boxes_per_gt
        max_iou_col = max_iou_ind % total_boxes_per_gt

        # index (in label map) of the box with maximum IoU
        max_iou_box = inds[max_iou_row, max_iou_col]

        # assign the bounding box to the appropriate ground truth
        prediction_boxes[max_iou_row] = max_iou_box

        # Zero the IoUs of this predictor everywhere so it cannot be assigned
        # to another ground truth.  The comparison must use the label-map
        # index (max_iou_box), not the flat argmax position (max_iou_ind).
        box_mask = (inds != max_iou_box).reshape(-1, len(self.anchors))
        candidate_ious *= box_mask

        # The ground truth that was just served takes no further part.
        candidate_ious[max_iou_row] *= 0

    return prediction_boxes
def get_no_obj_candidates(self, ground_truth, label_map, ground_truth_predictors):
    """Return the predictor rows that may be trained as "no object".

    Every predictor row of ``label_map`` is compared with every ground-truth
    box.  A row qualifies when its best IoU over all ground truths is below
    0.5.  Rows listed in ``ground_truth_predictors`` are forced to IoU 1 so
    they are never returned.

    Args:
        ground_truth: array with one row per ground-truth box; the first four
            columns (after ``center_to_corner``) are used as the box.
        label_map: array indexed by predictor row; its first four columns are
            used as the candidate boxes.
        ground_truth_predictors: integer indices of the rows already
            responsible for a ground-truth box.

    Returns:
        1-D integer array of qualifying predictor row indices.
    """
    num_ground_truth_in_im = ground_truth.shape[0]

    # One copy of "all predictor rows" per ground-truth box.  The original
    # also pre-allocated an unused array with the removed ``np.int`` alias,
    # which crashes on current NumPy; that dead allocation is dropped.
    inds = np.arange(sum(self.num_pred_boxes)).astype(int)
    inds = inds[np.newaxis].repeat(num_ground_truth_in_im, axis=0)

    candidate_boxes = label_map[inds][:, :, :4]
    candidate_boxes = center_to_corner(candidate_boxes)

    ground_truth_boxes = center_to_corner(
        ground_truth.copy()[np.newaxis]).squeeze(0)[:, :4]

    candidate_boxes = candidate_boxes.transpose(0, 2, 1)
    ground_truth_boxes = ground_truth_boxes[:, :, np.newaxis]

    candidate_ious = bbox_iou(candidate_boxes, ground_truth_boxes, lib="numpy")

    # Predictors that own a ground truth must never count as "no object".
    candidate_ious[:, ground_truth_predictors] = 1

    max_ious_per_box = np.max(candidate_ious, 0)
    no_obj_cands = np.nonzero(max_ious_per_box < 0.5)[0].astype(int)

    return no_obj_cands
def do_nms(boxes, nms_thresh):
    """Suppress overlapping boxes in place, one class at a time.

    For each class the boxes are visited in order of decreasing class score.
    Every lower-ranked box whose IoU with a still-active box is at least
    ``nms_thresh`` has its score for that class set to 0.

    Args:
        boxes: sequence of objects exposing a mutable ``classes`` score
            sequence; they are passed as-is to ``bbox_iou``.
        nms_thresh: IoU at or above which the lower-scored box is suppressed.

    Returns:
        None.  ``boxes`` is modified in place.
    """
    if len(boxes) == 0:
        return

    class_count = len(boxes[0].classes)

    for cls_id in range(class_count):
        # Negated scores so that argsort yields a descending ranking.
        ranking = np.argsort([-candidate.classes[cls_id] for candidate in boxes])

        for rank, keeper_idx in enumerate(ranking):
            # A box already suppressed for this class cannot suppress others.
            if boxes[keeper_idx].classes[cls_id] == 0:
                continue

            for other_idx in ranking[rank + 1:]:
                overlap = bbox_iou(boxes[keeper_idx], boxes[other_idx])
                if overlap >= nms_thresh:
                    boxes[other_idx].classes[cls_id] = 0
def write_results(predictions, confidence, num_class, nms=True, nms_thresh=0.4):
    """Filter raw detections by objectness and run class-wise NMS.

    Args:
        predictions: tensor indexed as ``[image, box, attribute]``.  The
            first five attributes are read as (cx, cy, w, h, objectness) and
            every attribute from index 5 onward as a per-class score.
        confidence: boxes whose objectness is not above this are discarded.
        num_class: unused; kept for interface compatibility.
        nms: when True, apply non-maximum suppression within each class.
        nms_thresh: IoU at or above which a lower-ranked box is removed.

    Returns:
        ``0`` when no box survives, otherwise a tensor with one row per
        detection and 8 columns: (image index in batch, x1, y1, x2, y2,
        objectness, best class score, best class index).
    """
    # Zero every row whose objectness is not above the threshold.
    mask = (predictions[:, :, 4] > confidence).float().unsqueeze(2)
    predictions = predictions * mask

    # Nothing left anywhere in the batch.  Counting rows of nonzero() avoids
    # the bare squeeze() that became 0-dim with exactly one surviving box.
    if torch.nonzero(predictions[:, :, 4]).size(0) == 0:
        return 0

    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    bbox = predictions.new(predictions.shape)
    bbox[:, :, 0] = predictions[:, :, 0] - predictions[:, :, 2] / 2
    bbox[:, :, 1] = predictions[:, :, 1] - predictions[:, :, 3] / 2
    bbox[:, :, 2] = predictions[:, :, 0] + predictions[:, :, 2] / 2
    bbox[:, :, 3] = predictions[:, :, 1] + predictions[:, :, 3] / 2
    predictions[:, :, :4] = bbox[:, :, :4]

    batch_size = predictions.size(0)
    detections = []

    for ind in range(batch_size):
        prediction = predictions[ind]

        # view(-1) keeps the index 1-D even when a single box survives.
        ind_nz = torch.nonzero(prediction[:, 4]).view(-1)
        if ind_nz.size(0) == 0:
            continue
        prediction = prediction[ind_nz, :]

        # Replace the per-class scores by (best score, best class index).
        max_score, max_score_ind = torch.max(prediction[:, 5:], 1)
        max_score = max_score.float().unsqueeze(1)
        max_score_ind = max_score_ind.float().unsqueeze(1)
        prediction = torch.cat((prediction[:, :5], max_score, max_score_ind), 1)

        # Classes present in this image.
        try:
            image_classes = unique(prediction[:, -1])
        except Exception:
            continue

        for cls in image_classes:
            # Rows belonging to the current class.
            class_mask = (prediction[:, -1] == cls)
            class_mask_ind = torch.nonzero(class_mask).view(-1)
            prediction_class = prediction[class_mask_ind].view(-1, 7)

            # Highest objectness first.
            score_sort_ind = torch.sort(prediction_class[:, 4], descending=True)[1]
            prediction_class = prediction_class[score_sort_ind]
            cnt = prediction_class.size(0)

            if nms:
                for i in range(cnt):
                    # The tensor shrinks while iterating; running past its
                    # end (or an empty tail) ends the loop.
                    try:
                        ious = bbox_iou(prediction_class[i].unsqueeze(0),
                                        prediction_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero the lower-ranked rows that overlap box i too much.
                    iou_mask = (ious < nms_thresh).float().unsqueeze(1)
                    prediction_class[i + 1:] *= iou_mask

                    # Drop the zeroed rows.  The column is not squeezed: with
                    # one row left it would become 0-dim and the survivor
                    # would be discarded.
                    non_zero_ind = torch.nonzero(prediction_class[:, 4]).view(-1)
                    prediction_class = prediction_class[non_zero_ind].view(-1, 7)

            batch_ind = prediction_class.new(prediction_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, prediction_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Turn raw network output into final detections.

    Applies the objectness threshold and class-wise non-maximum suppression.

    Args:
        prediction: tensor indexed as ``[image, box, attribute]`` where the
            attributes are read as (cx, cy, w, h, objectness, class scores...).
        confidence: objectness threshold; boxes not above it are discarded.
        num_classes: number of class-score columns following the objectness.
        nms: whether to run non-maximum suppression.
        nms_conf: IoU at or above which a lower-ranked box is removed.

    Returns:
        ``0`` when nothing is detected, otherwise a ``D x 8`` tensor with one
        row per detection: (image index in batch, x1, y1, x2, y2, objectness,
        best class score, best class index).
    """
    # Zero the whole row of every box whose objectness is below the threshold.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Explicit "no detection" exit.  The original wrapped nonzero() in a
    # try/except, which only fired on very old PyTorch versions.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # (centre x, centre y, width, height) -> (x1, y1, x2, y2); corners make
    # the IoU computation easier.
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    # The number of detections differs per image, so thresholding and NMS
    # are done image by image rather than vectorised over the batch.
    batch_size = prediction.size(0)
    detections = []

    for ind in range(batch_size):
        image_pred = prediction[ind]

        # Keep only the best class: drop the class-score columns and append
        # the best score and its class index.
        max_conf, max_conf_score = torch.max(
            image_pred[:, 5:(5 + num_classes)], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_conf, max_conf_score), 1)

        # Remove the rows that were zeroed by the threshold.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).view(-1)
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind, :].view(-1, 7)

        # Classes detected in this image.
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        for cls in img_classes:
            # Detections of this class (selected through a non-zero class score).
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).view(-1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort by objectness, highest first.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # The tensor shrinks during the loop; running off its end
                    # (or an empty tail) terminates NMS for this class.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero every later box whose IoU with box i reaches the
                    # threshold, then drop the zeroed rows.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            # Prefix each detection with the index of its image in the batch.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    # No class produced any rows: report "nothing detected".
    if not detections:
        return 0
    return torch.cat(detections)
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold raw detections on objectness and apply class-wise NMS.

    Args:
        prediction: tensor indexed as ``[image, box, attribute]`` where the
            attributes are read as (cx, cy, w, h, objectness, class scores...).
        confidence: objectness threshold; boxes not above it are discarded.
        num_classes: number of class-score columns following the objectness.
        nms: whether to run non-maximum suppression.
        nms_conf: IoU at or above which a lower-ranked box is removed.

    Returns:
        ``0`` when nothing is detected, otherwise a tensor with one row per
        detection and 8 columns: (image index in batch, x1, y1, x2, y2,
        objectness, best class score, best class index).
    """
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Explicit early exit; the original try/except around nonzero() does not
    # raise on current PyTorch when nothing is found.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # (cx, cy, w, h) -> (xmin, ymin, xmax, ymax)
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_a[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_a[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_a[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    detections = []

    for ind in range(batch_size):
        # select the image from the batch
        image_pred = prediction[ind]

        # Replace the class-score columns by the best class score and the
        # index of that class.
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        # columns: (xmin, ymin, xmax, ymax, objectness, class score, class index)
        image_pred = torch.cat((image_pred[:, :5], max_conf, max_conf_score), 1)

        # Get rid of the rows zeroed by the threshold.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).view(-1)
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind, :].view(-1, 7)

        # Classes detected in the image.
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        # NMS is done class-wise.
        for cls in img_classes:
            # Detections of one particular class (non-zero class score).
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).view(-1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU of box i with every box ranked after it.  The
                    # tensor shrinks, so running past its end stops the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero out detections whose IoU reaches the threshold ...
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # ... and keep only the non-zero rows.
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            # The whole batch is stored as one flat list of detections; the
            # extra first column records which image each row belongs to.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
def __getitem__(self, idx):
    """Build batch number ``idx``: images, true boxes and three target grids.

    Each object is assigned to the anchor whose shape overlaps it best, and
    written into the target grid that owns this anchor (anchors are split
    into three consecutive groups, one per grid).

    Args:
        idx: batch index.

    Returns:
        ``[x_batch, t_batch, yolo_1, yolo_2, yolo_3]`` and
        ``[dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]``.  The first list holds
        the images, the list of true boxes per image and the three target
        grids; the second holds zero arrays of shape ``(batch, 1)``.
    """
    # network input size for this batch
    net_h, net_w = self._get_net_size(idx)
    base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

    # first and last instance indices of the batch
    l_bound = idx * self.batch_size
    r_bound = (idx + 1) * self.batch_size

    if r_bound > len(self.instances):
        r_bound = len(self.instances)
        # Clamp at 0: with fewer instances than one batch a negative start
        # would slice from the end, dropping instances and padding the batch
        # with blank samples.
        l_bound = max(0, r_bound - self.batch_size)

    batch_len = r_bound - l_bound
    anchors_per_grid = len(self.anchors) // 3
    cell_depth = 4 + 1 + len(self.labels)

    x_batch = np.zeros((batch_len, net_h, net_w, 3))  # input images
    # list of ground-truth boxes
    t_batch = np.zeros((batch_len, 1, 1, 1, self.max_box_per_image, 4))

    # desired network outputs 1, 2 and 3 (grids of 1x, 2x and 4x base size)
    yolo_1 = np.zeros((batch_len, 1 * base_grid_h, 1 * base_grid_w,
                       anchors_per_grid, cell_depth))
    yolo_2 = np.zeros((batch_len, 2 * base_grid_h, 2 * base_grid_w,
                       anchors_per_grid, cell_depth))
    yolo_3 = np.zeros((batch_len, 4 * base_grid_h, 4 * base_grid_w,
                       anchors_per_grid, cell_depth))
    yolos = [yolo_3, yolo_2, yolo_1]

    dummy_yolo_1 = np.zeros((batch_len, 1))
    dummy_yolo_2 = np.zeros((batch_len, 1))
    dummy_yolo_3 = np.zeros((batch_len, 1))

    instance_count = 0
    true_box_index = 0

    for train_instance in self.instances[l_bound:r_bound]:
        # augment the input image and fix the objects' position and size
        img, all_objs = self._aug_image(train_instance, net_h, net_w)

        for obj in all_objs:
            obj_w = obj['xmax'] - obj['xmin']
            obj_h = obj['ymax'] - obj['ymin']

            # find the best anchor box for this object (shape overlap only:
            # the object is shifted to the origin before comparing)
            max_anchor = None
            max_index = -1
            max_iou = -1

            shifted_box = BoundBox(0, 0, obj_w, obj_h)

            for i, anchor in enumerate(self.anchors):
                iou = bbox_iou(shifted_box, anchor)

                if max_iou < iou:
                    max_anchor = anchor
                    max_index = i
                    max_iou = iou

            # the grid responsible for this bounding box
            yolo = yolos[max_index // 3]
            grid_h, grid_w = yolo.shape[1:3]

            # position of the box centre in grid units
            center_x = .5 * (obj['xmin'] + obj['xmax'])
            center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
            center_y = .5 * (obj['ymin'] + obj['ymax'])
            center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

            # log-size of the box relative to the chosen anchor
            w = np.log(obj_w / float(max_anchor.xmax))  # t_w
            h = np.log(obj_h / float(max_anchor.ymax))  # t_h

            box = [center_x, center_y, w, h]

            # index of the label
            obj_indx = self.labels.index(obj['name'])

            # cell responsible for this object
            grid_x = int(np.floor(center_x))
            grid_y = int(np.floor(center_y))

            # ground truth x, y, w, h, confidence and class probabilities
            anchor_slot = max_index % 3
            yolo[instance_count, grid_y, grid_x, anchor_slot] = 0
            yolo[instance_count, grid_y, grid_x, anchor_slot, 0:4] = box
            yolo[instance_count, grid_y, grid_x, anchor_slot, 4] = 1.
            yolo[instance_count, grid_y, grid_x, anchor_slot, 5 + obj_indx] = 1

            # record the true box; the slot index wraps around when full
            true_box = [center_x, center_y, obj_w, obj_h]
            t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

            true_box_index += 1
            true_box_index = true_box_index % self.max_box_per_image

        # assign the input image to x_batch
        if self.norm is not None:
            x_batch[instance_count] = self.norm(img)
        else:
            # no normaliser: draw the boxes on the image for a sanity check
            for obj in all_objs:
                cv2.rectangle(img, (obj['xmin'], obj['ymin']),
                              (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                cv2.putText(img, obj['name'],
                            (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                            1.2e-3 * img.shape[0], (0, 255, 0), 2)

            x_batch[instance_count] = img

        # next instance in the current batch
        instance_count += 1

    return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], \
           [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
def parse_targets(self, targets, anchors, grid_w, grid_h, threshold):
    """Convert ground-truth boxes into per-cell training targets.

    Args:
        targets: tensor indexed as ``[image, target, 5]``.  Column 0 is used
            as the class index and columns 1-4 are scaled by the grid size as
            (x, y, w, h).  Rows that sum to zero are skipped.
            NOTE(review): columns 1-4 are presumably fractions of the image
            size -- confirm against the data loader.
        anchors: sequence of ``(width, height)`` pairs, one per anchor, in
            grid units.
        grid_w: grid width in cells.
        grid_h: grid height in cells.
        threshold: anchors whose shape IoU with a target exceeds this are
            removed from the no-object mask at the target's cell.

    Returns:
        ``(mask, noobj_mask, t_x, t_y, t_w, t_h, t_conf, t_class)``.  All but
        ``t_class`` have shape ``(batch, num_anchors, grid_h, grid_w)``;
        ``t_class`` has an extra trailing ``num_classes`` dimension.
    """
    batch_size = targets.size(0)

    # Cells are addressed as [row (y), column (x)], so height comes first.
    # Allocating (grid_w, grid_h) only worked for square grids.
    cell_shape = (batch_size, self.num_anchors, grid_h, grid_w)

    mask = torch.zeros(*cell_shape, requires_grad=False)
    noobj_mask = torch.ones(*cell_shape, requires_grad=False)
    t_x = torch.zeros(*cell_shape, requires_grad=False)
    t_y = torch.zeros(*cell_shape, requires_grad=False)
    t_w = torch.zeros(*cell_shape, requires_grad=False)
    t_h = torch.zeros(*cell_shape, requires_grad=False)
    t_conf = torch.zeros(*cell_shape, requires_grad=False)
    t_class = torch.zeros(*cell_shape, self.num_classes, requires_grad=False)

    # Anchor shapes as boxes anchored at the origin: (0, 0, w, h).  This does
    # not depend on the target, so it is built once.
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))

    for b in range(batch_size):
        for t in range(targets.shape[1]):
            # all-zero rows are skipped
            if targets[b, t].sum() == 0:
                continue

            # box position and size in grid units
            g_x = float(targets[b, t, 1] * grid_w)
            g_y = float(targets[b, t, 2] * grid_h)
            g_w = float(targets[b, t, 3] * grid_w)
            g_h = float(targets[b, t, 4] * grid_h)

            # cell containing the box centre
            g_i = int(g_x)
            g_j = int(g_y)

            # ground-truth shape as a box at the origin
            ground_truth_box = torch.FloatTensor([[0.0, 0.0, g_w, g_h]])

            # IoU between the ground-truth shape and every anchor shape
            anchor_ious = bbox_iou(ground_truth_box, anchor_shapes)

            # anchors overlapping more than the threshold are not "no object"
            noobj_mask[b, anchor_ious > threshold, g_j, g_i] = 0

            # best matching anchor
            n_best = int(torch.argmax(anchor_ious))

            mask[b, n_best, g_j, g_i] = 1

            # offsets inside the cell and log-scale relative to the anchor
            t_x[b, n_best, g_j, g_i] = g_x - g_i
            t_y[b, n_best, g_j, g_i] = g_y - g_j
            t_w[b, n_best, g_j, g_i] = math.log(g_w / anchors[n_best][0] + 1e-16)
            t_h[b, n_best, g_j, g_i] = math.log(g_h / anchors[n_best][1] + 1e-16)

            t_conf[b, n_best, g_j, g_i] = 1
            t_class[b, n_best, g_j, g_i, int(targets[b, t, 0])] = 1

    return mask, noobj_mask, t_x, t_y, t_w, t_h, t_conf, t_class
def eval_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold raw detections, apply class-wise NMS and return a NumPy array.

    Args:
        prediction: tensor indexed as ``[image, box, attribute]`` where the
            attributes are read as (cx, cy, w, h, objectness, class scores...).
        confidence: objectness threshold; boxes not above it are discarded.
        num_classes: number of class-score columns following the objectness.
        nms: whether to run non-maximum suppression.
        nms_conf: IoU at or above which a lower-ranked box is removed.

    Returns:
        NumPy array with one row per detection and 8 columns: (image index in
        batch, x1, y1, x2, y2, objectness, best class score, best class
        index).  The array has shape ``(0, 8)`` when nothing is detected.
    """
    conf_mask = (prediction[:, :, 4] > confidence).unsqueeze(2)
    prediction = prediction * conf_mask.to(prediction.dtype)

    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_a[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_a[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_a[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    detections = []

    for ind in range(batch_size):
        image_pred = prediction[ind]

        # Replace the class scores by (best score, best class index).  The
        # index is cast so both columns share the dtype of the box data.
        max_conf, max_conf_index = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.to(image_pred.dtype).unsqueeze(1)
        max_conf_index = max_conf_index.to(image_pred.dtype).unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_conf, max_conf_index), 1)

        non_zero_ind = torch.nonzero(image_pred[:, 4])
        if len(non_zero_ind) == 0:
            continue
        # view(-1) / view(-1, 7) keep everything 2-D even when exactly one
        # box survives; a bare squeeze() produced a 1-D row and crashed.
        image_pred_ = image_pred[non_zero_ind.view(-1), :].view(-1, 7)

        img_classes = torch.unique(image_pred_[:, -1], sorted=True)

        for cls in img_classes:
            # Detections of this class (selected through a non-zero class score).
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).view(-1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Highest objectness first.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # The tensor shrinks while iterating; running past its
                    # end (or an empty tail) ends the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        # Previously an unbound ``output`` raised here; report "no
        # detections" as an empty result with the usual column count.
        return prediction.new_zeros((0, 8)).detach().cpu().numpy()

    return torch.cat(detections).detach().cpu().numpy()
def write_results(prediction,
                  confidence,
                  num_classes,
                  model_dim,
                  orig_dim,
                  nms=True,
                  nms_conf=0.7):
    """Filter detections by class probability and apply class-wise NMS.

    Arguments
    ---------
    prediction : tensor (3D)
        [batch, box, [x_center, y_center, width, height, objectness_score,
        class_score1, class_score2, ...]]
    confidence : float
        Detections whose best class score is not above this are dropped.
    num_classes : int
        Number of class-score columns following the objectness score.
    model_dim, orig_dim :
        Not used by this function; accepted for interface compatibility.
    nms : bool
        Whether to run non-maximum suppression.
    nms_conf : float
        IoU at or above which a lower-ranked box is removed.

    Returns
    --------
    output : tensor (2D), or -1 when there is nothing to report
        [detection, [batch_index, x_1, y_1, x_2, y_2, objectness_score,
        class_index, class_probability]]
    """
    # If the entire batch contains 0 for objectness score, skip.  (Checked
    # explicitly: nonzero() does not raise on an all-zero tensor.)
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return -1

    batch_size = prediction.size(0)
    detections = []

    for ind in range(batch_size):
        pred = prediction[ind]
        if pred.shape[0] == 0:
            continue

        # Get x1y1x2y2
        pred = center_to_corner_2d(pred)

        # Replace the class scores by the index of the best class and its score.
        max_conf_score, max_conf = torch.max(pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        image_pred = torch.cat((pred[:, :5], max_conf, max_conf_score), 1)

        # Get rid of the zero entries for objectness
        non_zero_ind = torch.nonzero(image_pred[:, 4]).view(-1)
        image_pred_ = image_pred[non_zero_ind, :].view(-1, 7)

        # Remove low confidence by class probability
        image_pred_ = image_pred_[image_pred_[:, -1] > confidence, :]

        # Get the various classes detected in the image
        try:
            img_classes = unique(image_pred_[:, -2].int())
        except Exception:
            continue

        # NMS is done class-wise
        for label in img_classes:
            # detections of one particular class
            cls_mask_ind = (image_pred_[:, -2].int() == label)
            image_pred_class = image_pred_[cls_mask_ind].view(-1, 7)

            # highest objectness first
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU of box i with all boxes ranked after it.  Once the
                    # shrinking tensor is exhausted every later index fails
                    # as well, so stop instead of retrying each one.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero out detections whose IoU reaches the threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Keep the non-zero entries for objectness
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return -1
    return torch.cat(detections)
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold raw detections on objectness and apply class-wise NMS.

    Args:
        prediction: tensor indexed as ``[image, box, attribute]`` where the
            attributes are read as (cx, cy, w, h, objectness, class scores...).
        confidence: objectness threshold; boxes not above it are ignored.
        num_classes: number of class-score columns following the objectness.
        nms: whether to run non-maximum suppression.
        nms_conf: IoU at or above which a lower-ranked box is removed.

    Returns:
        ``0`` when nothing is detected, otherwise a tensor with one row per
        detection and 8 columns: (image index in batch, x1, y1, x2, y2,
        objectness, best class score, best class index).
    """
    # Zero the whole row of every box whose objectness is below the threshold.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Explicit "no detection" exit.  The former try/except around nonzero()
    # does not raise on current PyTorch, so the function fell through and
    # returned an uninitialised placeholder tensor instead of 0.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert (centre x, centre y, width, height) to top-left / bottom-right
    # corners, which makes the IoU computation easier.
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_a[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_a[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_a[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    detections = []

    for ind in range(batch_size):
        # one image of the batch
        image_pred = prediction[ind]

        # Keep only the best class: its score and its index.
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_conf, max_conf_score), 1)

        # Drop the boxes that were zeroed by the threshold; skip the image if
        # none is left.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).view(-1)
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind, :].view(-1, 7)

        try:
            # the last column holds the class index
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        # NMS is done class-wise
        for cls in img_classes:
            # Detections of the current class (non-zero class score).
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).view(-1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort by objectness, descending.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            # number of detections of this class
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU between box i and every box ranked after it.  The
                    # tensor shrinks, so running past its end stops the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Remove every box whose IoU reaches the threshold.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Keep the non-zero predictions.
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            # All detections of the batch are stored in one flat tensor; the
            # extra first column identifies the image each row belongs to.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Post-process the network output into the final detections.

    Args:
        prediction: tensor indexed as ``[image, box, attribute]`` where the
            attributes are read as (cx, cy, w, h, objectness, class scores...).
        confidence: objectness threshold; boxes not above it are discarded.
        num_classes: number of class-score columns following the objectness.
        nms: whether to run non-maximum suppression.
        nms_conf: IoU at or above which a lower-ranked box is removed.

    Returns:
        ``0`` when nothing is detected, otherwise a tensor with one row per
        detection and 8 columns: (image index in batch, x1, y1, x2, y2,
        objectness, best class score, best class index).
    """
    # Set the whole row of boxes below the confidence threshold to 0.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Explicit "no detection" exit.  The former try/except around nonzero()
    # does not raise on current PyTorch, so the function fell through and
    # returned an uninitialised placeholder tensor instead of 0.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert the box coordinates to corner form.
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_a[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_a[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_a[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    detections = []

    # Thresholding and NMS are done one image at a time.
    for ind in range(batch_size):
        # select the image from the batch
        image_pred = prediction[ind]

        # Drop the num_classes score columns and append the best class score
        # together with the index of that class.
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_conf, max_conf_score), 1)

        # Remove the rows whose objectness is 0.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).view(-1)
        if non_zero_ind.numel() == 0:
            continue
        image_pred_ = image_pred[non_zero_ind, :].view(-1, 7)

        # Get the various classes detected in the image
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        # NMS is done class-wise
        for cls in img_classes:
            # Detections of one particular class (non-zero class score).
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).view(-1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort the boxes of this class, highest objectness on top.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoU between box i and all boxes after it.  The tensor
                    # shrinks, so running past its end stops the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero every box whose IoU reaches the threshold.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Keep only the non-zero entries.
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).view(-1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            # All detections of the batch are stored in one flat tensor; the
            # extra first column identifies the image each row belongs to.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
    """Post-process detector output: threshold, convert boxes, class-wise NMS.

    Args:
        prediction: tensor indexed as [image, box, attribute]; the attributes
            are read as (cx, cy, w, h, objectness) followed by
            ``num_classes`` class scores.  It is not modified.
        confidence: objectness threshold (strictly greater is kept).
        num_classes: number of class-score columns starting at column 5.
        nms: when true, run non-maximum suppression.
        nms_conf: IoU limit; a lower-ranked box of the same class is dropped
            when its IoU (from ``bbox_iou``) with a kept box is not below it.

    Returns:
        Tensor of rows
        (image index, x1, y1, x2, y2, objectness, class score, class index),
        or the integer 0 when no box survives.
    """
    passed = prediction[:, :, 4] > confidence
    if not passed.any():
        return 0

    # Rows below the objectness threshold become all-zero.  Multiplying builds
    # a fresh tensor, which keeps the caller's input intact.
    prediction = prediction * passed.float().unsqueeze(2)

    # Convert centre / width / height into top-left and bottom-right corners.
    cx, cy = prediction[:, :, 0], prediction[:, :, 1]
    dx, dy = prediction[:, :, 2] / 2, prediction[:, :, 3] / 2
    prediction[:, :, :4] = torch.stack((cx - dx, cy - dy, cx + dx, cy + dy), 2)

    rows = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # torch.max returns both the best class score and its index; they
        # replace the num_classes score columns, giving 7 columns per box.
        best_score, best_class = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             best_score.float().unsqueeze(1),
             best_class.float().unsqueeze(1)), 1)

        # Keep only the boxes that were not zeroed out above.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.size(0) == 0:
            continue

        # Non-maximum suppression is carried out class by class.
        for cls in torch.unique(image_pred_[:, -1]):
            image_pred_class = image_pred_[image_pred_[:, -1] == cls]

            # Sort so that the box with the largest objectness comes first.
            ranking = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[ranking]

            if nms:
                i = 0
                while i + 1 < image_pred_class.size(0):
                    # IoU of box i against every box ranked after it.
                    # assumes bbox_iou returns one IoU per candidate box
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    keep_after = (ious < nms_conf).reshape(-1)
                    image_pred_class = torch.cat(
                        (image_pred_class[:i + 1],
                         image_pred_class[i + 1:][keep_after]))
                    i += 1

            # The extra leading column records which image of the batch the
            # detection came from; the batch dimension is flattened away.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            rows.append(torch.cat((batch_ind, image_pred_class), 1))

    if not rows:
        return 0
    return torch.cat(rows)
def build_targets(pred_boxes, pred_conf, pred_cls, target, anchors,
                  num_anchors, num_classes, grid_size, ignore_thres, device):
    """Build the per-cell training targets for one detection grid.

    Every non-padding ground-truth box is assigned to the grid cell holding
    its centre and to the anchor whose shape overlaps it most (IoU computed
    by ``bbox_iou`` on boxes that share the same origin).

    Args:
        pred_boxes: predicted boxes, indexed [image, anchor, row, col].
        pred_conf: predicted objectness, indexed [image, anchor, row, col].
        pred_cls: predicted class scores, indexed
            [image, anchor, row, col, class].
        target: ground truth indexed [image, box, field] with the fields read
            as (label, x, y, w, h).  The coordinates are multiplied by
            ``grid_size``, so they are presumably normalised to [0, 1] --
            TODO confirm against the data loader.  Rows summing to zero are
            skipped as padding.
        anchors: sequence of (width, height) pairs, compared directly with the
            grid-scaled box sizes.
        num_anchors: number of anchors (size of the anchor axis).
        num_classes: number of classes (size of the class axis of ``tcls``).
        grid_size: number of cells along each side of the grid.
        ignore_thres: anchors whose shape IoU with a box exceeds this value
            are excluded from the confidence mask at that box's cell, unless
            they are the best-matching anchor.
        device: device on which all returned tensors are allocated.

    Returns:
        ``(nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls)``:
        the number of ground-truth boxes, the number of them whose assigned
        prediction has IoU > 0.5, the right label and confidence > 0.5,
        followed by the assignment mask, the confidence mask, the x/y offset
        and log-scale w/h targets, and the uint8 objectness and one-hot class
        targets.
    """
    nB = target.size(0)
    nA = num_anchors
    nC = num_classes
    nG = grid_size

    grid_shape = (nB, nA, nG, nG)
    mask = torch.zeros(grid_shape, device=device)
    conf_mask = torch.ones(grid_shape, device=device)
    tx = torch.zeros(grid_shape, device=device)
    ty = torch.zeros(grid_shape, device=device)
    tw = torch.zeros(grid_shape, device=device)
    th = torch.zeros(grid_shape, device=device)
    tconf = torch.zeros(grid_shape, dtype=torch.uint8, device=device)
    tcls = torch.zeros(grid_shape + (nC,), dtype=torch.uint8, device=device)

    # Anchors as boxes (0, 0, w, h).  They do not depend on the ground truth,
    # so they are built once instead of once per ground-truth box.
    anchor_shapes = torch.tensor(
        np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1),
        dtype=torch.float32, device=device)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(target[b].shape[0]):
            if target[b, t].sum() == 0:
                continue
            nGT += 1

            # Box centre and size expressed in grid cells.
            gx = target[b, t, 1].item() * nG
            gy = target[b, t, 2].item() * nG
            gw = target[b, t, 3].item() * nG
            gh = target[b, t, 4].item() * nG
            # Column / row of the cell that contains the centre.
            gi = int(gx)
            gj = int(gy)

            # Compare shapes only: both boxes are anchored at the origin.
            gt_shape = torch.tensor([[0.0, 0.0, gw, gh]],
                                    dtype=torch.float32, device=device)
            anch_ious = bbox_iou(gt_shape, anchor_shapes)

            # Well-matching anchors are ignored in the confidence loss ...
            conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
            # ... except the best one, which becomes responsible for the box.
            best_n = torch.argmax(anch_ious)

            gt_box = torch.tensor([[gx, gy, gw, gh]],
                                  dtype=torch.float32, device=device)
            pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0).to(device)

            mask[b, best_n, gj, gi] = 1
            conf_mask[b, best_n, gj, gi] = 1

            # Centre offset inside the cell.
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Log-ratio of the box size to the anchor size; the epsilon keeps
            # the logarithm finite for a zero-sized box.
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)

            # One-hot class target and objectness target.
            target_label = int(target[b, t, 0])
            tcls[b, best_n, gj, gi, target_label] = 1
            tconf[b, best_n, gj, gi] = 1

            # Count the box as correctly predicted when location, label and
            # confidence all agree with the ground truth.
            iou = bbox_iou(gt_box, pred_box)
            pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
            score = pred_conf[b, best_n, gj, gi]
            if iou > 0.5 and pred_label == target_label and score > 0.5:
                nCorrect += 1

    return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold detector output and run class-wise non-maximum suppression.

    Args:
        prediction: tensor indexed as [image, box, attribute]; the attributes
            are read as (cx, cy, w, h, objectness) followed by
            ``num_classes`` class scores.  It is not modified.
        confidence: objectness threshold (strictly greater is kept).
        num_classes: number of class-score columns starting at column 5.
        nms: when true, run non-maximum suppression.
        nms_conf: IoU limit used by the suppression step; IoU values come
            from ``bbox_iou``.

    Returns:
        Tensor of rows
        (image index, x1, y1, x2, y2, objectness, class score, class index),
        or the integer 0 when there is no detection in the whole batch.
    """
    confident = prediction[:, :, 4] > confidence
    if not confident.any():
        return 0

    # Multiplying yields a new tensor, so the caller's input stays untouched.
    prediction = prediction * confident.float().unsqueeze(2)

    # Corner coordinates make the IoU of two boxes easier to compute.
    mid_x, mid_y = prediction[:, :, 0], prediction[:, :, 1]
    half_w, half_h = prediction[:, :, 2] / 2, prediction[:, :, 3] / 2
    prediction[:, :, :4] = torch.stack(
        (mid_x - half_w, mid_y - half_h, mid_x + half_w, mid_y + half_h), 2)

    # Thresholding and NMS have to be done one image at a time.
    collected = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Replace the class-score columns by (best score, best class index).
        top_score, top_class = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             top_score.float().unsqueeze(1),
             top_class.float().unsqueeze(1)), 1)

        # An image without any surviving box is simply skipped.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.size(0) == 0:
            continue

        # Several detections may share a class, so iterate over the distinct
        # class indices present in this image.
        for cls in torch.unique(image_pred_[:, -1]):
            image_pred_class = image_pred_[image_pred_[:, -1] == cls]

            # Highest objectness first.
            by_objectness = torch.sort(
                image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[by_objectness]

            if nms:
                i = 0
                while i + 1 < image_pred_class.size(0):
                    # assumes bbox_iou returns one IoU per candidate box
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    not_suppressed = (ious < nms_conf).reshape(-1)
                    image_pred_class = torch.cat(
                        (image_pred_class[:i + 1],
                         image_pred_class[i + 1:][not_suppressed]))
                    i += 1

            # Tag each detection with the index of its image in the batch.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            collected.append(torch.cat((batch_ind, image_pred_class), 1))

    if not collected:
        return 0
    return torch.cat(collected)
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4, det_hm=False):
    """Threshold detector output and run class-wise non-maximum suppression.

    https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-4/

    Args:
        prediction: tensor indexed as [image, box, attribute], e.g.
            (B x 10647 x 85) with (52*52 + 26*26 + 13*13) * 3 = 10647 boxes
            and 85 attributes: (c_x, c_y, w, h, object confidence) followed by
            80 class scores.  It is not modified.
        confidence: objectness threshold (strictly greater is kept).
        num_classes: number of class-score columns starting at column 5.
        nms: when true, run non-maximum suppression.
        nms_conf: IoU limit used by the suppression step; IoU values come
            from ``bbox_iou``.
        det_hm: when true, keep only detections whose class index is 0.

    Returns:
        Tensor of shape (Num_obj, 8) with rows
        [img_index, x_1, y_1, x_2, y_2, object confidence, class_score,
        label_index].  When nothing is detected the tensor has zero rows.
    """
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    # The product is a new tensor, so the caller's input is left untouched.
    prediction = prediction * conf_mask

    # (c_x, c_y, w, h) -> (x_1, y_1, x_2, y_2).
    c_x, c_y = prediction[:, :, 0], prediction[:, :, 1]
    half_w, half_h = prediction[:, :, 2] / 2, prediction[:, :, 3] / 2
    prediction[:, :, :4] = torch.stack(
        (c_x - half_w, c_y - half_h, c_x + half_w, c_y + half_h), 2)

    per_class = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # image_pred becomes (boxes, 7):
        # [x1, y1, x2, y2, obj_score, max_conf, max_conf_index]
        max_conf, max_conf_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             max_conf.float().unsqueeze(1),
             max_conf_index.float().unsqueeze(1)), 1)

        # Get rid of the rows zeroed by the confidence threshold.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if det_hm:
            # Keep class index 0 only.
            image_pred_ = image_pred_[image_pred_[:, -1] == 0]
        # Skip just this image; the other images of the batch must still be
        # processed and earlier results must not be thrown away.
        if image_pred_.size(0) == 0:
            continue

        for cls in torch.unique(image_pred_[:, -1], sorted=True).float():
            image_pred_class = image_pred_[image_pred_[:, -1] == cls]

            # Highest objectness first.
            conf_sort_index = torch.sort(
                image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            if nms:
                i = 0
                while i + 1 < image_pred_class.size(0):
                    # assumes bbox_iou returns one IoU per candidate box
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    below_limit = (ious < nms_conf).reshape(-1)
                    image_pred_class = torch.cat(
                        (image_pred_class[:i + 1],
                         image_pred_class[i + 1:][below_limit]))
                    i += 1

            # The leading column identifies the image inside the batch.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            per_class.append(torch.cat((batch_ind, image_pred_class), 1))

    if not per_class:
        # Same column layout as a non-empty result, but without rows.
        return prediction.new_zeros((0, 8))
    return torch.cat(per_class)
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4, target_class=0):
    """Threshold detector output and keep the detections of a single class.

    Args:
        prediction: tensor indexed as [image, box, attribute]; the attributes
            are read as (cx, cy, w, h, objectness) followed by
            ``num_classes`` class scores.  It is not modified.
        confidence: objectness threshold (strictly greater is kept).
        num_classes: number of class-score columns starting at column 5.
        nms: when true, run non-maximum suppression.
        nms_conf: a box is suppressed when its IoU (from ``bbox_iou``) with a
            higher-ranked kept box is not below this value.
        target_class: index of the only class that is reported.  The default
            0 reproduces the previously hard-coded behaviour.

    Returns:
        Tensor of rows
        (image index, x1, y1, x2, y2, objectness, class score, class index),
        or the integer 0 when no box of ``target_class`` was detected.
    """
    accepted = prediction[:, :, 4] > confidence
    if not accepted.any():
        return 0

    # The product is a new tensor, so the caller's input is left untouched.
    prediction = prediction * accepted.float().unsqueeze(2)

    # (cx, cy, w, h) -> (x1, y1, x2, y2).
    cx, cy = prediction[:, :, 0], prediction[:, :, 1]
    half_w, half_h = prediction[:, :, 2] / 2, prediction[:, :, 3] / 2
    prediction[:, :, :4] = torch.stack(
        (cx - half_w, cy - half_h, cx + half_w, cy + half_h), 2)

    results = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Replace the class-score columns by (best score, best class index).
        best_score, best_class = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             best_score.float().unsqueeze(1),
             best_class.float().unsqueeze(1)), 1)

        # Boxes that passed the threshold and belong to the requested class.
        wanted = (image_pred[:, 4] != 0) & (image_pred[:, -1] == target_class)
        image_pred_class = image_pred[wanted]
        if image_pred_class.size(0) == 0:
            continue

        # Highest objectness first.
        order = torch.sort(image_pred_class[:, 4], descending=True)[1]
        image_pred_class = image_pred_class[order]

        if nms:
            max_detections = []
            while image_pred_class.size(0):
                # The best remaining box is always kept ...
                max_detections.append(image_pred_class[:1])
                if image_pred_class.size(0) == 1:
                    break
                # ... and removes the remaining boxes that overlap it.
                ious = bbox_iou(max_detections[-1], image_pred_class[1:])
                image_pred_class = image_pred_class[1:][ious < nms_conf]
            image_pred_class = torch.cat(max_detections).detach()

        # The leading column identifies the image inside the batch.
        batch_ind = image_pred_class.new_full(
            (image_pred_class.size(0), 1), ind)
        results.append(torch.cat((batch_ind, image_pred_class), 1))

    if not results:
        return 0
    return torch.cat(results)
def write_results(predictions, confidence, num_class, nms=True, nms_thresh=0.4):
    """Threshold detector output and run class-wise non-maximum suppression.

    Args:
        predictions: tensor indexed as [image, box, attribute], e.g.
            (1, 10647, 85) with 10647 = 3 * (13*13 + 26*26 + 52*52).  The
            first five attributes are read as (Cx, Cy, w, h, score); every
            remaining column is treated as a class score.  It is not
            modified.
        confidence: objectness threshold (strictly greater is kept).
        num_class: not used; all columns from index 5 on are taken as class
            scores.  Kept so existing callers keep working.
        nms: when true, run non-maximum suppression.
        nms_thresh: a box is suppressed when its IoU (from ``bbox_iou``) with
            a higher-ranked kept box of the same class is not below this.

    Returns:
        Tensor of rows
        (image index, x1, y1, x2, y2, score, class score, class index),
        or the integer 0 when no valid object was detected.
    """
    # Keep only the boxes whose score exceeds the threshold.
    valid = predictions[:, :, 4] > confidence
    if not valid.any():
        return 0

    # Entries below the threshold become all-zero, the rest is unchanged.
    # The product is a new tensor, so the caller's input stays intact.
    predictions = predictions * valid.float().unsqueeze(2)

    # Switch from centre / size to corner coordinates:
    # x1 = Cx - w/2, y1 = Cy - h/2, x2 = Cx + w/2, y2 = Cy + h/2.
    centre_x, centre_y = predictions[:, :, 0], predictions[:, :, 1]
    half_w, half_h = predictions[:, :, 2] / 2, predictions[:, :, 3] / 2
    predictions[:, :, :4] = torch.stack(
        (centre_x - half_w, centre_y - half_h,
         centre_x + half_w, centre_y + half_h), 2)

    pieces = []
    for ind in range(predictions.size(0)):
        # Results of image ``ind``, restricted to the surviving boxes.  A
        # boolean mask keeps the result two-dimensional even when exactly one
        # box survives.
        prediction = predictions[ind]
        prediction = prediction[prediction[:, 4] != 0]
        if prediction.size(0) == 0:
            continue

        # Most likely class of every candidate box and its score.
        max_score, max_score_ind = torch.max(prediction[:, 5:], 1)
        prediction = torch.cat(
            (prediction[:, :5],
             max_score.float().unsqueeze(1),
             max_score_ind.float().unsqueeze(1)), 1)

        # Class-wise NMS over all classes present in this image.
        for cls in torch.unique(prediction[:, -1]):
            prediction_class = prediction[prediction[:, -1] == cls]

            # Highest score first.
            score_sort_ind = torch.sort(
                prediction_class[:, 4], descending=True)[1]
            prediction_class = prediction_class[score_sort_ind]

            if nms:
                i = 0
                while i + 1 < prediction_class.size(0):
                    # assumes bbox_iou returns one IoU per candidate box
                    ious = bbox_iou(prediction_class[i].unsqueeze(0),
                                    prediction_class[i + 1:])
                    # Box i itself is kept; overlapping followers are removed.
                    kept = (ious < nms_thresh).reshape(-1)
                    prediction_class = torch.cat(
                        (prediction_class[:i + 1],
                         prediction_class[i + 1:][kept]))
                    i += 1

            # Store the result of this class, tagged with the image index.
            batch_ind = prediction_class.new_full(
                (prediction_class.size(0), 1), ind)
            pieces.append(torch.cat((batch_ind, prediction_class), 1))

    if not pieces:
        return 0
    return torch.cat(pieces)
def write_results_half(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Half-precision variant of the detection post-processing.

    Works like the float version but converts every mask and every derived
    column with ``.half()``.

    Args:
        prediction: tensor indexed as [image, box, attribute]; the attributes
            are read as (cx, cy, w, h, objectness) followed by
            ``num_classes`` class scores.  It is not modified.
        confidence: objectness threshold (strictly greater is kept).
        num_classes: number of class-score columns starting at column 5.
        nms: when true, run non-maximum suppression.
        nms_conf: a box is suppressed when its IoU (from ``bbox_iou``) with a
            higher-ranked kept box of the same class is not below this.

    Returns:
        Tensor of rows
        (image index, x1, y1, x2, y2, objectness, class score, class index),
        or the integer 0 when nothing was detected.
    """
    above_threshold = prediction[:, :, 4] > confidence
    if not above_threshold.any():
        return 0

    # The product is a new tensor, so the caller's input is left untouched.
    prediction = prediction * above_threshold.half().unsqueeze(2)

    # (cx, cy, w, h) -> (x1, y1, x2, y2).
    centre_x, centre_y = prediction[:, :, 0], prediction[:, :, 1]
    half_w, half_h = prediction[:, :, 2] / 2, prediction[:, :, 3] / 2
    prediction[:, :, :4] = torch.stack(
        (centre_x - half_w, centre_y - half_h,
         centre_x + half_w, centre_y + half_h), 2)

    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Replace the class-score columns by (best score, best class index).
        class_score, class_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             class_score.half().unsqueeze(1),
             class_index.half().unsqueeze(1)), 1)

        # A boolean mask keeps the result two-dimensional even when exactly
        # one box survives the threshold.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.size(0) == 0:
            continue

        # Class indices are made integral before de-duplication and turned
        # back into half precision for the comparison below.
        img_classes = torch.unique(image_pred_[:, -1].long()).half()

        for cls in img_classes:
            image_pred_class = image_pred_[image_pred_[:, -1] == cls]

            # Highest objectness first.
            order = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[order]

            if nms:
                i = 0
                while i + 1 < image_pred_class.size(0):
                    # assumes bbox_iou returns one IoU per candidate box
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    survivors = (ious < nms_conf).reshape(-1)
                    image_pred_class = torch.cat(
                        (image_pred_class[:i + 1],
                         image_pred_class[i + 1:][survivors]))
                    i += 1

            # The leading column identifies the image inside the batch.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
def write_results_half(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    """Threshold half-precision detector output and run class-wise NMS.

    Args:
        prediction: tensor indexed as [image, box, attribute]; the attributes
            are read as (cx, cy, w, h, objectness) followed by
            ``num_classes`` class scores.  It is not modified.
        confidence: objectness threshold (strictly greater is kept).
        num_classes: number of class-score columns starting at column 5.
        nms: when true, run non-maximum suppression.
        nms_conf: IoU limit used by the suppression step; IoU values come
            from ``bbox_iou``.

    Returns:
        Tensor of rows
        (image index, x1, y1, x2, y2, objectness, class score, class index),
        or the integer 0 when the batch contains no detection.
    """
    passed = prediction[:, :, 4] > confidence
    if not passed.any():
        return 0

    # Multiplying builds a fresh tensor; the caller's input is not changed.
    prediction = prediction * passed.half().unsqueeze(2)

    # Centre / size -> top-left and bottom-right corners.
    cx, cy = prediction[:, :, 0], prediction[:, :, 1]
    dx, dy = prediction[:, :, 2] / 2, prediction[:, :, 3] / 2
    prediction[:, :, :4] = torch.stack((cx - dx, cy - dy, cx + dx, cy + dy), 2)

    rows = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Best class score and its index replace the class-score columns.
        best_score, best_class = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             best_score.half().unsqueeze(1),
             best_class.half().unsqueeze(1)), 1)

        # Boolean masking always yields a 2-D tensor, also for a single box.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.size(0) == 0:
            continue

        # Distinct class indices found in this image.
        for cls in torch.unique(image_pred_[:, -1].long()).half():
            image_pred_class = image_pred_[image_pred_[:, -1] == cls]

            # Largest objectness on top.
            ranking = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[ranking]

            if nms:
                i = 0
                while i + 1 < image_pred_class.size(0):
                    # assumes bbox_iou returns one IoU per candidate box
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                    image_pred_class[i + 1:])
                    keep_after = (ious < nms_conf).reshape(-1)
                    image_pred_class = torch.cat(
                        (image_pred_class[:i + 1],
                         image_pred_class[i + 1:][keep_after]))
                    i += 1

            # Prefix each detection with the index of its image.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            rows.append(torch.cat((batch_ind, image_pred_class), 1))

    if not rows:
        return 0
    return torch.cat(rows)