def allNMS(self, th=0.1):
    """Run a single NMS pass over all valid detections, ignoring class.

    Rows whose first column equals -1 are treated as invalid and dropped.
    Column 1 is passed to ``nms`` as the score and columns 2+ as the box
    (presumably ``x1, y1, x2, y2`` -- confirm against ``totorch``).

    Args:
        th: overlap threshold forwarded to ``nms``.

    Returns:
        A new ``Ann`` built from the surviving rows.

    Raises:
        RuntimeError: from ``torch.cat`` when no row survives.
    """
    dets = self.totorch()
    survivors = []
    valid_rows = torch.nonzero(dets[:, 0] != -1)
    if valid_rows.nelement() > 0:
        candidates = dets.index_select(0, valid_rows[:, 0])
        boxes = candidates[:, 2:].float()
        scores = candidates[:, 1].float()
        keep_idx, keep_cnt = nms(boxes, scores, th, 20000)
        # Only the first ``keep_cnt`` entries of ``keep_idx`` are meaningful.
        kept = candidates.index_select(0, keep_idx[:keep_cnt].cpu())
        if kept.nelement() > 0:
            survivors.append(kept)
    return Ann(dets=torch.cat(survivors).numpy())
def getBoxes(preds, iid, conf=90, gth=0.5):
    """Select the confident predictions of one image and apply NMS.

    Args:
        preds: 2-D NumPy array; column 0 is matched against ``iid`` and then
            dropped. Of the remaining columns, column 1 is used as the score
            and columns 2+ as the box passed to ``nms``.
        iid: value of column 0 selecting the rows to keep.
        conf: rows with a score not strictly greater than this are dropped.
        gth: overlap threshold forwarded to ``nms``.

    Returns:
        A tensor holding the rows kept by NMS, or ``None`` when no row
        passes the image / confidence filters.
    """
    image_rows = preds[preds[:, 0] == iid]
    per_image = image_rows[:, 1:]
    confident = torch.from_numpy(per_image[per_image[:, 1] > conf])
    if confident.nelement() <= 0:
        return None
    keep_idx, keep_cnt = nms(confident[:, 2:].float(),
                             confident[:, 1].float(), gth, 200)
    # Only the first ``keep_cnt`` indices returned by nms are valid.
    return torch.index_select(confident, 0, keep_idx[:keep_cnt].cpu().long())
def classNMS(self, th=0.1):
    """Apply NMS separately to the detections of every class.

    Column 0 of the detections is compared against each class id, column 1
    is passed to ``nms`` as the score and columns 2+ as the box.

    Args:
        th: overlap threshold forwarded to ``nms``.

    Returns:
        A new ``Ann`` built from the rows kept across all classes,
        concatenated in ascending class order.

    Raises:
        RuntimeError: from ``torch.cat`` when no row survives for any class.
    """
    ann1 = self.totorch()
    ret = []
    # The previous ``range(max)`` never visited the highest class id, so its
    # detections were silently dropped; it also failed with a TypeError when
    # ``dets`` is a float array. Iterate over 0..max inclusive instead.
    num_classes = int(self.dets[:, 0].max()) + 1
    for i in range(num_classes):
        mask = torch.nonzero(ann1[:, 0] == i)
        if mask.nelement() == 0:
            continue
        clann = ann1.index_select(0, mask[:, 0])
        idx, cnt = nms(clann[:, 2:].float(), clann[:, 1].float(), th, 20000)
        # Only the first ``cnt`` entries of ``idx`` are meaningful.
        tmp = clann.index_select(0, idx[:cnt].cpu())
        if tmp.nelement() > 0:
            ret.append(tmp)
    return Ann(dets=torch.cat(ret).numpy())
def classNMS(ann1, th=0.1, num_classes=5):
    """Apply NMS separately to the detections of every class.

    Args:
        ann1: 2-D tensor of detections. Column 0 is compared against each
            class id, column 1 is passed to ``nms`` as the score and
            columns 2+ as the box.
        th: overlap threshold forwarded to ``nms``.
        num_classes: class ids ``0 .. num_classes - 1`` are processed.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        Tensor with the rows kept across all classes, concatenated in
        ascending class order. When nothing survives, an empty tensor with
        the same columns, dtype and device as ``ann1`` is returned (the
        previous implementation crashed in ``torch.cat``).
    """
    ret = []
    for i in range(num_classes):
        mask = torch.nonzero(ann1[:, 0] == i)
        if mask.nelement() == 0:
            continue
        clann = ann1.index_select(0, mask[:, 0])
        idx, cnt = nms(clann[:, 2:].float(), clann[:, 1].float(), th, 200)
        # Only the first ``cnt`` entries of ``idx`` are meaningful.
        tmp = clann.index_select(0, idx[:cnt].cpu())
        if tmp.nelement() > 0:
            ret.append(tmp)
    if not ret:
        # torch.cat rejects an empty list; hand back a zero-row slice instead.
        return ann1[:0]
    return torch.cat(ret)
def get_ann(dets, p=0.33, th=0.33, fpups=False):
    """Turn raw network output into an ``Ann`` of NMS-filtered boxes.

    A prior is kept when its confidence for class 0 is *below* ``p``
    (presumably class 0 is background, so a low value means "object" --
    confirm with the model). NMS is scored with ``1 - score``.

    Args:
        dets: ``(loc, conf, priors)`` triple as produced by the network.
        p: confidence threshold applied with ``lt``.
        th: overlap threshold forwarded to ``nms``.
        fpups: when true, the last confidence column must also be below
            ``p`` for a prior to be kept.

    Returns:
        ``Ann`` holding the kept boxes scaled by the module-level
        ``args.size`` and rounded, or ``None`` when no prior is selected.
    """
    loc, conf, priors = dets
    decoded_boxes = decode(loc, priors, [0.1, 0.2])
    class_major = conf.t().contiguous()
    first_class = class_major[0]

    keep = first_class.lt(p)
    if fpups:
        keep = keep & class_major[-1].lt(p)

    scores = first_class[keep]
    box_mask = keep.unsqueeze(1).expand_as(decoded_boxes)
    boxes = decoded_boxes[box_mask].view(-1, 4)
    if boxes.nelement() == 0:
        return None

    ids, count = nms(boxes, 1 - scores, th, 200)
    # Only the first ``count`` indices returned by nms are valid.
    kept = boxes[ids.cpu()[:count]]
    return Ann(dets=(kept * args.size).round().numpy())
def forward(self, loc_data, conf_data, prior_data):
    """Decode predictions and run per-class NMS for every image in the batch.

    Args:
        loc_data: (tensor) location predictions; ``loc_data[i]`` is decoded
            against ``prior_data``, and ``loc_data.size(0)`` is the batch
            size.
        conf_data: (tensor) class confidences, viewable as
            ``[batch, num_priors, num_classes]``.
        prior_data: (tensor) prior boxes; ``prior_data.size(0)`` is used as
            ``num_priors``.

    Returns:
        Tensor of shape ``[batch, num_classes, top_k, 5]``. For every class
        ``>= 1`` the first ``count`` rows hold ``(score, box[4])`` of the
        detections kept by NMS; all other entries (including class 0) are
        zero.
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        conf_scores = conf_preds[i].clone()

        # For each foreground class, perform nms.
        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            # An empty selection is a 1-D tensor of size 0 on PyTorch >= 0.4,
            # so the old ``scores.dim() == 0`` test never fired and empty
            # inputs reached nms. ``numel`` works on every version.
            if scores.numel() == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    # NOTE: the previous version ended with a rank computation followed by
    # ``flt[mask].fill_(0)``. Boolean indexing returns a copy, so that fill
    # never touched ``output``; the dead (and costly) sort was removed and
    # the returned values are unchanged.
    return output
def validate(args, net, val_data_loader, val_dataset, iteration_num,
             iou_thresh=0.5):
    """Run the SSD network over a validation set and evaluate detections.

    For every image the box predictions are decoded, per-class NMS is
    applied, and boxes are scaled to pixel units and clipped to the image.

    Args:
        args: options object; reads ``num_classes``, ``cuda``, ``cfg``,
            ``conf_thresh``, ``nms_thresh`` and ``topk``.
        net: network returning ``(loc, conf, priors)`` and exposing
            ``net.softmax``.
        val_data_loader: loader yielding ``(images, targets, img_indexs)``.
        val_dataset: dataset; only its length is used (progress output).
        iteration_num: training iteration, used in log messages only.
        iou_thresh: IoU threshold forwarded to ``evaluate_detections``.

    Returns:
        Whatever ``evaluate_detections`` returns.
    """
    print('Validating at ', iteration_num)
    num_images = len(val_dataset)
    num_classes = args.num_classes

    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    val_step = 100
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()

    batch_iterator = iter(val_data_loader)
    # ``volatile=True`` is ignored since PyTorch 0.4; no_grad is what actually
    # disables graph construction during inference.
    with torch.no_grad():
        for val_itr in range(len(val_data_loader)):
            log_step = val_itr % val_step == 0

            torch.cuda.synchronize()
            t1 = time.perf_counter()

            images, targets, img_indexs = next(batch_iterator)
            batch_size = images.size(0)
            height, width = images.size(2), images.size(3)

            if args.cuda:
                images = images.cuda()
            output = net(images)

            loc_data = output[0]
            conf_preds = output[1]
            prior_data = output[2]

            if print_time and log_step:
                torch.cuda.synchronize()
                tf = time.perf_counter()
                print('Forward Time {:0.3f}'.format(tf - t1))

            for b in range(batch_size):
                # Ground truth is scaled to pixels in place.
                gt = targets[b].numpy()
                gt[:, [0, 2]] *= width
                gt[:, [1, 3]] *= height
                gt_boxes.append(gt)

                decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                       args.cfg['variance']).clone()
                conf_scores = net.softmax(conf_preds[b]).data.clone()

                for cl_ind in range(1, num_classes):
                    scores = conf_scores[:, cl_ind]
                    c_mask = scores.gt(args.conf_thresh)
                    # Keep the selection 1-D: squeezing it turned a single
                    # detection into a 0-dim tensor (wrongly treated as
                    # "nothing found"), while a truly empty selection has
                    # dim 1 and slipped past the old ``dim() == 0`` test.
                    scores = scores[c_mask]
                    if scores.numel() == 0:
                        det_boxes[cl_ind - 1].append(np.asarray([]))
                        continue

                    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                    boxes = decoded_boxes[l_mask].view(-1, 4)
                    # idx of highest scoring and non-overlapping boxes
                    ids, counts = nms(boxes, scores, args.nms_thresh,
                                      args.topk)
                    scores = scores[ids[:counts]].cpu().numpy()
                    boxes = boxes[ids[:counts]].cpu().numpy()

                    # Scale to pixels, then clip to the image rectangle.
                    boxes[:, [0, 2]] *= width
                    boxes[:, [1, 3]] *= height
                    boxes[:, :2] = np.maximum(boxes[:, :2], 0)
                    boxes[:, 2] = np.minimum(boxes[:, 2], width)
                    boxes[:, 3] = np.minimum(boxes[:, 3], height)

                    cls_dets = np.hstack(
                        (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                               copy=True)
                    det_boxes[cl_ind - 1].append(cls_dets)
                count += 1

            if log_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                    count, num_images, te - ts))
                torch.cuda.synchronize()
                ts = time.perf_counter()
            if print_time and log_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('NMS stuff Time {:0.3f}'.format(te - tf))

    print('Evaluating detections for itration number ', iteration_num)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES,
                               iou_thresh=iou_thresh)
def forward(self, x, targets):
    """Run the two-stage car / license-plate detector on image(s) ``x``.

    The backbone feeds two sets of multibox heads (car heads and
    whole-image plate heads). Car proposals from ``self.detect`` define
    crops of the conv1_1 feature map, which a second small network
    (``vgg_2`` and its heads) processes to localise the plate in each crop.

    Args:
        x: input image or batch of images.
        targets: per-image ground-truth tensors; only read when
            ``self.phase == 'train'``. Columns used here: 4 (has-plate
            flag), 9:13 (plate bbox), 13:21 (plate corner points).

    Returns:
        * ``'train'``: a 15-tuple -- plate-head outputs (loc, conf, priors,
          four corners), car-head outputs (loc, conf, priors, has_lp,
          size_lp, offset), second-network outputs (loc, conf, priors,
          four corners) and the second-network ground truth with 14
          columns (bbox 4, corners 8, label, valid flag). When no usable
          ground truth exists, zero placeholders are returned for the
          second network and the valid flag is 0.
        * ``'test'``: tensor of shape ``[1, 3, 200, 13]``; index 1 holds the
          car detections (plus the expanded plate region in columns 5:9
          and a flag in column 9), index 2 the plate detections after NMS.
        * any other phase: prints an error and returns ``None``.
    """

    def head(layer, feat):
        # Apply a prediction head and move channels to the last axis.
        return layer(feat).permute(0, 2, 3, 1).contiguous()

    def flatten(outs):
        # Flatten every per-source output and concatenate along dim 1.
        return torch.cat([o.view(o.size(0), -1) for o in outs], 1)

    def second_stage(crops):
        # Run the crops through the second network and its multibox heads.
        feats = []
        x_2 = crops
        for start, stop in ((0, 4), (4, 9), (9, 14)):
            for k in range(start, stop):
                x_2 = self.vgg_2[k](x_2)
            feats.append(x_2)
        locs, confs, corners = [], [], []
        for feat, l_2, c_2, f_2 in zip(feats, self.loc_2, self.conf_2,
                                       self.four_corners_2):
            locs.append(head(l_2, feat))
            confs.append(head(c_2, feat))
            corners.append(head(f_2, feat))
        return flatten(locs), flatten(confs), flatten(corners)

    sources = []
    carplate_sources = []

    # apply vgg up to conv1_1 relu; these features are cropped later
    for k in range(2):
        x = self.vgg[k](x)
    conv1_1_feat = x

    # apply vgg up to conv4_3 relu
    for k in range(2, 23):
        x = self.vgg[k](x)
    normed = self.L2Norm(x)
    sources.append(normed)
    carplate_sources.append(normed)

    # apply vgg up to fc7
    for k in range(23, len(self.vgg)):
        x = self.vgg[k](x)
    sources.append(x)
    carplate_sources.append(x)

    # apply extra layers and cache source layer outputs
    for k, v in enumerate(self.extras):
        x = F.relu(v(x), inplace=True)
        if k % 2 == 1:
            sources.append(x)
            carplate_sources.append(x)

    # apply the car multibox heads to the source layers
    loc, conf, has_lp, size_lp, offset = [], [], [], [], []
    for feat, l, c, h, s, o in zip(sources, self.loc, self.conf,
                                   self.has_lp, self.size_lp, self.offset):
        loc.append(head(l, feat))
        conf.append(head(c, feat))
        has_lp.append(head(h, feat))
        size_lp.append(head(s, feat))
        offset.append(head(o, feat))
    loc = flatten(loc)
    conf = flatten(conf)
    has_lp = flatten(has_lp)
    size_lp = flatten(size_lp)
    offset = flatten(offset)

    # [num, num_classes, top_k, 10] (per the original author's note)
    rpn_rois = self.detect(
        loc.view(loc.size(0), -1, 4),  # loc preds
        self.softmax(conf.view(conf.size(0), -1,
                               self.num_classes)),  # conf preds
        # default boxes; moved to the GPU here because the earlier approach
        # caused a serious bug with parameters spread over different GPUs
        self.priors.cuda(),
        self.sigmoid(has_lp.view(has_lp.size(0), -1, 1)),
        size_lp.view(size_lp.size(0), -1, 2),
        offset.view(offset.size(0), -1, 2))
    # no gradient flows through the proposals
    rpn_rois = rpn_rois.detach()

    # roi align or roi warping
    crop_height = self.size_2
    crop_width = self.size_2
    is_cuda = torch.cuda.is_available()

    # apply the whole-image plate heads to the source layers
    carplate_loc, carplate_conf, carplate_four_corners = [], [], []
    for feat, l, c, f in zip(carplate_sources, self.carplate_loc,
                             self.carplate_conf,
                             self.carplate_four_corners):
        carplate_loc.append(head(l, feat))
        carplate_conf.append(head(c, feat))
        carplate_four_corners.append(head(f, feat))
    carplate_loc = flatten(carplate_loc)
    carplate_conf = flatten(carplate_conf)
    carplate_four_corners = flatten(carplate_four_corners)

    if self.phase == 'train':
        # rpn_rois: [num, num_classes, top_k, 10]
        # rois: [num, num_gt, 6], 6: IOU with GT, bbox(4), max iou with GT
        # or not
        # rois and targets are lists of tensors so that the per-image
        # tensors may have different sizes
        proposal_target_offset = ProposalTargetLayer_offset()
        rois = proposal_target_offset(rpn_rois, targets, self.expand_num)

        gt_new = torch.empty(0)
        boxes_data_list = []
        box_index_data_list = []
        for idx in range(len(rois)):
            targets_tensor = targets[idx]
            num_gt = targets_tensor.shape[0]

            # positions and four corner points of all GT plates
            lp_bbox = targets_tensor[:, 9:13]
            lp_four_points = targets_tensor[:, 13:21]

            # keep the plate GTs that lie inside their roi (and belong to a
            # car that has a plate); they are re-expressed relative to the
            # roi below
            rois_squeeze = rois[idx][:num_gt, 1:-1]
            inside_roi = [
                a_include_b(rois_squeeze[i, :], lp_bbox[i, :])
                for i in range(num_gt)
            ]
            has_plate = [
                targets_tensor[i, 4].cpu().numpy() > 0
                for i in range(num_gt)
            ]
            gt_in_rois_list = (np.array(inside_roi) + 0) & (
                np.array(has_plate) + 0)
            gt_in_rois_tensor = torch.tensor(gt_in_rois_list).type(
                torch.uint8).bool()
            rois_squeeze = rois_squeeze[gt_in_rois_tensor, :]
            lp_bbox = lp_bbox[gt_in_rois_tensor, :]
            lp_four_points = lp_four_points[gt_in_rois_tensor, :]

            if rois_squeeze.shape[0] > 0:
                rois_width = rois_squeeze[:, 2] - rois_squeeze[:, 0]
                rois_height = rois_squeeze[:, 3] - rois_squeeze[:, 1]
                rois_wh = torch.cat(
                    (rois_width.unsqueeze(1), rois_height.unsqueeze(1)), 1)

                # plate GT bbox relative to the roi
                rois_top_left = rois_squeeze[:, :2].repeat(1, 2)
                rois_size = rois_wh.repeat(1, 2)
                gt_bbox = (lp_bbox - rois_top_left) / rois_size

                # plate corner points relative to the roi
                rois_top_left_2 = rois_squeeze[:, :2].repeat(1, 4)
                rois_size_2 = rois_wh.repeat(1, 4)
                gt_four_points = (lp_four_points -
                                  rois_top_left_2) / rois_size_2

                # GT label
                gt_label = torch.zeros((gt_bbox.shape[0], 1))
                # Valid flag: marks a real GT. Multi-GPU gathering needs an
                # output from every replica, so placeholder "not valid"
                # entries are fabricated further down.
                # TODO: this is not a very clean approach
                gt_valid = torch.ones((gt_bbox.shape[0], 1))
                gt_cur = torch.cat(
                    (gt_bbox, gt_four_points, gt_label, gt_valid), 1)
                gt_new = torch.cat((gt_new, gt_cur), 0)

                # Crop boxes use (y1, x1, y2, x2) order. Built once per
                # image; it used to be rebuilt on every loop iteration.
                boxes_data = torch.zeros(rois_squeeze.shape)
                boxes_data[:, 0] = rois_squeeze[:, 1]
                boxes_data[:, 1] = rois_squeeze[:, 0]
                boxes_data[:, 2] = rois_squeeze[:, 3]
                boxes_data[:, 3] = rois_squeeze[:, 2]
                # One entry per GT so the lists line up with the n crops
                # produced below.
                for gt_idx in range(gt_cur.shape[0]):
                    box_index_data_list.append(idx)  # index of this image
                    boxes_data_list.append(
                        boxes_data[gt_idx, :].cpu().numpy())  # this region

        loc_2 = None
        if gt_new.shape[0] > 0:
            # the car is used as the roi
            # box = [y1/height, x1/width, y2/height, x2/width]
            boxes_data = torch.FloatTensor(boxes_data_list)
            # index saying which box crops which image
            box_index_data = torch.IntTensor(box_index_data_list)

            # The image features must stay differentiable during training
            # (see the ROIAlign source); not needed at test time.
            image_torch = to_varabile(conv1_1_feat, is_cuda=is_cuda,
                                      requires_grad=True)
            boxes = to_varabile(boxes_data, is_cuda=is_cuda,
                                requires_grad=False)
            box_index = to_varabile(box_index_data, is_cuda=is_cuda,
                                    requires_grad=False)

            # n*64*crop_height*crop_width
            crops_torch = CropAndResizeFunction.apply(
                image_torch, boxes, box_index, crop_height, crop_width, 0)

            # second network
            loc_2, conf_2, four_corners_2 = second_stage(crops_torch)

        first_stage = (
            carplate_loc.view(carplate_loc.size(0), -1, 4),
            carplate_conf.view(carplate_conf.size(0), -1,
                               self.num_classes),
            self.carplate_priors,
            carplate_four_corners.view(carplate_four_corners.size(0), -1,
                                       8),
            loc.view(loc.size(0), -1, 4),
            conf.view(conf.size(0), -1, self.num_classes),
            self.priors,
            has_lp.view(has_lp.size(0), -1, 1),
            size_lp.view(size_lp.size(0), -1, 2),
            offset.view(offset.size(0), -1, 2),
        )
        if loc_2 is None:
            # No usable GT: the second network has neither predictions nor
            # GT, so emit zero placeholders.
            # TODO: this is a very unfriendly approach
            num_priors_2 = self.priors_2.shape[0]
            output = first_stage + (
                torch.zeros(1, num_priors_2, 4),
                torch.zeros(1, num_priors_2, 2),
                self.priors_2,
                torch.zeros(1, num_priors_2, 8),
                torch.zeros(1, 14),  # last element 0 => GT not valid
            )
        else:
            output = first_stage + (
                loc_2.view(loc_2.size(0), -1, 4),
                conf_2.view(conf_2.size(0), -1, self.num_classes),
                self.priors_2,
                four_corners_2.view(four_corners_2.size(0), -1, 8),
                gt_new,
            )
        return output

    elif self.phase == 'test':
        has_lp_th = 0.5
        th = 0.5

        # holds the detection results for both cars and plates
        output = torch.zeros(1, 3, 200, 13)
        # store the car detections
        output[0, 1, :, :5] = rpn_rois[0, 1, :, :5]

        # only look for a plate in detections that are confidently a car
        # and are predicted to have a plate
        rois_idx = (rpn_rois[0, 1, :, 0] > th) & (rpn_rois[0, 1, :, 5] >
                                                  has_lp_th)
        matches = rpn_rois[0, 1, rois_idx, :]
        if matches.shape[0] == 0:
            return output

        # build the plate region from the predicted offset and size,
        # enlarged by expand_num
        car_center = (matches[:, [1, 2]] + matches[:, [3, 4]]) / 2
        lp_center = car_center + matches[:, [8, 9]]
        half_extent = matches[:, [6, 7]] / 2 * self.expand_num
        lp_bbox = torch.cat(
            (lp_center - half_extent, lp_center + half_extent), 1)
        # Clamp the region to the image. ``clamp`` replaces max/min against
        # freshly created CPU zeros/ones tensors, which failed whenever
        # ``lp_bbox`` lived on another device.
        lp_bbox = lp_bbox.clamp(min=0, max=1)
        # clamp the region to the detected car
        lp_bbox = torch.max(lp_bbox, matches[:, 1:3].repeat(1, 2))
        lp_bbox = torch.min(lp_bbox, matches[:, 3:5].repeat(1, 2))

        # [num_car, 4]
        rois_squeeze = lp_bbox

        # box = [y1/height, x1/width, y2/height, x2/width]
        boxes_data = torch.zeros(rois_squeeze.shape)
        boxes_data[:, 0] = rois_squeeze[:, 1]
        boxes_data[:, 1] = rois_squeeze[:, 0]
        boxes_data[:, 2] = rois_squeeze[:, 3]
        boxes_data[:, 3] = rois_squeeze[:, 2]

        # index indicating which box crops which image
        box_index_data = torch.IntTensor(range(boxes_data.shape[0]))

        # Crucial: the feature map must be repeated once per roi, otherwise
        # the cropped features are all zeros.
        image_data = conv1_1_feat.repeat(rois_squeeze.shape[0], 1, 1, 1)

        image_torch = to_varabile(image_data, is_cuda=is_cuda,
                                  requires_grad=False)
        boxes = to_varabile(boxes_data, is_cuda=is_cuda,
                            requires_grad=False)
        box_index = to_varabile(box_index_data, is_cuda=is_cuda,
                                requires_grad=False)

        # n*64*crop_height*crop_width
        crops_torch = CropAndResizeFunction.apply(image_torch, boxes,
                                                  box_index, crop_height,
                                                  crop_width, 0)

        # second network
        loc_2, conf_2, four_corners_2 = second_stage(crops_torch)
        output_2 = self.detect_2(
            loc_2.view(loc_2.size(0), -1, 4),
            self.softmax_2(
                conf_2.view(conf_2.size(0), -1, self.num_classes)),
            self.priors_2.cuda(),
            four_corners_2.view(four_corners_2.size(0), -1, 8))

        # per car, keep only the highest-confidence plate: (num_car, 13)
        output_2_pos = output_2[:, 1, 0, :]
        # map crop-relative coordinates back to image coordinates
        rois_size = rois_squeeze[:, 2:4] - rois_squeeze[:, :2]
        rois_top_left = rois_squeeze[:, :2]
        rois_size_expand = rois_size.repeat(1, 6)  # (num_car, 12)
        rois_top_left_expand = rois_top_left.repeat(1, 6)  # (num_car, 12)
        output_2_pos[:, 1:] = (output_2_pos[:, 1:] * rois_size_expand +
                               rois_top_left_expand)
        num_car = output_2_pos.shape[0]

        # plate detections from the whole-image heads
        output_carplate = self.carplate_detect(
            carplate_loc.view(carplate_loc.size(0), -1, 4),  # loc preds
            self.carplate_softmax(
                carplate_conf.view(carplate_conf.size(0), -1,
                                   self.num_classes)),  # conf preds
            self.carplate_priors.cuda(),  # default boxes
            carplate_four_corners.view(carplate_four_corners.size(0), -1,
                                       8))

        # merge both plate sources and run NMS on the union
        conf_thresh = 0.01
        nms_thresh = 0.45
        top_k = 200
        merged = torch.cat((output_2_pos, output_carplate[0, 1, :, :]),
                           0).detach()
        # Filter whole rows. Previously only the scores were masked while
        # boxes and corners stayed unfiltered, so the indices returned by
        # nms (relative to the filtered scores) selected the wrong boxes.
        merged = merged[merged[:, 0].gt(conf_thresh)]
        if merged.shape[0] > 0:
            scores = merged[:, 0]
            boxes = merged[:, 1:5]
            corners = merged[:, 5:]
            from layers.box_utils import nms
            ids, count = nms(boxes, scores, nms_thresh, top_k)
            output[0, 2, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]],
                 corners[ids[:count]]), 1)

        # store the expanded plate region after the car columns and flag it
        output[0, 1, :num_car, 5:9] = lp_bbox
        output[0, 1, :num_car, 9] = 1
        return output

    else:
        print("ERROR: Phase: " + self.phase + " not recognized")
        return
def test_net(net, save_root, exp_name, input_type, dataset, iteration,
             num_classes, thresh=0.5):
    """Test a SSD network on an Action image database.

    Builds a non-shuffled loader over ``dataset``, runs the network, applies
    per-class NMS and evaluates the detections. Reads the module-level
    ``args`` (batch size, workers, ``listid``, ``cuda``, thresholds) and
    ``cfg`` (``'variance'``).

    Args:
        net: network returning ``(loc, conf, priors)`` and exposing
            ``net.softmax``.
        save_root: root directory, only used to build the printed
            detection directory name.
        exp_name: experiment name (currently unused).
        input_type: input modality tag used in the printed directory name.
        dataset: dataset to evaluate.
        iteration: training iteration, used in names and log messages.
        num_classes: number of classes including background (class 0).
        thresh: IoU threshold forwarded to ``evaluate_detections``.

    Returns:
        Whatever ``evaluate_detections`` returns.
    """
    val_data_loader = data.DataLoader(dataset, args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    val_step = 250
    num_images = len(dataset)
    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    print('Number of images ', len(dataset), ' number of batchs',
          num_batches)
    # NOTE: nothing is written to this directory by the current code; the
    # name is only reported.
    frame_save_dir = save_root + 'detections/CONV-' + input_type + '-' + args.listid + '-' + str(
        iteration).zfill(6) + '/'
    print('\n\n\nDetections will be store in ', frame_save_dir, '\n\n')

    batch_iterator = iter(val_data_loader)
    # ``volatile=True`` is ignored since PyTorch 0.4; no_grad is what actually
    # disables graph construction during inference.
    with torch.no_grad():
        for val_itr in range(num_batches):
            log_step = val_itr % val_step == 0

            torch.cuda.synchronize()
            t1 = time.perf_counter()

            images, targets, img_indexs = next(batch_iterator)
            batch_size = images.size(0)
            height, width = images.size(2), images.size(3)

            if args.cuda:
                images = images.cuda()
            output = net(images)

            loc_data = output[0]
            conf_preds = output[1]
            prior_data = output[2]

            if print_time and log_step:
                torch.cuda.synchronize()
                tf = time.perf_counter()
                print('Forward Time {:0.3f}'.format(tf - t1))

            for b in range(batch_size):
                # Ground truth is scaled to pixels in place.
                gt = targets[b].numpy()
                gt[:, [0, 2]] *= width
                gt[:, [1, 3]] *= height
                gt_boxes.append(gt)

                decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                       cfg['variance']).clone()
                conf_scores = net.softmax(conf_preds[b]).data.clone()

                for cl_ind in range(1, num_classes):
                    scores = conf_scores[:, cl_ind]
                    c_mask = scores.gt(args.conf_thresh)
                    # Keep the selection 1-D: squeezing it turned a single
                    # detection into a 0-dim tensor (wrongly treated as
                    # "nothing found"), while a truly empty selection has
                    # dim 1 and slipped past the old ``dim() == 0`` test.
                    scores = scores[c_mask]
                    if scores.numel() == 0:
                        det_boxes[cl_ind - 1].append(np.asarray([]))
                        continue

                    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                    boxes = decoded_boxes[l_mask].view(-1, 4)
                    # idx of highest scoring and non-overlapping boxes
                    ids, counts = nms(boxes, scores, args.nms_thresh,
                                      args.topk)
                    scores = scores[ids[:counts]].cpu().numpy()
                    boxes = boxes[ids[:counts]].cpu().numpy()

                    # Scale to pixels, then clip to the image rectangle.
                    boxes[:, [0, 2]] *= width
                    boxes[:, [1, 3]] *= height
                    boxes[:, :2] = np.maximum(boxes[:, :2], 0)
                    boxes[:, 2] = np.minimum(boxes[:, 2], width)
                    boxes[:, 3] = np.minimum(boxes[:, 3], height)

                    cls_dets = np.hstack(
                        (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                               copy=True)
                    det_boxes[cl_ind - 1].append(cls_dets)
                count += 1

            if log_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                    count, num_images, te - ts))
                torch.cuda.synchronize()
                ts = time.perf_counter()
            if print_time and log_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('NMS stuff Time {:0.3f}'.format(te - tf))

    print('Evaluating detections for itration number ', iteration)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES,
                               iou_thresh=thresh)
def validate(args, net, val_data_loader, val_dataset, epoch, iou_thresh=0.5,
             num_gpu=1):
    """Run a multi-GPU SSD network over validation data and evaluate it.

    One loader per GPU is consumed in lock-step. The batches are
    concatenated into a single tensor, and after each loader's batch one
    extra constant 1x3x300x300 image is appended (zeros for loader 0,
    ``1 + ii`` for loader ``ii + 1``); presumably the network uses these to
    tell the sub-batches apart -- confirm against the model.

    Args:
        args: options object; reads ``num_classes``, ``cuda``, ``cfg``,
            ``conf_thresh``, ``nms_thresh`` and ``topk``.
        net: wrapped network; called as ``net(images, img_indexs)`` and
            exposing ``net.module.softmax``.
        val_data_loader: sequence of ``num_gpu`` loaders, each yielding
            ``(images, targets, img_indexs)``.
        val_dataset: dataset; only its length is used (progress output).
        epoch: epoch number, used in log messages only.
        iou_thresh: IoU threshold forwarded to ``evaluate_detections``.
        num_gpu: number of loaders / GPUs.

    Returns:
        Whatever ``evaluate_detections`` returns.
    """
    print('Validating at ', epoch)
    num_images = len(val_dataset)
    num_classes = args.num_classes

    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    val_step = 100
    count = 0
    net.eval()  # switch net to evaluation mode
    torch.cuda.synchronize()
    ts = time.perf_counter()

    # create one batch iterator per GPU
    batch_iterator = [[] for i in range(num_gpu)]
    max_x_y = 0
    min_x_y = []
    for i in range(num_gpu):
        batch_iterator[i] = iter(val_data_loader[i])
        min_x_y.append(len(val_data_loader[i]))
        max_x_y = max(max_x_y, len(val_data_loader[i]))

    # ``volatile=True`` is ignored since PyTorch 0.4; no_grad is what actually
    # disables graph construction during inference.
    with torch.no_grad():
        for val_itr in range(max_x_y):
            log_step = val_itr % val_step == 0

            # NOTE(review): once a loader is exhausted a fresh iterator is
            # created on *every* following iteration, so that loader keeps
            # yielding the first batch of a new pass. Kept as-is.
            for ii in range(num_gpu):
                if val_itr >= min_x_y[ii]:
                    batch_iterator[ii] = iter(val_data_loader[ii])

            torch.cuda.synchronize()
            t1 = time.perf_counter()

            img_indexs = []
            images, targets, img_in = next(batch_iterator[0])
            img_indexs.append(img_in)
            img = torch.zeros([1, 3, 300, 300])
            images = torch.cat((images, img.type_as(images)), 0)
            for ii in range(num_gpu - 1):
                img, targ, img_in = next(batch_iterator[ii + 1])
                images = torch.cat((images, img), 0)
                img = (torch.ones([1, 3, 300, 300]) + ii)
                images = torch.cat((images, img.type_as(images)), 0)
                for iii in range(len(targ)):
                    targets.append(targ[iii])
                img_indexs.append(img_in)

            # the extra constant images do not count as real samples
            batch_size = images.size(0) - num_gpu
            height, width = images.size(2), images.size(3)

            if args.cuda:
                images = images.cuda()
            output = net(images, img_indexs)

            loc_data = output[0]
            conf_preds = output[1]
            prior_data = output[2]
            prior_data = prior_data[:loc_data.size(1), :]

            if print_time and log_step:
                torch.cuda.synchronize()
                tf = time.perf_counter()
                print('Forward Time {:0.3f}'.format(tf - t1))

            for b in range(batch_size):
                # Ground truth is scaled to pixels in place.
                gt = targets[b].numpy()
                gt[:, [0, 2]] *= width
                gt[:, [1, 3]] *= height
                gt_boxes.append(gt)

                decoded_boxes = decode(loc_data[b].data, prior_data.data,
                                       args.cfg['variance']).clone()
                conf_scores = net.module.softmax(
                    conf_preds[b]).data.clone()

                for cl_ind in range(1, num_classes):
                    scores = conf_scores[:, cl_ind]
                    c_mask = scores.gt(args.conf_thresh)
                    # Keep the selection 1-D: squeezing it turned a single
                    # detection into a 0-dim tensor (wrongly treated as
                    # "nothing found"), while a truly empty selection has
                    # dim 1 and slipped past the old ``dim() == 0`` test.
                    scores = scores[c_mask]
                    if scores.numel() == 0:
                        det_boxes[cl_ind - 1].append(np.asarray([]))
                        continue

                    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                    boxes = decoded_boxes[l_mask].view(-1, 4)
                    # idx of highest scoring and non-overlapping boxes
                    ids, counts = nms(boxes, scores, args.nms_thresh,
                                      args.topk)
                    scores = scores[ids[:counts]].cpu().numpy()
                    boxes = boxes[ids[:counts]].cpu().numpy()

                    # Scale to pixels, then clip to the image rectangle.
                    boxes[:, [0, 2]] *= width
                    boxes[:, [1, 3]] *= height
                    boxes[:, :2] = np.maximum(boxes[:, :2], 0)
                    boxes[:, 2] = np.minimum(boxes[:, 2], width)
                    boxes[:, 3] = np.minimum(boxes[:, 3], height)

                    cls_dets = np.hstack(
                        (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                               copy=True)
                    det_boxes[cl_ind - 1].append(cls_dets)
                count += 1

            if log_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                    count, num_images, te - ts))
                torch.cuda.synchronize()
                ts = time.perf_counter()
            if print_time and log_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('NMS stuff Time {:0.3f}'.format(te - tf))

    print('Evaluating detections for epoch number ', epoch)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES,
                               iou_thresh=iou_thresh)
def test_net(net, priors, args, dataset, iteration, thresh=0.5):
    """Test a sequence SSD network on an Action image database.

    For every sample the raw scores and decoded boxes are saved to a
    ``.mat`` file below ``<save_root>detections/<exp_name>-egNN/<video>/``.
    Per-class NMS is then applied to the first four box columns and the
    detections are evaluated.

    Args:
        net: network called as ``net(images)`` and returning
            ``(conf_preds, loc_data)``.
        priors: prior boxes passed to ``decode_seq``.
        args: options object; reads ``batch_size``, ``num_workers``,
            ``dataset``, ``save_root``, ``exp_name``, ``eval_gap``,
            ``seq_len``, ``cfg``, ``num_classes``, ``conf_thresh``,
            ``nms_thresh`` and ``topk``.
        dataset: dataset exposing ``ids`` and ``video_list``.
        iteration: training iteration, used in log messages only.
        thresh: IoU threshold forwarded to ``evaluate_detections``.

    Returns:
        Whatever ``evaluate_detections`` returns.
    """
    print('Test a SSD network on an Action image database')
    val_data_loader = data.DataLoader(dataset, args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    print('Done making val dataset')
    image_ids = dataset.ids
    val_step = 250
    num_images = len(dataset)
    video_list = dataset.video_list
    det_boxes = [[] for _ in range(len(CLASSES[args.dataset]))]
    gt_boxes = []
    print_time = True
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    frame_save_dir = '{}detections/{:s}-eg{:02d}/'.format(
        args.save_root, args.exp_name, args.eval_gap)
    softmax = nn.Softmax(dim=2).cuda()

    batch_iterator = iter(val_data_loader)
    # Inference only: without no_grad the outputs carry autograd history and
    # the ``.numpy()`` calls below raise on tensors that require grad.
    with torch.no_grad():
        for val_itr in range(len(val_data_loader)):
            log_step = val_itr % val_step == 0

            torch.cuda.synchronize()
            t1 = time.perf_counter()

            images, ground_truths, _, _, num_mt, img_indexs = next(
                batch_iterator)
            batch_size = images[0].size(0)
            height, width = images[0].size(3), images[0].size(4)

            images = [
                img.cuda(0, non_blocking=True) for img in images
                if not isinstance(img, list)
            ]
            conf_preds, loc_data = net(images)
            conf_scores_all = softmax(conf_preds).clone()

            if print_time and log_step:
                torch.cuda.synchronize()
                tf = time.perf_counter()
                print('Forward Time {:0.3f}'.format(tf - t1))

            for b in range(batch_size):
                # Take every ``seq_len``-th ground-truth row. ``arange``
                # keeps an integer dtype even when there are no rows
                # (``np.asarray([])`` is float and cannot index).
                inds = np.arange(int(num_mt[b])) * args.seq_len
                gt = ground_truths[b].numpy()
                gt = gt[inds]
                gt[:, [0, 2]] *= width
                gt[:, [1, 3]] *= height
                gt_boxes.append(gt)

                decoded_boxes = decode_seq(loc_data[b], priors,
                                           args.cfg['variance'],
                                           args.seq_len)
                decoded_boxes = decoded_boxes.cpu()
                conf_scores = conf_scores_all[b].cpu().clone()

                # Save raw scores and boxes for this sample.
                index = img_indexs[b]
                annot_info = image_ids[index]
                frame_num = annot_info[1][0] + 1
                video_id = annot_info[0]
                videoname = video_list[video_id]
                output_dir = frame_save_dir + videoname
                os.makedirs(output_dir, exist_ok=True)
                output_file_name_tmp = output_dir + '/{:06d}.mat'.format(
                    int(frame_num))
                sio.savemat(output_file_name_tmp,
                            mdict={'scores': conf_scores.numpy(),
                                   'loc': decoded_boxes.numpy()})

                # Only the first four box columns are used for evaluation.
                decoded_boxes = decoded_boxes[:, :4].clone()
                for cl_ind in range(1, args.num_classes):
                    scores = conf_scores[:, cl_ind]
                    c_mask = scores.gt(args.conf_thresh)
                    # Keep the selection 1-D: squeezing it turned a single
                    # detection into a 0-dim tensor (wrongly treated as
                    # "nothing found"), while a truly empty selection has
                    # dim 1 and slipped past the old ``dim() == 0`` test.
                    scores = scores[c_mask]
                    if scores.numel() == 0:
                        det_boxes[cl_ind - 1].append(np.asarray([]))
                        continue

                    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                    boxes = decoded_boxes[l_mask].view(-1, 4)
                    # idx of highest scoring and non-overlapping boxes
                    ids, counts = nms(boxes, scores, args.nms_thresh,
                                      args.topk)
                    scores = scores[ids[:counts]].numpy()
                    boxes = boxes[ids[:counts]].numpy()

                    # Scale to pixels, then clip to the image rectangle.
                    boxes[:, [0, 2]] *= width
                    boxes[:, [1, 3]] *= height
                    boxes[:, :2] = np.maximum(boxes[:, :2], 0)
                    boxes[:, 2] = np.minimum(boxes[:, 2], width)
                    boxes[:, 3] = np.minimum(boxes[:, 3], height)

                    cls_dets = np.hstack(
                        (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                               copy=True)
                    det_boxes[cl_ind - 1].append(cls_dets)
                count += 1

            if log_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(
                    count, num_images, te - ts))
                torch.cuda.synchronize()
                ts = time.perf_counter()
            if print_time and log_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('NMS stuff Time {:0.3f}'.format(te - tf))

    print('Evaluating detections for itration number ', iteration)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES[args.dataset],
                               iou_thresh=thresh)
def validate(args, net, priors, val_data_loader, val_dataset, iteration_num, iou_thresh=0.5):
    """Run the network over the validation loader and evaluate its detections.

    For every sample the predicted boxes are decoded, thresholded on
    confidence and filtered with per-class NMS. Surviving boxes are scaled to
    pixel coordinates, clipped to the image and collected, together with the
    scaled ground truth, before being handed to ``evaluate_detections``.

    Args:
        args: Run configuration. Reads ``num_classes``, ``dataset``,
            ``seq_len``, ``cfg['variance']``, ``conf_thresh``, ``nms_thresh``
            and ``topk``.
        net: Model called as ``net(images)``; must return
            ``(conf_preds, loc_data)``.
        priors: Prior boxes; moved to the GPU and passed to ``decode_seq``.
        val_data_loader: Loader yielding 6-tuples of which ``images``,
            ``ground_truths`` and ``num_mt`` (positions 0, 1 and 4) are used.
        val_dataset: Only its length is used, for the progress message.
        iteration_num: Training iteration; used in log messages only.
        iou_thresh: IoU threshold forwarded to ``evaluate_detections``.

    Returns:
        The result of ``evaluate_detections`` on the collected ground-truth
        and detection boxes.
    """
    print('Validating at ', iteration_num)
    num_images = len(val_dataset)
    num_classes = args.num_classes
    priors = priors.cuda()
    # One list per class name; each list receives one array per sample.
    det_boxes = [[] for _ in range(len(CLASSES[args.dataset]))]
    gt_boxes = []
    print_time = True
    val_step = 100
    count = 0
    torch.cuda.synchronize()
    ts = time.perf_counter()
    softmax = nn.Softmax(dim=2).cuda()
    with torch.no_grad():
        batch_iterator = iter(val_data_loader)
        for val_itr in range(len(val_data_loader)):
            log_this_step = val_itr % val_step == 0
            torch.cuda.synchronize()
            # The timer starts before the batch is fetched, so the reported
            # "Forward Time" also includes data loading.
            t1 = time.perf_counter()
            images, ground_truths, _, _, num_mt, _ = next(batch_iterator)
            batch_size = images[0].size(0)
            height, width = images[0].size(3), images[0].size(4)
            images = [img.cuda(0, non_blocking=True) for img in images if not isinstance(img, list)]
            conf_preds, loc_data = net(images)
            conf_scores_all = softmax(conf_preds).clone()
            if print_time and log_this_step:
                torch.cuda.synchronize()
                tf = time.perf_counter()
                print('Forward Time {:0.3f}'.format(tf - t1))

            for b in range(batch_size):
                # Keep rows 0, seq_len, 2*seq_len, ... of the ground truth
                # (num_mt[b] rows in total). Presumably one row per annotated
                # instance -- TODO confirm against the dataset class.
                inds = np.asarray([m * args.seq_len for m in range(num_mt[b])])
                gt = ground_truths[b].numpy()
                gt = gt[inds]  # fancy indexing copies, so the tensor is not modified below
                gt[:, 0] *= width
                gt[:, 2] *= width
                gt[:, 1] *= height
                gt[:, 3] *= height
                gt_boxes.append(gt)

                decoded_boxes = decode_seq(loc_data[b], priors, args.cfg['variance'], args.seq_len)
                decoded_boxes = decoded_boxes[:, :4].clone()
                conf_scores = conf_scores_all[b].clone()

                # Per-class NMS. Index 0 is skipped (presumably the background
                # class), hence the ``cl_ind - 1`` offset into det_boxes.
                for cl_ind in range(1, num_classes):
                    scores = conf_scores[:, cl_ind].squeeze()
                    c_mask = scores.gt(args.conf_thresh)  # above the minimum confidence
                    scores = scores[c_mask]
                    # Test the element count rather than ``squeeze().dim()``:
                    # an empty selection stays 1-D, while a single surviving
                    # score would squeeze to 0-D and be dropped by mistake.
                    if scores.nelement() == 0:
                        det_boxes[cl_ind - 1].append(np.asarray([]))
                        continue
                    boxes = decoded_boxes.clone()
                    l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                    boxes = boxes[l_mask].view(-1, 4)
                    # Indices of the highest scoring, non-overlapping boxes.
                    ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)
                    scores = scores[ids[:counts]].cpu().numpy()
                    boxes = boxes[ids[:counts]].cpu().numpy()
                    # Scale to pixel coordinates.
                    boxes[:, 0] *= width
                    boxes[:, 2] *= width
                    boxes[:, 1] *= height
                    boxes[:, 3] *= height
                    # Clip to the image: lower bound on the first corner,
                    # upper bound on the second corner.
                    boxes[:, 0] = np.maximum(boxes[:, 0], 0)
                    boxes[:, 2] = np.minimum(boxes[:, 2], width)
                    boxes[:, 1] = np.maximum(boxes[:, 1], 0)
                    boxes[:, 3] = np.minimum(boxes[:, 3], height)
                    cls_dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
                    det_boxes[cl_ind - 1].append(cls_dets)
                count += 1

            if log_this_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(count, num_images, te - ts))
                torch.cuda.synchronize()
                ts = time.perf_counter()
            if print_time and log_this_step:
                torch.cuda.synchronize()
                te = time.perf_counter()
                print('NMS stuff Time {:0.3f}'.format(te - tf))

    print('Evaluating detections for itration number ', iteration_num)
    return evaluate_detections(gt_boxes, det_boxes, CLASSES[args.dataset], iou_thresh=iou_thresh)
def mark_detections(im_bgr, conf_scores, tu_str_class, decoded_boxes, w_h_ori, li_margin_ratio_l_r_t_b, li_color_class, top_k, th_conf, th_nms, det_boxes):
    """Draw per-class detections on an image and optionally collect them.

    For each class the scores above ``th_conf`` are filtered with NMS, the
    surviving boxes are passed through ``rescale_with_margin_ratio``, scaled
    to the original image size and clipped to it. Each box is drawn on
    ``im_bgr`` together with its class name and its score.

    Args:
        im_bgr: Image drawn on in place with OpenCV.
        conf_scores: 2-D tensor of confidences indexed ``[box, class]``;
            column 0 is skipped (presumably background).
        tu_str_class: Class names; entry ``i`` labels score column ``i + 1``.
        decoded_boxes: Tensor of boxes, four coordinates per row.
        w_h_ori: ``(width, height)`` of the original image.
        li_margin_ratio_l_r_t_b: Forwarded unchanged to
            ``rescale_with_margin_ratio``.
        li_color_class: Drawing colour per class, indexed like
            ``tu_str_class``.
        top_k: Maximum number of boxes passed on to ``nms``.
        th_conf: Minimum confidence for a box to be considered.
        th_nms: Overlap threshold passed on to ``nms``.
        det_boxes: ``None``, or a list with one list per class. Each class
            list receives an ``(n, 5)`` float32 array of
            ``[x1, y1, x2, y2, score]`` rows, or an empty array when nothing
            passes the threshold.

    Returns:
        Tuple ``(im_bgr, det_boxes)``; both are the objects passed in.
    """
    num_classes = len(tu_str_class) + 1
    w_ori, h_ori = w_h_ori
    for cl_ind in range(1, num_classes):
        str_class = tu_str_class[cl_ind - 1]
        color = li_color_class[cl_ind - 1]
        scores = conf_scores[:, cl_ind].squeeze()
        c_mask = scores.gt(th_conf)  # above the minimum confidence
        scores = scores[c_mask]
        if scores.nelement() == 0:
            if det_boxes is not None:
                det_boxes[cl_ind - 1].append(np.asarray([]))
            continue
        boxes = decoded_boxes.clone()
        l_mask = c_mask.unsqueeze(1).expand_as(boxes)
        boxes = boxes[l_mask].view(-1, 4)
        # Indices of the highest scoring, non-overlapping boxes.
        ids, counts = nms(boxes, scores, th_nms, top_k)
        scores = scores[ids[:counts]].cpu().numpy()
        boxes = boxes[ids[:counts]].cpu().numpy()
        boxes = rescale_with_margin_ratio(boxes, li_margin_ratio_l_r_t_b)
        # Scale to the original image size.
        boxes[:, 0] *= w_ori
        boxes[:, 2] *= w_ori
        boxes[:, 1] *= h_ori
        boxes[:, 3] *= h_ori
        # One random factor per box decides how far down the box its label
        # is written (presumably to reduce overlapping labels).
        rand_0_1 = np.random.rand(boxes.shape[0])
        for ik in range(boxes.shape[0]):
            # Clip to the image: lower bound on the first corner, upper bound
            # on the second corner.
            boxes[ik, 0] = max(0, boxes[ik, 0])
            boxes[ik, 2] = min(w_ori, boxes[ik, 2])
            boxes[ik, 1] = max(0, boxes[ik, 1])
            boxes[ik, 3] = min(h_ori, boxes[ik, 3])
            # OpenCV expects integer pixel coordinates; the boxes are
            # float32, so cast explicitly as the putText calls below do.
            top_left = (int(boxes[ik, 0]), int(boxes[ik, 1]))
            bottom_right = (int(boxes[ik, 2]), int(boxes[ik, 3]))
            cv2.rectangle(im_bgr, top_left, bottom_right, color, 1)
            hei = boxes[ik, 3] - boxes[ik, 1]
            y_offset = Y_OFFSET_GT_BOX + (hei - Y_OFFSET_GT_BOX) * rand_0_1[ik]
            cv2.putText(im_bgr, str_class,
                        (int(boxes[ik, 0]), int(boxes[ik, 1] + y_offset)),
                        cv2.FONT_HERSHEY_SIMPLEX, FONT_SCALE_DET_BOX, color)
            str_score = "{:.2f}".format(scores[ik])
            cv2.putText(im_bgr, str_score,
                        (int(boxes[ik, 0]), int(boxes[ik, 1] + y_offset + Y_OFFSET_DET_SCORE)),
                        cv2.FONT_HERSHEY_SIMPLEX, FONT_SCALE_DET_BOX, color)
        if det_boxes is not None:
            cls_dets = np.hstack(
                (boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
            det_boxes[cl_ind - 1].append(cls_dets)
    return im_bgr, det_boxes