Example #1
def non_max_suppression(prediction,
                        conf_thresh=0.1,
                        iou_thresh=0.6,
                        merge=False,
                        agnostic=False,
                        multi_label=True,
                        max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Args:
    prediction (torch.Tensor): shape=[bs, -1, no(85)]; note: box coords (x, y, w, h) have already been decoded to input-image scale.

    Returns:
         a list (len=bs) whose elements have shape nx6 (x1, y1, x2, y2, conf, cls)
    """

    xc = prediction[..., 4] > conf_thresh  # candidates
    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    redundant = True  # require redundant detections
    output = [None] * prediction.shape[0]  # list len=bs

    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # drop boxes whose objectness score is below conf_thresh

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thresh).nonzero(
                as_tuple=False).T  # (i, j): box index and class index
            # each box keeps every class whose confidence exceeds the threshold;
            # x[i, j + 5, None] is x[i, j + 5] with a trailing unit dimension
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            # each box keeps only its single highest-confidence class
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat(
                (box, conf, j.float()),
                1)[conf.view(-1) > conf_thresh]  # second filter: drop rows whose final class score < conf_thresh

        # Filter by class

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # per-class offset
        '''
            Offsetting boxes by class pushes boxes of different classes far apart,
            laying the groundwork for the later weighted merge: both NMS and the
            merge then effectively operate only among boxes of the same class.
        '''
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
                    1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]

    return output
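Below is a hedged, self-contained sketch of the class-offset ("batched") NMS trick used above, rebuilt directly on torchvision.ops.nms with made-up boxes; xywh2xyxy_sketch is a hypothetical stand-in for the xywh2xyxy helper the snippet assumes.

import torch
from torchvision.ops import nms

def xywh2xyxy_sketch(x):
    # convert (cx, cy, w, h) boxes to (x1, y1, x2, y2)
    y = x.clone()
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y

boxes_xywh = torch.tensor([[50., 50., 20., 20.],
                           [52., 50., 20., 20.],
                           [50., 50., 20., 20.]])
scores = torch.tensor([0.9, 0.8, 0.7])
classes = torch.tensor([0., 0., 1.])  # last box belongs to another class

boxes = xywh2xyxy_sketch(boxes_xywh)
max_wh = 4096  # same constant as above
keep = nms(boxes + classes[:, None] * max_wh, scores, 0.6)
print(keep)  # tensor([0, 2]): the class-1 box is never suppressed by the class-0 boxes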
Example #2
def non_max_suppression(prediction,
                        conf_thresh=0.1,
                        iou_thresh=0.6,
                        merge=False,
                        agnostic=False,
                        multi_label=True,
                        max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
         detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """

    xc = prediction[..., 4] > conf_thresh  # candidates
    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    redundant = True  # require redundant detections
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thresh).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thresh]

        # Filter by class

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]

    return output
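A short sketch of the merge-NMS branch (the "merge and (1 < n < 3E3)" block) in isolation, using torchvision.ops.box_iou in place of the box_iou helper both examples assume; the boxes and threshold are placeholders.

import torch
from torchvision.ops import nms, box_iou

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
iou_thresh = 0.6

i = nms(boxes, scores, iou_thresh)            # surviving indices
iou = box_iou(boxes[i], boxes) > iou_thresh   # which boxes each survivor covers
weights = iou * scores[None]                  # score-weighted membership
merged = torch.mm(weights, boxes) / weights.sum(1, keepdim=True)
print(merged)  # each survivor becomes the score-weighted mean of the boxes it covers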
Example #3
    def step(self, blob):
        """This function should be called every timestep to perform tracking with a blob
        containing the image information.
        """
        for t in self.tracks:
            # add current position to last_pos list
            t.last_pos.append(t.pos.clone())
            t.track_count += 1

        ###########################
        # Look for new detections #
        ###########################

        # self.obj_detect.load_image(blob['data'][0])
        if self.public_detections:
            dets = blob['dets'].squeeze(dim=0)
            if dets.nelement() > 0:
                boxes, scores = self.obj_detect.predict_boxes(
                    blob['img'], dets)
            else:
                boxes = scores = torch.zeros(0).cuda()
        else:
            boxes, scores = self.obj_detect.detect(blob['img'])

        if boxes.nelement() > 0:
            boxes = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

            # Filter out tracks that have too low person score
            inds = torch.gt(scores,
                            self.detection_person_thresh).nonzero().view(-1)
        else:
            inds = torch.zeros(0).cuda()

        if inds.nelement() > 0:
            det_pos = boxes[inds]

            det_scores = scores[inds]
        else:
            det_pos = torch.zeros(0).cuda()
            det_scores = torch.zeros(0).cuda()

        ##################
        # Predict tracks #
        ##################

        num_tracks = 0
        nms_inp_reg = torch.zeros(0).cuda()
        if len(self.tracks):
            # align
            if self.do_align:
                self.align(blob)

            # apply motion model
            if self.motion_model_cfg['enabled']:
                self.motion()
                self.tracks = [t for t in self.tracks if t.has_positive_area()]

            # regress
            person_scores = self.regress_tracks(blob)

            if len(self.tracks):
                # create nms input

                # nms here if tracks overlap
                keep = nms(self.get_pos(), person_scores,
                           self.regression_nms_thresh)
                print(f"tracks:{len(self.tracks)}\n")
                print(f"keep tracks index:{keep}\n")
                self.tracks_to_inactive([
                    self.tracks[i] for i in list(range(len(self.tracks)))
                    if i not in keep
                ])

                if keep.nelement() > 0:
                    if self.do_reid:
                        new_features = self.get_appearances(blob)
                        self.add_features(new_features)

        #####################
        # Create new tracks #
        #####################

        # !!! Here NMS is used to filter out detections that are already covered by tracks. This is
        # !!! done by iterating through the active tracks one by one, assigning them a bigger score
        # !!! than 1 (maximum score for detections) and then filtering the detections with NMS.
        # !!! In the paper this is done by calculating the overlap with existing tracks, but the
        # !!! result stays the same.
        if det_pos.nelement() > 0:
            keep = nms(det_pos, det_scores, self.detection_nms_thresh)
            det_pos = det_pos[keep]
            det_scores = det_scores[keep]

            # check with every track in a single run (problem if tracks delete each other)
            for t in self.tracks:
                nms_track_pos = torch.cat([t.pos, det_pos])
                nms_track_scores = torch.cat(
                    [torch.tensor([2.0]).to(det_scores.device), det_scores])
                keep = nms(nms_track_pos, nms_track_scores,
                           self.detection_nms_thresh)

                keep = keep[torch.ge(keep, 1)] - 1

                det_pos = det_pos[keep]
                det_scores = det_scores[keep]
                if keep.nelement() == 0:
                    break

        if det_pos.nelement() > 0:
            new_det_pos = det_pos
            new_det_scores = det_scores

            # try to reidentify tracks
            new_det_pos, new_det_scores, new_det_features = self.reid(
                blob, new_det_pos, new_det_scores)

            # add new
            if new_det_pos.nelement() > 0:
                self.add(new_det_pos, new_det_scores, new_det_features)

        ####################
        # Generate Results #
        ####################

        for t in self.tracks:
            if t.id not in self.results.keys():
                self.results[t.id] = {}
            self.results[t.id][self.im_index] = np.concatenate(
                [t.pos[0].cpu().numpy(),
                 np.array([t.score])])

        for t in self.inactive_tracks:
            t.count_inactive += 1

        self.inactive_tracks = [
            t for t in self.inactive_tracks if t.has_positive_area()
            and t.count_inactive <= self.inactive_patience
        ]

        # for t in self.tracks:
        #     t.track_count += 1

        self.im_index += 1
        self.last_image = blob['img'][0]
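A self-contained sketch of the trick described in the "!!!" comments above: the track box is given a score of 2.0 (above the 1.0 maximum for detections), so NMS can only discard the detections that overlap it; the boxes and the 0.5 threshold are made up.

import torch
from torchvision.ops import nms

track_pos = torch.tensor([[0., 0., 10., 10.]])
det_pos = torch.tensor([[1., 1., 11., 11.],
                        [50., 50., 60., 60.]])
det_scores = torch.tensor([0.9, 0.8])

nms_track_pos = torch.cat([track_pos, det_pos])
nms_track_scores = torch.cat([torch.tensor([2.0]), det_scores])
keep = nms(nms_track_pos, nms_track_scores, 0.5)
keep = keep[torch.ge(keep, 1)] - 1  # drop the track entry, remap to detection indices
print(det_pos[keep])  # only the detection far away from the track survives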
Example #4
import torch
import torchvision
from torchvision.ops.boxes import nms

ld = torch.load('nms_db')

boxes = ld['boxes']
score = ld['score']
thres = 0.7

keep = nms(boxes, score, thres)

print('FINISH')
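For reference, a hedged sketch of the structure the 'nms_db' file appears to have: a dict holding an Nx4 xyxy 'boxes' tensor and an N-element 'score' tensor (the values below are made up).

import torch
from torchvision.ops.boxes import nms

ld = {
    'boxes': torch.tensor([[0., 0., 10., 10.],
                           [1., 1., 11., 11.]]),
    'score': torch.tensor([0.9, 0.8]),
}
print(nms(ld['boxes'], ld['score'], 0.7))  # tensor([0, 1]): IoU ~0.68 is below 0.7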
Example #5
def test_one_epoch(dataloader, model, yolo_loss, cfg):
    confidence = cfg.yolo.inf_confidence
    iou_threshold = cfg.yolo.inf_iou_threshold
    inp_dim = cfg.dataset.inp_dim
    yolo_loss.set_img_size(inp_dim)
    model.eval()
    results = []
    dset_name = dataloader.dset_name
    torch.backends.cudnn.benchmark = True
    with torch.no_grad():
        for batch_idx, (images, targets) in enumerate(dataloader):
            # measure data loading time
            images = images.to('cuda', non_blocking=True)
            targets = [{
                k: v.to('cuda', non_blocking=True)
                for k, v in t.items()
            } for t in targets]

            out = model(images)
            predictions = yolo_loss(out)

            predictions[:, :, :4] = helper.get_abs_coord(predictions[:, :, :4])
            score = predictions[:, :, 4] * predictions[:, :, 5:].max(axis=2)[0]
            pred_mask = score > confidence
            pred_conf = [(predictions[e][m]) for e, m in enumerate(pred_mask)]
            indices = [
                boxes.nms(pred_conf[i][:, :4], pred_conf[i][:, 4],
                          iou_threshold) for i in range(len(pred_conf))
            ]
            pred_final = [
                pred_conf[i][indices[i], :] for i in range(len(pred_conf))
            ]
            pred_final = list(filter(lambda t: t.shape[0] != 0, pred_final))

            for i, atrbs in enumerate(pred_final):
                xmin = atrbs[:, 0] / inp_dim * targets[i]['img_size'][1]
                ymin = atrbs[:, 1] / inp_dim * targets[i]['img_size'][0]
                xmax = atrbs[:, 2] / inp_dim * targets[i]['img_size'][1]
                ymax = atrbs[:, 3] / inp_dim * targets[i]['img_size'][0]
                w = xmax - xmin
                h = ymax - ymin

                scores = (atrbs[:, 4] * atrbs[:, 5:].max(axis=1)[0]).tolist()
                labels = (atrbs[:, 5:].max(axis=1)[1])
                if dset_name == 'coco':
                    labels = helper.torch80_to_91(labels).tolist()
                else:
                    labels = (labels + 1).tolist()
                bboxes = torch.stack((xmin, ymin, w, h), axis=1)
                areas = (bboxes[:, 2] * bboxes[:, 3]).tolist()
                bboxes = bboxes.tolist()
                temp = [{
                    'bbox': b,
                    'area': a,
                    'category_id': l,
                    'score': s,
                    'image_id': targets[i]['image_id'].item()
                } for b, a, l, s in zip(bboxes, areas, labels, scores)]

                results = list(itertools.chain(results, temp))

    return results
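A tiny sketch of the xyxy-to-COCO-xywh conversion done per detection above; the category id, score and image id are placeholders.

xmin, ymin, xmax, ymax = 10.0, 20.0, 50.0, 80.0
w, h = xmax - xmin, ymax - ymin
result = {'bbox': [xmin, ymin, w, h], 'area': w * h,
          'category_id': 1, 'score': 0.9, 'image_id': 42}
print(result)  # {'bbox': [10.0, 20.0, 40.0, 60.0], 'area': 2400.0, ...}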
    def tracking_processing(self, boxes, boxes_ids, images, features,
                            ROI_images, original_image_sizes):
        device = list(self.parameters())[0].device
        # resize all the given box to be aligned
        tck_proposals = [
            resize_boxes(box, or_size, size) for box, or_size, size in zip(
                boxes, original_image_sizes, images.image_sizes)
            if box.nelement()
        ]

        tck_all_boxes = []
        tck_all_scores = []
        tck_all_labels = []
        tck_all_ids = []

        boxes_per_image = [
            boxes_in_image.shape[0] for boxes_in_image in tck_proposals
        ]

        if tck_proposals:
            tck_box_features = self.roi_heads.box_roi_pool(
                features, tck_proposals, images.image_sizes)
            tck_box_features = self.roi_heads.box_head(tck_box_features)
            tck_class_logits, tck_box_regression = self.roi_heads.box_predictor(
                tck_box_features)
            tck_boxes = self.roi_heads.box_coder.decode(
                tck_box_regression, tck_proposals)
            tck_scores = F.softmax(tck_class_logits, -1)

            tck_scores, tck_labels = tck_scores[:, self.selected_classes].max(1)
            tck_boxes = tck_boxes[:, self.selected_classes]
            tck_boxes = torch.cat([
                tck_boxes[idx][i].unsqueeze(0)
                for idx, i in enumerate(tck_labels)
            ])

            tck_boxes_list = tck_boxes.split(boxes_per_image, 0)
            tck_scores_list = tck_scores.split(boxes_per_image, 0)
            tck_labels_list = tck_labels.split(boxes_per_image, 0)

            for boxes, scores, labels, box_ids, ROI_image, image_shape, original_im_shape in zip(
                    tck_boxes_list, tck_scores_list, tck_labels_list,
                    boxes_ids, ROI_images, images.image_sizes,
                    original_image_sizes):

                boxes = clip_boxes_to_image(boxes, image_shape)

                # batch everything, by making every class prediction be a separate instance
                boxes = boxes.reshape(-1, 4)
                scores = scores.reshape(-1)
                labels = labels.reshape(-1)
                box_ids = box_ids.reshape(-1)

                # remove low scoring boxes
                keep = torch.nonzero(scores > self.tck_score_thresh).squeeze(1)
                boxes = boxes[keep]
                scores = scores[keep]
                labels = labels[keep]
                box_ids = box_ids[keep]

                # remove small boxes
                keep = self.remove_small_boxes_area(boxes,
                                                    min_size=self.tck_min_area)
                boxes = boxes[keep]
                scores = scores[keep]
                labels = labels[keep]
                box_ids = box_ids[keep]

                # non-maximum suppression, independently done per class
                keep = nms(boxes, scores, self.tck_nms_thresh)
                boxes = boxes[keep]
                scores = scores[keep]
                labels = labels[keep]
                box_ids = box_ids[keep]
                # keep only topk scoring predictions

                boxes = resize_boxes(boxes, image_shape, original_im_shape)

                if boxes.nelement():

                    keep = self.remove_boxes_out_roi(
                        boxes,
                        ROI_image,
                        min_in_porcentage=self.tck_min_ROI_in)
                    boxes = boxes[keep]
                    scores = scores[keep]
                    labels = labels[keep]
                    box_ids = box_ids[keep]

                tck_all_boxes.append(boxes)
                tck_all_scores.append(scores)
                tck_all_labels.append(labels)
                tck_all_ids.append(box_ids)
        else:
            tck_all_boxes.append(torch.empty(0, device=device))
            tck_all_scores.append(torch.empty(0, device=device))
            tck_all_labels.append(torch.empty(0, device=device))
            tck_all_ids.append(torch.empty(0, device=device))

        return tck_all_boxes, tck_all_scores, tck_all_labels, tck_all_ids
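A self-contained sketch of the per-image filtering chain used above (score threshold, small-box removal, NMS, top-k), built from torchvision.ops helpers instead of the model-specific ones; the boxes, thresholds and the top-k limit are placeholders.

import torch
from torchvision.ops import nms, remove_small_boxes, clip_boxes_to_image

boxes = torch.tensor([[5., 5., 40., 40.],
                      [6., 6., 41., 41.],
                      [0., 0., 2., 2.],        # too small
                      [60., 60., 90., 95.]])
scores = torch.tensor([0.9, 0.85, 0.8, 0.05])  # last score below threshold

boxes = clip_boxes_to_image(boxes, (100, 100))
keep = torch.nonzero(scores > 0.1).squeeze(1)   # remove low scoring boxes
boxes, scores = boxes[keep], scores[keep]
keep = remove_small_boxes(boxes, min_size=5.0)  # remove small boxes (by side length here)
boxes, scores = boxes[keep], scores[keep]
keep = nms(boxes, scores, 0.5)[:100]            # NMS, then keep top-k
boxes, scores = boxes[keep], scores[keep]
print(boxes, scores)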
    def detections_processing(self, images, features, ROI_images, tck_boxes,
                              original_image_sizes):
        # rpn network
        det_proposals, proposal_losses = self.rpn(images, features)

        # roi heads to get boxes and scores
        det_box_features = self.roi_heads.box_roi_pool(features, det_proposals,
                                                       images.image_sizes)
        det_box_features = self.roi_heads.box_head(det_box_features)
        det_class_logits, det_box_regression = self.roi_heads.box_predictor(
            det_box_features)
        det_boxes = self.roi_heads.box_coder.decode(det_box_regression,
                                                    det_proposals)
        det_scores = F.softmax(det_class_logits, -1)

        boxes_per_image = [
            boxes_in_image.shape[0] for boxes_in_image in det_proposals
        ]

        # take the max score over the selected classes and use its index as the label,
        # i.e. each proposal keeps just one of the selected classes
        det_scores, det_labels = det_scores[:, self.selected_classes].max(1)
        det_boxes = det_boxes[:, self.selected_classes]
        det_boxes = torch.cat([
            det_boxes[idx][i].unsqueeze(0) for idx, i in enumerate(det_labels)
        ])

        # split the boxes scores and labels for each image for post processing
        det_boxes_list = det_boxes.split(boxes_per_image, 0)
        det_scores_list = det_scores.split(boxes_per_image, 0)
        det_labels_list = det_labels.split(boxes_per_image, 0)

        det_all_boxes = []
        det_all_scores = []
        det_all_labels = []

        for boxes, scores, labels, ROI_image, tck_boxes_b, image_shape, original_im_shape in zip(
                det_boxes_list, det_scores_list, det_labels_list, ROI_images,
                tck_boxes, images.image_sizes, original_image_sizes):

            boxes = clip_boxes_to_image(boxes, image_shape)

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.det_score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove small boxes
            keep = self.remove_small_boxes_area(boxes,
                                                min_size=self.det_min_area)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # remove too big boxes
            #keep = self.remove_big_boxes_area(boxes, max_size=self.det_max_area)
            #boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = nms(boxes, scores, self.det_nms_thresh)
            #boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # keep only topk scoring predictions
            keep = keep[:self.roi_heads.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            boxes = resize_boxes(boxes, image_shape, original_im_shape)

            if boxes.nelement():
                keep = self.remove_boxes_out_roi(
                    boxes, ROI_image, min_in_porcentage=self.det_min_ROI_in)
                boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # filter detections in tracks
            for tck_box in tck_boxes_b:
                temp_boxes = torch.cat([tck_box.unsqueeze(0), boxes])
                temp_scores = torch.cat(
                    [torch.tensor([2.0]).to(boxes.device), scores])
                keep = nms(temp_boxes, temp_scores, self.det_nms_thresh)
                keep = keep[torch.ge(keep, 1)] - 1
                boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
                if keep.nelement() == 0:
                    break

            det_all_boxes.append(boxes)
            det_all_scores.append(scores)
            det_all_labels.append(labels)

        return det_all_boxes, det_all_scores, det_all_labels
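A minimal sketch (random numbers) of the class-selection step commented above: softmax over the class logits, restrict to a subset of class ids, take each proposal's best class, and gather the matching decoded box; selected_classes and all shapes are made up.

import torch
import torch.nn.functional as F

selected_classes = [1, 3]              # hypothetical subset of class ids
class_logits = torch.randn(4, 5)       # 4 proposals, 5 classes
decoded_boxes = torch.randn(4, 5, 4)   # one decoded box per class per proposal

scores = F.softmax(class_logits, -1)
scores, labels = scores[:, selected_classes].max(1)
boxes = decoded_boxes[:, selected_classes]
boxes = torch.cat([boxes[idx][i].unsqueeze(0) for idx, i in enumerate(labels)])
print(boxes.shape)  # torch.Size([4, 4]): each proposal keeps the box of its best selected class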
Example #8
def draw_according_nid(nid):
    """
    calculating the confusion matrix for single image, if the image is divided 
    into many subimages, merging them at first.  
    """
    path_img = os.path.join(img_root, '{}.png'.format(nid))
    image = cv2.imread(path_img, 1)
    h, w, c = image.shape
    pred_txts = [x for x in txt_names if int(x.split('_')[0])==nid]
    pred_txts = sorted(pred_txts, key=lambda x:int(x.split('_')[1]))
    involved_ids = [int((x.split('_')[1])) for x in pred_txts]
    preds = []
    div = round(w/500.0)
    w_ = int(w//max(1, div))
    for n in involved_ids:
        with open(os.path.join(preds_root, '{}_{}.txt'.format(nid, n)), 'r') as f:
            tmp = f.readlines()
        pred = []
        if len(tmp)==0:
            pred = None
        else:
            pred = [np.array(list(map(float, e.split(' '))))[None, :] for e in tmp]
            pred = np.concatenate(pred)
            if n > 0:
                pred[:, 0] = pred[:, 0] + w_ * n - 100
                pred[:, 2] = pred[:, 2] + w_ * n - 100
            preds.append(pred)
    if len(preds) > 0:
        preds = np.concatenate(preds)

        pred_boxes = preds[:, :-1]
        pred_scores = np.round(preds[:, -1], 2)
        #pred_boxes = preds[:, 0:]
        #pred_scores = np.zeros((pred_boxes.shape[0], ))
    else:
        pred_boxes = None
        pred_scores = None

    pred_boxes = torch.from_numpy(pred_boxes) if pred_boxes is not None else None
    if pred_boxes is not None:
        pred_scores = torch.from_numpy(pred_scores)
        _, order = torch.sort(pred_scores, 0, True)
        cls_dets = torch.cat((pred_boxes, pred_scores[:, None]), 1)
        cls_dets = cls_dets[order]
        keep = nms(pred_boxes[order, :], pred_scores[order], 0.3)
        cls_dets = cls_dets[keep.view(-1).long()]
        if occlusion_mask:
            img_occ_mask = cv2.imread(os.path.join(occlusion_mask_root, '{}.png'.format(nid)), 0).astype(np.int64)
            fg_mask = cv2.imread(os.path.join(fg_mask_root, '{}.png'.format(nid)), 0).astype(np.int64)
            img_occ_mask = np.where(img_occ_mask > 0, 255, 0)
            img_occ_mask = np.clip(img_occ_mask - fg_mask, 0, 1).astype(np.int64)
            dets_stack = list()
            for e in cls_dets:
                if np.sum(img_occ_mask[int(e[1]):int(e[3]), int(e[0]):int(e[2])]) < 0.12 * (e[3] - e[1]) * (e[2] - e[0]):
                    dets_stack.append(e)
            cls_dets = torch.stack(dets_stack)

        pred_boxes = cls_dets[:, 0:4]
        pred_scores = cls_dets[:, 4].numpy()
        draw(path_img, saved_image, pred_boxes, pred_scores, nid)
        with open(os.path.join(saved_txt, '{}.txt'.format(nid)), 'w') as f:
            for i, e in enumerate(cls_dets.numpy()):
                f.write(' '.join(map(str, e)) + '\n')
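A hedged sketch of the prediction-file format the loop above appears to assume: one whitespace-separated 'x1 y1 x2 y2 score' line per box (the lines below are invented).

import numpy as np

lines = ["10 20 110 220 0.91", "15 25 115 225 0.40"]
pred = np.concatenate([np.array(list(map(float, e.split(' '))))[None, :] for e in lines])
print(pred[:, :-1])  # boxes in the first four columns
print(pred[:, -1])   # confidence score in the last column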
def pipeline(img):
    '''
    Pipeline function for detection and tracking
    '''
    global frame_count
    global tracker_list
    global max_age
    global min_hits
    global track_id_list
    global debug
    global avg_fps
    frame_count += 1
    #print("")
    #print(frame_count)
    #print("")
    start = time.time()
    img_dim = (img.shape[1], img.shape[0])

    # YOLO detection for vehicle
    yolo_start = time.time()
    z_box = yolo_det.get_detected_boxes(img)
    #z_box_cpy= z_box
    yolo_end = time.time()

    # Lpd

    #print("Time taken for yolo detection is", yolo_end-yolo_start)
    track_start = time.time()
    if debug:
        print('Frame:', frame_count)

    x_box = []
    if debug:
        for i in range(len(z_box)):
            img1 = helpers.draw_box_label(img, z_box[i], box_color=(255, 0, 0))
            cv2.imshow("frame", img1)
            k = cv2.waitKey(10)
            if k == ord('e'):
                cv2.destroyAllWindows()
                sys.exit(-1)

        # plt.show()

    if len(tracker_list) > 0:
        for trk in tracker_list:
            x_box.append(trk.box)

    matched, unmatched_dets, unmatched_trks \
        = assign_detections_to_trackers(x_box, z_box, iou_thrd=0.3)
    if debug:
        print('Detection: ', z_box)
        print('x_box: ', x_box)
        print('matched:', matched)
        print('unmatched_det:', unmatched_dets)
        print('unmatched_trks:', unmatched_trks)

    # Deal with matched detections
    if matched.size > 0:
        for trk_idx, det_idx in matched:
            z = z_box[det_idx]
            tmp_trk = tracker_list[trk_idx]
            tmp_trk.features.append(extract_feature(img, z))
            z = np.expand_dims(z, axis=0).T
            tmp_trk.kalman_filter(z)
            xx = tmp_trk.x_state.T[0].tolist()
            xx = [xx[0], xx[2], xx[4], xx[6]]
            x_box[trk_idx] = xx
            tmp_trk.box = xx
            tmp_trk.hits += 1
            tmp_trk.no_losses = 0

    # Deal with unmatched detections
    if len(unmatched_dets) > 0:
        for idx in unmatched_dets:
            z = z_box[idx]
            if len(unmatched_trks) > 0:
                min_score = 10000000
                tmp_idx = -1
                for trk_idx in unmatched_trks:
                    trk = tracker_list[trk_idx]
                    #print(len(trk.features))
                    if len(trk.features) == 0:
                        continue
                    score = trk.feature_match(extract_feature(
                        img, z))  ## find closest feature match
                    if score < min_score:
                        min_score = score
                        tmp_idx = trk_idx
                if min_score < feature_thresh and tmp_idx != -1:

                    z = np.expand_dims(z, axis=0).T

                    tmp_trk = tracker_list[tmp_idx]
                    tmp_trk.kalman_filter(z)
                    xx = tmp_trk.x_state.T[0].tolist()
                    xx = [xx[0], xx[2], xx[4], xx[6]]
                    x_box[tmp_idx] = xx  # update the matched tracker's box
                    tmp_trk.box = xx
                    tmp_trk.hits += 1
                    tmp_trk.no_losses = 0
                    continue

            #new_boxes.append(z)
            z = np.expand_dims(z, axis=0).T
            tmp_trk = tr.Tracker()  # Create a new tracker
            x = np.array([[z[0], 0, z[1], 0, z[2], 0, z[3], 0]]).T
            tmp_trk.x_state = x
            tmp_trk.predict_only()
            xx = tmp_trk.x_state
            xx = xx.T[0].tolist()
            xx = [xx[0], xx[2], xx[4], xx[6]]
            tmp_trk.box = xx
            tmp_trk.id = track_id_list.popleft()  # assign an ID for the tracker
            tracker_list.append(tmp_trk)
            x_box.append(xx)

    # Deal with unmatched tracks
    if len(unmatched_trks) > 0:
        for trk_idx in unmatched_trks:
            tmp_trk = tracker_list[trk_idx]
            tmp_trk.no_losses += 1
            tmp_trk.predict_only()
            xx = tmp_trk.x_state
            xx = xx.T[0].tolist()
            xx = [xx[0], xx[2], xx[4], xx[6]]
            tmp_trk.box = xx
            x_box[trk_idx] = xx

    # The list of tracks to be annotated
    img_vis = img.copy()
    good_tracker_list = []
    #print(img_dim)
    good_boxes = []
    for trk in tracker_list:
        if ((trk.hits >= min_hits) and (trk.no_losses <= max_age)):
            good_tracker_list.append(trk)
            good_boxes.append(trk.box)
    #for trk in good_tracker_list:
    selected_ids = nms(torch.FloatTensor(np.array(good_boxes)),
                       torch.FloatTensor([1.0] * len(good_boxes)), 0.45)
    for idx in selected_ids:
        trk = good_tracker_list[idx]
        x_cv2 = trk.box
        idx = trk.id
        if debug:
            print('updated box: ', x_cv2)
            print()
        # Draw the bounding boxes on the
        img_vis = helpers.draw_box_label(img_vis, x_cv2, idx)
        if frame_count % 5 == 0:
            y1_temp, x1_temp, y2_temp, x2_temp = x_cv2

            w_temp = x2_temp - x1_temp

            h_temp = y2_temp - y1_temp
            if w_temp * h_temp < 400 or w_temp <= 0 or h_temp <= 0 or min(x_cv2) < 0:
                continue
            plates = []
            #print(x_cv2)
            dt_start = time.time()
            Ivehicle = img[y1_temp:y2_temp, x1_temp:x2_temp]
            ratio = float(max(Ivehicle.shape[:2])) / min(Ivehicle.shape[:2])
            side = int(ratio * 288.)
            bname = 'frame{}_{}.png'.format(frame_count, idx)

            bound_dim = min(side + (side % (2**4)), size)
            # print "\t\tBound dim: %d, ratio: %f" % (bound_dim,ratio)
            #dt_plates_start = time.time()
            Llp, LlpImgs, _ = detect_lp(wpod_net, im2single(Ivehicle),
                                        bound_dim, 2**4, (240, 80),
                                        lp_threshold)
            if len(LlpImgs):

                plates = [Llp[0].pts]
                cv2.imwrite("%s/%s" % (detected_plates_dir, bname),
                            LlpImgs[0] * 255.)
                plate_string = _lpr.plates_ocr(LlpImgs[0] * 255.)
                for plate in plates:
                    x1 = (plate[0][0] * w_temp + x1_temp).astype('int')
                    y1 = (plate[1][0] * h_temp + y1_temp).astype('int')
                    x2 = (plate[0][1] * w_temp + x1_temp).astype('int')
                    y2 = (plate[1][1] * h_temp + y1_temp).astype('int')
                    x3 = (plate[0][2] * w_temp + x1_temp).astype('int')
                    y3 = (plate[1][2] * h_temp + y1_temp).astype('int')
                    x4 = (plate[0][3] * w_temp + x1_temp).astype('int')
                    y4 = (plate[1][3] * h_temp + y1_temp).astype('int')

                    plate = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
                                     np.int32)
                    plate = plate.reshape((-1, 1, 2))
                    cv2.polylines(img_vis, [plate], True, (255, 0, 0), 4)
                    cv2.putText(img_vis, plate_string, (x1, y1),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.1, (0, 0, 255), 2)
                cv2.imwrite("%s/%s" % (detected_cars_dir, bname),
                            img_vis[y1_temp:y2_temp, x1_temp:x2_temp])
    track_end = time.time()

    # images
    #    dt_start = time.time()

    print("Time taken to track the boxes is", track_end - track_start)
    end = time.time()
    fps = 1.0 / (end - start)
    #dt_fps = 1.0/(dt_dr+yolo_end-yolo_start)
    avg_fps += fps
    cv2.putText(img_vis, "FPS: {:.4f}".format(fps),
                (int(0.8 * img_dim[0]), 100), cv2.FONT_HERSHEY_SIMPLEX, 1.1,
                (255, 255, 0), 4)
    #cv2.putText(img_vis, "Detect FPS: {:.4f}".format(
    #    dt_fps), (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.1, (255,255, 0), 4)
    # Book keeping
    deleted_tracks = filter(lambda x: x.no_losses > feature_tp, tracker_list)

    for trk in deleted_tracks:
        track_id_list.append(trk.id)

    tracker_list = [x for x in tracker_list if x.no_losses <= feature_tp]

    if debug:
        print('Ending tracker_list: ', len(tracker_list))
        print('Ending good tracker_list: ', len(good_tracker_list))

    return img_vis
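A short sketch of the deduplication step applied to the tracker boxes above: with identical scores, nms simply keeps one box out of every overlapping group; the boxes are made up.

import torch
from torchvision.ops import nms

good_boxes = [[0., 0., 10., 10.], [1., 1., 11., 11.], [40., 40., 55., 55.]]
keep = nms(torch.FloatTensor(good_boxes),
           torch.FloatTensor([1.0] * len(good_boxes)), 0.45)
print(keep)  # one index from the overlapping pair, plus the distant box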