# Shared imports for the snippets below; Detection, Track and the project
# helpers (update_tracks, obtain_new_tracks, TrackingOF, ...) come from the
# project's own modules.
import pickle
import xml.etree.ElementTree as ET

import cv2
import motmetrics as mm
import numpy as np
from tqdm import tqdm


def compute_histogram_gt(gt_detections, video_path):
    """Group ground-truth detections into tracks, attaching the RGB histogram
    of each detection's image patch."""
    capture = cv2.VideoCapture(video_path)
    n_frame = 0
    tracks_gt_with_hist = []

    while capture.isOpened():
        valid, image = capture.read()
        if not valid:
            break

        detections_on_frame = [x for x in gt_detections if x.frame == n_frame]

        for detection in detections_on_frame:
            track_corresponding = [
                t for t in tracks_gt_with_hist if t.id == detection.track_id
            ]
            if len(track_corresponding) > 0:
                track_corresponding[0].detections.append(
                    Detection(detection.frame,
                              detection.label,
                              detection.xtl,
                              detection.ytl,
                              detection.width,
                              detection.height,
                              detection.confidence,
                              track_id=detection.track_id,
                              histogram=rgb_histogram(
                                  image[detection.ytl:(detection.ytl +
                                                       detection.height),
                                        detection.xtl:(detection.xtl +
                                                       detection.width)])))
            else:
                track_corresponding = Track(detection.track_id, [
                    Detection(detection.frame,
                              detection.label,
                              detection.xtl,
                              detection.ytl,
                              detection.width,
                              detection.height,
                              detection.confidence,
                              track_id=detection.track_id,
                              histogram=rgb_histogram(
                                  image[detection.ytl:(detection.ytl +
                                                       detection.height),
                                        detection.xtl:(detection.xtl +
                                                       detection.width)]))
                ])
                tracks_gt_with_hist.append(track_corresponding)

        n_frame += 1

    capture.release()
    return tracks_gt_with_hist
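# `rgb_histogram` is not defined in this snippet. A minimal sketch of what it
# might look like: a normalised per-channel colour histogram of the patch
# (the 32-bin count is an illustrative choice, not taken from the source).
def rgb_histogram(patch, bins=32):
    # one histogram per colour channel, concatenated and normalised so that
    # patches of different sizes remain comparable
    hist = [cv2.calcHist([patch], [c], None, [bins], [0, 256]) for c in range(3)]
    hist = np.concatenate(hist).ravel()
    return hist / (hist.sum() + 1e-8)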
def transform_detection(det_0, homography1, homography2):
    """Project a detection from camera 0 into camera 1 by mapping the two
    bottom corners of its bbox (the ground-contact points) through the
    homographies, then rebuilding the box with the original aspect ratio."""
    minc, minr, maxc, maxr = det_0.bbox
    H1 = np.array(homography1)
    H2 = np.array(homography2)

    # bottom-left corner of the bbox
    x = minc
    y = maxr
    det_1_hom = apply_homography_to_point(x, y, H1, H2)

    # bottom-right corner of the bbox
    x_br = maxc
    y_br = maxr
    det_1_hom_br = apply_homography_to_point(x_br, y_br, H1, H2)

    # keep the original aspect ratio to recover the box height in camera 1
    original_ratio = (maxr - minr) / (maxc - minc)  # height / width of bbox
    width_transformed = abs(det_1_hom[0] - det_1_hom_br[0])
    height_transformed = original_ratio * width_transformed

    # rebuild [min_col, min_row, max_col, max_row] from the projected bottom edge
    predicted_bbox_1 = [
        min(det_1_hom[0], det_1_hom_br[0]),
        min(det_1_hom[1], det_1_hom_br[1]) - height_transformed,
        max(det_1_hom[0], det_1_hom_br[0]),
        max(det_1_hom[1], det_1_hom_br[1])
    ]

    predicted_correspondence = Detection(
        det_0.frame,
        det_0.label,
        predicted_bbox_1[0],
        predicted_bbox_1[1],
        predicted_bbox_1[2] - predicted_bbox_1[0],
        predicted_bbox_1[3] - predicted_bbox_1[1],
        histogram=det_0.histogram)
    return predicted_correspondence
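# `apply_homography_to_point` is not defined in this snippet. A minimal
# sketch, assuming H1 maps camera-0 pixels onto a shared ground plane and H2
# does the same for camera 1, so camera 0 -> camera 1 goes through inv(H2):
def apply_homography_to_point(x, y, H1, H2):
    p = H1 @ np.array([x, y, 1.0])   # camera 0 pixel -> ground plane
    p /= p[2]                        # normalise homogeneous coordinates
    q = np.linalg.inv(H2) @ p        # ground plane -> camera 1 pixel
    q /= q[2]
    return q[0], q[1]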
def filtering_parked(detections, video_path):
    """Discard detections whose bounding box barely moves between frame n
    and frame n + PIXELS_SHIFT, i.e. parked cars."""
    print("Removing parked cars")

    capture = cv2.VideoCapture(video_path)
    n_frame = 0
    final_det = list()

    while capture.isOpened():
        valid, frame = capture.read()
        if not valid:
            break
        fr = frame.copy()
        detections_on_frame = [x for x in detections if x.frame == n_frame]
        detections_on_next_frame = [
            x for x in detections if x.frame == n_frame + PIXELS_SHIFT
        ]

        detections_bboxes = [o.bbox for o in detections_on_frame]
        detections_bboxes_next = [o.bbox for o in detections_on_next_frame]

        new_candidates = []
        for candidate in detections_bboxes:
            minc, minr, maxc, maxr = candidate
            new_candidate = [minc, minr, maxc, maxr, 1]  # 1 = moving, 0 = parked
            for next_candidate in detections_bboxes_next:
                n_minc, n_minr, n_maxc, n_maxr = next_candidate
                # A box whose four corners all stay within WINDOW_FRAME pixels
                # after PIXELS_SHIFT frames has not moved: mark it as parked.
                if (minc - WINDOW_FRAME <= n_minc <= minc + WINDOW_FRAME
                        and minr - WINDOW_FRAME <= n_minr <= minr + WINDOW_FRAME
                        and maxc - WINDOW_FRAME <= n_maxc <= maxc + WINDOW_FRAME
                        and maxr - WINDOW_FRAME <= n_maxr <= maxr + WINDOW_FRAME):
                    new_candidate[4] = 0
                    break
            new_candidates.append(new_candidate)

        for n, candidate in enumerate(new_candidates):
            if candidate[4] != 0:
                minc, minr, maxc, maxr, parked = candidate

                final_det.append(
                    Detection(detections_on_frame[n].frame,
                              detections_on_frame[n].label, minc, minr,
                              maxc - minc, maxr - minr,
                              detections_on_frame[n].confidence))

        # det_on_frame = [x for x in final_det if x.frame == n_frame]
        # det_bboxes = [o.bbox for o in det_on_frame]
        # for candidate in det_bboxes:
        #     minc, minr, maxc, maxr = candidate
        #     cv2.rectangle(fr, (minc, minr), (maxc, maxr), (0, 0, 255), 8)  # Red
        # cv2.imshow('fr', fr)
        # cv2.waitKey(0)
        n_frame += 1

    capture.release()
    return final_det
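# PIXELS_SHIFT (the frame look-ahead) and WINDOW_FRAME (the per-corner pixel
# tolerance) are module-level constants not shown here. Illustrative values
# only, not taken from the source:
# PIXELS_SHIFT = 10   # compare against the detections 10 frames later
# WINDOW_FRAME = 5    # a box moving <= 5 px per corner counts as parked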
def filtering_nms(detections, video_path):

    print("Applying non-maximum-supression")
    capture = cv2.VideoCapture(video_path)
    n_frame = 0
    final_det = list()

    while capture.isOpened():
        valid, frame = capture.read()
        if not valid:
            break
        fr = frame.copy()
        detections_on_frame = [x for x in detections if x.frame == n_frame]

        detections_bboxes = [o.bbox for o in detections_on_frame]
        pick = non_max_suppression_fast(detections_bboxes)
        for n, candidate in enumerate(detections_bboxes):
            if n in pick:
                minc, minr, maxc, maxr = candidate
                w = maxc - minc
                h = maxr - minr
                if w >= 75 and h >= 75:  # discard boxes that are too small
                    # cv2.rectangle(fr, (minc, minr), (maxc, maxr), (0, 0, 255), 8)  # Red
                    final_det.append(
                        Detection(detections_on_frame[n].frame,
                                  detections_on_frame[n].label, minc, minr,
                                  w, h,
                                  detections_on_frame[n].confidence))
        # cv2.imshow('fr', fr)
        # cv2.waitKey(10)
        n_frame += 1

    capture.release()
    return final_det
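# `non_max_suppression_fast` is not defined here. A standard sketch in the
# spirit of Malisiewicz's fast NMS, returning the *indices* of the kept boxes
# (which is how `pick` is used above); the 0.3 overlap threshold is an
# assumed default, not taken from the source:
def non_max_suppression_fast(boxes, overlap_thresh=0.3):
    if len(boxes) == 0:
        return []
    boxes = np.asarray(boxes, dtype=float)
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # overlap of all remaining boxes with the picked one
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:last]]
        # drop the picked box and everything overlapping it too much
        idxs = np.delete(idxs, np.concatenate(
            ([last], np.where(overlap > overlap_thresh)[0])))
    return pick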
def read_detections(path):
    """Read detections from a MOTChallenge-style txt file.
    Each line: [frame, -1, left, top, width, height, conf, -1, -1, -1]
    """
    frame_detections = []

    with open(path) as f:
        for line in f:
            parts = line.split(',')
            frame_id = int(parts[0])

            tl_x = int(float(parts[2]))
            tl_y = int(float(parts[3]))
            width = int(float(parts[4]))
            height = int(float(parts[5]))

            frame_detections.append(
                Detection(frame_id, 'car', tl_x, tl_y, width, height, 1))

    return frame_detections
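# Hypothetical usage (the file name is illustrative, not from the source):
# detections = read_detections('det/det_mask_rcnn.txt')
# print(len(detections), 'detections loaded')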
def create_detections(detection_mat, frame_idx, w_img=0, h_img=0):
    """Create detections for given frame index from the raw detection matrix.

    Parameters
    ----------
    detection_mat : ndarray
        Matrix of detections. The first 10 columns of the detection matrix are
        in the standard MOTChallenge detection format; the remaining columns
        store the feature vector associated with each detection.
    frame_idx : int
        The frame index.
    w_img, h_img : Optional[int]
        Image width and height, used only by the (currently commented-out)
        clipping of boxes to the image bounds.

    Returns
    -------
    List[tracker.Detection]
        Returns detection responses at given frame index.

    """
    frame_indices = detection_mat[:, 0].astype(int)
    mask = frame_indices == frame_idx
    detection_list = []
    for row in detection_mat[mask]:
        bbox, confidence, feature = row[2:6], row[6], row[10:]
        '''
        bbx2 = bbox[0]+bbox[2] if bbox[0]+bbox[2]<=w_img else w_img
        bby2 = bbox[1]+bbox[3] if bbox[1]+bbox[3]<=h_img else h_img
        bbx1 = bbox[0] if bbox[0]>=0 else 0.0
        bby1 = bbox[1] if bbox[1]>=0 else 0.0
        bbox[0] = bbx1
        bbox[1] = bby1
        bbox[2] = bbx2 - bbx1
        bbox[3] = bby2 - bby1
        '''
        detection_list.append(Detection(bbox, confidence, feature))
    return detection_list
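# Hypothetical usage, assuming detections plus appearance features were saved
# as one matrix in MOTChallenge layout (the file name is illustrative):
# detection_mat = np.load('detections_with_features.npy')
# dets = create_detections(detection_mat, frame_idx=1)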
def track_objects_single(video_path, detections_list, gt_list,
                         optical_flow=False, of_track=TrackingOF,
                         display=False, export_frames=False, idf1=True,
                         save_pkl=True, name_pkl=''):

    colors = np.random.rand(500, 3)  # used only for display
    tracks = []
    max_track = -1
    new_detections = []
    of_detections = []

    if idf1:
        acc = mm.MOTAccumulator(auto_id=True)

    capture = cv2.VideoCapture(video_path)
    n_frame = 0
    pbar = tqdm(total=int(capture.get(cv2.CAP_PROP_FRAME_COUNT)))

    while capture.isOpened():
        valid, image = capture.read()
        if not valid:
            break
        frame_tracks = {}

        detections_on_frame = [x for x in detections_list if x.frame == n_frame]
        gt_on_frame = [x for x in gt_list if x.frame == n_frame]

        tracks, unused_detections, frame_tracks = update_tracks(image, tracks, detections_on_frame, frame_tracks)
        tracks, max_track, frame_tracks = obtain_new_tracks(tracks, unused_detections, max_track, frame_tracks)

        if display and n_frame % 2 == 0 and n_frame < 200:
            visualize_tracks(image, frame_tracks, colors, display=display)

        if export_frames:
            visualize_tracks_opencv(image, frame_tracks, colors, export_frames=export_frames,
                             export_path="output_frames/tracking/frame_{:04d}.png".format(n_frame))

        # IDF1 computing
        detec_bboxes = []
        detec_ids = []
        for key, value in frame_tracks.items():
            detec_ids.append(key)
            bbox = value['bbox']
            conf = value['confidence']
            detec_bboxes.append(bbox)
            cd = Detection(n_frame, 'car', bbox[0], bbox[1], bbox[2] - bbox[0],
                                            bbox[3] - bbox[1], conf, track_id=key,
                                            histogram=rgb_histogram(image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2]), :]))
            new_detections.append(cd)
        if optical_flow:
            of_detections.append(of_track.check_optical_flow(new_detections, n_frame))

        gt_bboxes = []
        gt_ids = []
        for gt in gt_on_frame:
            gt_bboxes.append(gt.bbox)
            gt_ids.append(gt.track_id)

        # motmetrics expects rectangles as [x_top_left, y_top_left, width, height]
        mm_gt_bboxes = [[bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
                        for bbox in gt_bboxes]
        mm_detec_bboxes = [[bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
                           for bbox in detec_bboxes]

        distances_gt_det = mm.distances.iou_matrix(mm_gt_bboxes, mm_detec_bboxes, max_iou=1.)
        if idf1:
            acc.update(gt_ids, detec_ids, distances_gt_det)

        pbar.update(1)
        n_frame += 1

    pbar.close()
    capture.release()
    cv2.destroyAllWindows()

    if idf1:
        print(acc.mot_events)
        mh = mm.metrics.create()
        summary = mh.compute(acc, metrics=mm.metrics.motchallenge_metrics, name='acc')
        with open("results/metrics.txt", "a") as f:
            f.write(summary.to_string() + "\n")
        print(summary)

    if save_pkl:
        with open('detections' + name_pkl + '.pkl', 'wb') as f:
            pickle.dump(new_detections, f)
        with open('tracks' + name_pkl + '.pkl', 'wb') as f:
            pickle.dump(tracks, f)

    return new_detections, tracks
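# Hypothetical driver, assuming AICity-style files (all paths illustrative):
# gt_list, _ = read_annotations_from_txt('gt/gt.txt')
# det_list = read_detections('det/det_mask_rcnn.txt')
# new_dets, tracks = track_objects_single('vdo.avi', det_list, gt_list,
#                                         display=False, idf1=True)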
def read_annotations_from_xml(annotation_path, video_path):
    """
    Read CVAT-style XML annotations, one Detection per annotated box.
    Arguments:
    annotation_path: path to the XML file, parsed with ET.parse
    video_path: path to the video, opened as cv2.VideoCapture
    :returns: (list of Detection, list of Track)
    """
    capture = cv2.VideoCapture(video_path)
    root = ET.parse(annotation_path).getroot()

    ground_truths = []
    tracks = []
    images = []
    num = 0

    pbar = tqdm(total=int(capture.get(cv2.CAP_PROP_FRAME_COUNT)))

    while capture.isOpened():
        valid, image = capture.read()
        if not valid:
            break
        # optionally: stop after the first `numannotated` annotated frames
        # if num > numannotated:
        #     break

        images.append(image)
        for track in root.findall('track'):
            gt_id = track.attrib['id']
            label = track.attrib['label']
            box = track.find("box[@frame='{0}']".format(str(num)))

            #if box is not None and (label == 'car' or label == 'bike'):    # Read cars and bikes
            if box is not None and label == 'car':  # Read cars

                if box.attrib['occluded'] == '1':  # Discard occluded
                    continue

                #if label == 'car' and box[0].text == 'true':               # Discard parked cars
                #    continue

                frame = int(box.attrib['frame'])
                #if frame < 534:
                #    continue

                xtl = int(float(box.attrib['xtl']))
                ytl = int(float(box.attrib['ytl']))
                xbr = int(float(box.attrib['xbr']))
                ybr = int(float(box.attrib['ybr']))
                ground_truths.append(
                    Detection(frame, label, xtl, ytl, xbr - xtl + 1,
                              ybr - ytl + 1, 1, gt_id))
                track_corresponding = [t for t in tracks if t.id == gt_id]
                if len(track_corresponding) > 0:
                    track_corresponding[0].detections.append(
                        Detection(frame + 1, label, xtl, ytl, xbr - xtl + 1,
                                  ybr - ytl + 1, 1))
                else:
                    track_corresponding = Track(gt_id, [
                        Detection(frame + 1, label, xtl, ytl, xbr - xtl + 1,
                                  ybr - ytl + 1, 1)
                    ])
                    tracks.append(track_corresponding)
        pbar.update(1)
        num += 1

    # print(ground_truths)
    pbar.close()
    capture.release()
    return ground_truths, tracks
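# For reference, the XML layout this parser expects (CVAT-style video
# annotation), reconstructed from the attribute accesses above:
#   <annotations>
#     <track id="0" label="car">
#       <box frame="0" xtl="..." ytl="..." xbr="..." ybr="..." occluded="0"/>
#     </track>
#   </annotations>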
def read_annotations_from_txt(gt_path, analyze=False):
    """
    Read annotations from a MOTChallenge-style txt file.
    Arguments:
    gt_path: path to .txt file
    analyze: if True, also report width/height/aspect-ratio statistics
    :returns: (list of Detection, list of Track)
    """
    ground_truths_list = list()
    tracks = []
    if analyze:
        max_w = 0
        min_w = 2000
        max_h = 0
        min_h = 2000
        min_ratio = 100
        max_ratio = 0
    with open(gt_path) as f:
        for line in f:
            data = line.split(',')
            #if int(data[0])-1 < 534:
            #    continue
            detection = Detection(int(data[0]) - 1,
                                  'car',
                                  int(float(data[2])),
                                  int(float(data[3])),
                                  int(float(data[4])),
                                  int(float(data[5])),
                                  float(data[6]),
                                  track_id=int(data[1]))
            ground_truths_list.append(detection)
            track_corresponding = [t for t in tracks if t.id == int(data[1])]
            if len(track_corresponding) > 0:
                track_corresponding[0].detections.append(detection)
            else:
                tracks.append(Track(int(data[1]), [detection]))
            if analyze:
                w, h = int(data[4]), int(data[5])
                min_w, max_w = min(min_w, w), max(max_w, w)
                min_h, max_h = min(min_h, h), max(max_h, h)
                min_ratio = min(min_ratio, h / w)
                max_ratio = max(max_ratio, h / w)
    if analyze:
        print('width: [{0}, {1}]'.format(min_w, max_w))
        print('height: [{0}, {1}]'.format(min_h, max_h))
        print('ratio: [{0}, {1}]'.format(min_ratio, max_ratio))

    return ground_truths_list, tracks
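# `Detection` and `Track` are used throughout but not defined in this snippet
# (note that create_detections above uses deep_sort's own Detection class,
# which takes (bbox, confidence, feature) instead). A minimal sketch
# consistent with the constructor calls in this file; anything beyond the
# field names is illustrative:
class Detection:
    def __init__(self, frame, label, xtl, ytl, width, height,
                 confidence=1.0, track_id=None, histogram=None):
        self.frame = frame
        self.label = label
        self.xtl, self.ytl = xtl, ytl
        self.width, self.height = width, height
        self.confidence = confidence
        self.track_id = track_id
        self.histogram = histogram

    @property
    def bbox(self):
        # (min_col, min_row, max_col, max_row), as unpacked by the filters
        return (self.xtl, self.ytl,
                self.xtl + self.width, self.ytl + self.height)


class Track:
    def __init__(self, track_id, detections):
        self.id = track_id
        self.detections = detections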