Example #1
def evaluate(model, data_loader, device, save_path=None):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    y_true = []
    y_pred = []
    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        model_time = time.time()
        outputs = model(image)
        outputs = [{k: v.to(cpu_device)
                    for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time
        metric_logger.update(model_time=model_time)

        for target, output in zip(targets, outputs):
            frame = target['image_id'].item()
            y_true.append([
                Detection(frame, None, label, *box)
                for box, label in zip(target['boxes'], target['labels'])
            ])
            y_pred.append([
                Detection(frame, None, label, *box, score)
                for box, label, score in zip(output['boxes'], output['labels'],
                                             output['scores'])
            ])

    evaluator_time = time.time()
    mAP, _, _ = mean_average_precision(y_true, y_pred, sort_method='score')
    evaluator_time = time.time() - evaluator_time
    metric_logger.update(map=mAP, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    torch.set_num_threads(n_threads)

    if save_path:
        os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
        with open(save_path, 'w') as f:
            for frame_dets in y_pred:
                for d in frame_dets:
                    f.write(
                        f'{d.frame}, -1, {d.xtl}, {d.ytl}, {d.width}, {d.height}, {d.score}, -1, -1, -1\n'
                    )
Example #2
def parse_annotations_from_xml(path):
    with open(path) as f:
        tracks = xmltodict.parse(f.read())['annotations']['track']

    annotations = []
    for track in tracks:
        track_id = track['@id']
        label = track['@label']
        # xmltodict returns a dict (not a list) when a track has a single box
        boxes = track['box'] if isinstance(track['box'], list) else [track['box']]
        for box in boxes:
            if label == 'car':
                parked = box['attribute']['#text'].lower() == 'true'
            else:
                parked = None
            annotations.append(
                Detection(frame=int(box['@frame']),
                          id=int(track_id),
                          label=label,
                          xtl=float(box['@xtl']),
                          ytl=float(box['@ytl']),
                          xbr=float(box['@xbr']),
                          ybr=float(box['@ybr']),
                          parked=parked))

    return annotations
Example #3
def refine_bbox(last_detections, new_detection, k=0.5):
    # No refinement for the first two frames
    if len(last_detections) < 2:
        return new_detection

    # Predict coordinates of the new detection by constant-velocity extrapolation from the last two detections
    pred_detection_xtl = 2 * last_detections[1].xtl - last_detections[0].xtl
    pred_detection_ytl = 2 * last_detections[1].ytl - last_detections[0].ytl
    pred_detection_xbr = 2 * last_detections[1].xbr - last_detections[0].xbr
    pred_detection_ybr = 2 * last_detections[1].ybr - last_detections[0].ybr

    # Blend detected and predicted coordinates (weight k on the detection)
    refined_xtl = new_detection.xtl * k + pred_detection_xtl * (1 - k)
    refined_ytl = new_detection.ytl * k + pred_detection_ytl * (1 - k)
    refined_xbr = new_detection.xbr * k + pred_detection_xbr * (1 - k)
    refined_ybr = new_detection.ybr * k + pred_detection_ybr * (1 - k)

    # Get refined detection
    refined_detection = Detection(frame=new_detection.frame,
                                  id=new_detection.id,
                                  label=new_detection.label,
                                  xtl=refined_xtl,
                                  ytl=refined_ytl,
                                  xbr=refined_xbr,
                                  ybr=refined_ybr,
                                  score=new_detection.score)

    return refined_detection
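
A minimal usage sketch (illustrative, not from the source): keep a sliding two-detection history per track and feed each incoming detection through refine_bbox so it is blended against the constant-velocity prediction.

# Illustrative only: `track_detections` is assumed to be a frame-ordered
# list of Detection objects belonging to a single track.
history = []
refined_track = []
for det in track_detections:
    refined = refine_bbox(history, det, k=0.5)
    refined_track.append(refined)
    history = (history + [refined])[-2:]  # keep only the last two detections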
Example #4
def task2_2(debug=False,
            det_path='data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt'):
    """
    Object tracking: tracking with a Kalman filter
    """

    reader = AICityChallengeAnnotationReader(
        path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path=det_path)
    dets = reader.get_annotations(classes=['car'])

    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')

    tracker = Sort()
    tracks = defaultdict(list)

    y_true = []
    y_pred = []
    acc = MOTAcumulator()
    for frame in dets.keys():
        detections = dets.get(frame, [])

        new_detections = tracker.update(
            np.array([[*d.bbox, d.score] for d in detections]))
        new_detections = [
            Detection(frame, int(d[-1]), 'car', *d[:4]) for d in new_detections
        ]

        y_true.append(gt.get(frame, []))
        y_pred.append(new_detections)

        acc.update(y_true[-1], y_pred[-1])

        if debug:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()
            for d in new_detections:
                tracks[d.id].append(d.bbox)
                np.random.seed(d.id)
                color = tuple(np.random.randint(0, 256, 3).tolist())
                for dd in tracks[d.id]:
                    cv2.circle(img, (int(
                        (dd[0] + dd[2]) / 2), int((dd[1] + dd[3]) / 2)), 5,
                               color, -1)

            cv2.imshow('image', cv2.resize(img, (900, 600)))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
    idf1, idp, idr = acc.get_idf1()
    print(
        f"AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, IDF1: {idf1:.4f}, IDP: {idp:.4f}, IDR: {idr:.4f}"
    )
Example #5
def bounding_boxes(mask, min_height, max_height, min_width, max_width, frame):
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)
    detections = []
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if min_width < w < max_width and min_height < h < max_height:
            detections.append(Detection(frame, None, 'car', x, y, x + w,
                                        y + h))
    return detections
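
bounding_boxes expects a binary foreground mask. A hedged sketch of how such a mask could be produced with OpenCV's MOG2 background subtractor (the size thresholds and video path below are placeholders, not values from the source):

import cv2

backsub = cv2.createBackgroundSubtractorMOG2()
cap = cv2.VideoCapture('vdo.avi')
frame_idx = 0
while True:
    ret, img = cap.read()
    if not ret:
        break
    mask = backsub.apply(img)
    # MOG2 marks shadows as 127; keep only confident foreground (255)
    _, mask = cv2.threshold(mask, 200, 255, cv2.THRESH_BINARY)
    dets = bounding_boxes(mask, min_height=40, max_height=400,
                          min_width=40, max_width=500, frame=frame_idx)
    frame_idx += 1
cap.release()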
Example #6
    def propagate_track(self, frame_id):
        # Propagate the track as if there was a detection; the object may be
        # temporarily occluded or simply missed by the detector.
        # Use the predicted bb as the measurement.
        det = Detection.det_from_numpy_array(self.history[-1])
        det.bbox = self.get_predicted_next_bb()
        # TODO: adjust bbox to keep the same width/height but only propagate the x,y centroid position
        # pred_c_x, pred_c_y = util.centroid_from_bb(self.get_predicted_next_bb())
        # wid, ht = util.wid_ht_from_bb(det.bbox)
        # det.bbox = np.array([pred_c_x - wid / 2,
        #                      pred_c_y - ht / 2,
        #                      pred_c_x + wid / 2,
        #                      pred_c_y + ht / 2], dtype=np.int32)

        det.frame_id = frame_id
        self.add_to_track(det)
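
A hedged sketch of that TODO, reusing the helper calls already named in the commented-out lines (util.centroid_from_bb and util.wid_ht_from_bb are assumed to exist with exactly those signatures; nothing here is confirmed by the source):

    def propagate_track_keep_size(self, frame_id):
        # Sketch: propagate only the centroid, keeping the last bbox's width/height
        det = Detection.det_from_numpy_array(self.history[-1])
        pred_c_x, pred_c_y = util.centroid_from_bb(self.get_predicted_next_bb())
        wid, ht = util.wid_ht_from_bb(det.bbox)
        det.bbox = np.array([pred_c_x - wid / 2,
                             pred_c_y - ht / 2,
                             pred_c_x + wid / 2,
                             pred_c_y + ht / 2], dtype=np.int32)
        det.frame_id = frame_id
        self.add_to_track(det)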
Example #7
def parse_annotations_from_txt(path):
    """
    MOTChallenge format [frame, ID, left, top, width, height, conf, -1, -1, -1]
    """

    with open(path) as f:
        lines = f.readlines()

    annotations = []
    for line in lines:
        data = line.split(',')
        annotations.append(
            Detection(frame=int(data[0]) - 1,  # MOTChallenge frames are 1-indexed
                      id=int(data[1]),
                      label='car',
                      xtl=float(data[2]),
                      ytl=float(data[3]),
                      xbr=float(data[2]) + float(data[4]),
                      ybr=float(data[3]) + float(data[5]),
                      score=float(data[6])))

    return annotations
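
The trackers in the other examples consume detections grouped per frame; below is a minimal inline equivalent of the project's group_by_frame helper (that helper is referenced in Example #12 but not shown here, and the detection path is a placeholder):

from collections import defaultdict

def group_detections_by_frame(annotations):
    # inline stand-in for the project's group_by_frame helper
    grouped = defaultdict(list)
    for det in annotations:
        grouped[det.frame].append(det)
    return dict(grouped)

dets = group_detections_by_frame(
    parse_annotations_from_txt('det/det_mask_rcnn.txt'))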
Example #8

if __name__ == '__main__':
    data_directory = "../NSL-KDD/"
    train_file = data_directory + "KDDTrain+.csv"
    train_file_20_percent = data_directory + "KDDTrain+_20Percent.csv"
    test_file = data_directory + "KDDTest+.csv"

    normal_file = data_directory + "20/normal.csv"
    data_file = data_directory + "20/data.csv"
    labels_file = data_directory + "20/labels.csv"

    det = Detection(classes=classes,
                    threshold_percentile=threshold_percentile,
                    truth_size=truth_size,
                    truth_update_frac=truth_update_frac,
                    truth_save_folder=truth_save_folder,
                    outlier_save_folder=outlier_save_folder,
                    classifier_save_folder=classifier_save_folder)
    """
    normal_data = load_data(normal_file, categorical_feature_list=categorical_feature_list, categories_list=categories_list)

    X = load_data(data_file, categorical_feature_list=categorical_feature_list, categories_list=categories_list)

    y = load_labels(labels_file, labels=labels, classes=classes)
    y_cls = y['class']
    """

    df = pd.read_csv(train_file, header=None, names=headers)  # train_file already includes the directory
    df_X = df[features].copy()
    df_y = df[target].copy()
Example #9
def task1_1(architecture,
            start=0,
            length=None,
            save_path='results/week3',
            gpu=0,
            visualize=False,
            save_detection='detection_results/'):
    """
    Object detection: off-the-shelf
    """

    tensor = transforms.ToTensor()

    if architecture.lower() == 'fasterrcnn':
        model = detection.fasterrcnn_resnet50_fpn(pretrained=True)

    elif architecture.lower() == 'maskrcnn':
        model = detection.maskrcnn_resnet50_fpn(pretrained=True)
    else:
        raise ValueError(architecture)
    save_path = os.path.join(save_path, architecture)

    # Read Video and prepare ground truth
    cap = cv2.VideoCapture('data/AICity_data/train/S03/c010/vdo.avi')
    if not length:
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    reader = AICityChallengeAnnotationReader(
        path='data/ai_challenge_s03_c010-full_annotation.xml')
    gt = reader.get_annotations(classes=['car'])
    gt = {frame: gt.get(frame, []) for frame in range(start, start + length)}

    # Start Inference
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)  # must be set before CUDA is initialized
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()
    detections = {}
    y_true, y_pred = [], []

    if save_detection:
        path = os.path.join(save_detection, architecture)
        if not os.path.exists(path):
            os.makedirs(path)
        detection_file = open(f'{path}/{architecture.lower()}.txt', 'w')

    with torch.no_grad():
        for frame in range(start, start + length):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, img = cap.read()

            # Transform input to tensor
            print(f'Predict: {frame}')
            start_t = time.time()

            x = [tensor(img).to(device)]
            preds = model(x)[0]
            print(
                f'Inference time per frame: {round(time.time() - start_t, 2)}')

            # filter car predictions and confidences
            joint_preds = list(
                zip(preds['labels'], preds['boxes'], preds['scores']))
            # class 3 is 'car' in the COCO label map used by torchvision
            car_det = list(filter(lambda p: p[0] == 3, joint_preds))
            # car_det = list(filter(lambda p: p[2] > 0.70, car_det))
            car_det = get_nms(car_det, 0.7)

            # add detections
            detections[frame] = []
            for det in car_det:
                det_obj = Detection(frame=frame,
                                    id=None,
                                    label='car',
                                    xtl=float(det[1][0]),
                                    ytl=float(det[1][1]),
                                    xbr=float(det[1][2]),
                                    ybr=float(det[1][3]),
                                    score=det[2])

                detections[frame].append(det_obj)

                if save_detection:
                    detection_file.write(
                        f"{frame},-1,{det_obj.xtl},{det_obj.ytl},{det_obj.width},{det_obj.height},{det_obj.score},-1,-1,-1\n"
                    )

            y_pred.append(detections[frame])
            y_true.append(gt.get(frame, []))

    ap, prec, rec = mean_average_precision(y_true, y_pred, classes=['car'])
    print(
        f'Network: {architecture}, AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}'
    )

    if visualize:
        print(f'Saving result to {save_path}')
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        video_iou_plot(gt,
                       detections,
                       video_path='data/AICity_data/train/S03/c010/vdo.avi',
                       title=f'{architecture} detections',
                       save_path=save_path)

    cv2.destroyAllWindows()

    if save_detection:
        detection_file.close()
Example #10
while True:
    _, img = vc.read()
    if img is None:
        break
    current_frame_position += 1
    img = cv2.resize(
        img,
        (int(img.shape[1] * scale_factor), int(img.shape[0] * scale_factor)))
    # detect objects in frame
    bboxes = detector.detect(img)
    detections = []  # our list of Detections objects
    bb = None
    current_time = datetime.datetime.now()
    if bboxes is not None and len(bboxes) > 0:
        for i, bb in enumerate(bboxes):
            det = Detection()
            det.bbox = np.array([bb[0], bb[1], bb[2], bb[3]])
            det.frame_id = current_frame_position
            detections.append(det)
            # DRAW DETECTIONS IN GREEN (B, G, R) = (0, 255, 0)
            cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
            # log the data, recall headers = ['time', 'id', 'x1', 'y1', 'x2', 'y2']
            bb_id = i
            data = [current_time, bb_id, bb[0], bb[1], bb[2], bb[3]]
            logger.log(data=data)

    tracker.update_tracks(detections=detections,
                          frame_id=current_frame_position)
    tracker.draw_tracks(img)
    cv2.imshow('img', img)
    key = cv2.waitKey(1)
Example #11
def run():
    # create access to settings object
    settings = Settings()
    # initiate logging
    if settings.ENABLE_LOGGING:
        if not os.path.exists(settings.LOGGING_DIRECTORY):
            os.mkdir(settings.LOGGING_DIRECTORY)
        log_filename = os.path.join(settings.LOGGING_DIRECTORY,
                                    settings.OUTPUT_VIDEO_FILENAME + '.csv')
        # [current_frame_position, current_time_string, uid, latest_bbox, lidar_distance]
        headers = [
            'frame#', 'time', 'uid', 'x1', 'y1', 'x2', 'y2', 'lidar_d(ft)',
            'grnd_angle_d(ft)'
        ]
        data_logger = DataLogger(filename=log_filename, headers=headers)

    vc = cv2.VideoCapture()
    if settings.SIMULATION_MODE:
        vc.open(settings.INPUT_VIDEO_FILE)
    else:
        vc.open(settings.WEBCAM_ID)

    if settings.RECORD_VIDEO:
        if not os.path.exists(settings.OUTPUT_VIDEO_FOLDER):
            os.mkdir(settings.OUTPUT_VIDEO_FOLDER)
        # get first frame of video to get recording size
        _, frame = vc.read()
        # declare video writer obj
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        vw = cv2.VideoWriter(
            os.path.join(settings.OUTPUT_VIDEO_FOLDER,
                         settings.OUTPUT_VIDEO_FILENAME + '.avi'), fourcc,
            20.0, (frame.shape[1], frame.shape[0]))

    detector = CarDetector()
    tracker = MultipleObjectTracker()
    if settings.SIMULATION_MODE:
        lidar = LidarSensor(mode='simulation',
                            simulation_data_file=settings.SIMULATION_FILE)
    else:
        lidar = LidarSensor()
    distance_predictor = DistancePredictor()

    scale_factor = 0.75  # reduce frame by this size
    tracked_bbox = []  # a list of past bboxes
    current_frame_position = 0

    while True:
        # take a snapshot of the current time
        current_time = datetime.datetime.now()
        current_time_string = current_time.strftime(
            '%Y-%m-%d %H:%M:%S.%f')[:-3]
        # first get lidar distance
        lidar_distance = lidar.get_distance(
            frame_number=current_frame_position)
        # now acquire image and get detections
        _, img = vc.read()
        if img is None:
            print('null image retrieved! frame={}'.format(
                current_frame_position))
            break
        current_frame_position += 1
        img = cv2.resize(img, (int(
            img.shape[1] * scale_factor), int(img.shape[0] * scale_factor)))
        # detect objects in frame
        bboxes = detector.detect(img)
        detections = []  # our list of Detections objects
        bb = None
        if bboxes is not None and len(bboxes) > 0:
            for i, bb in enumerate(bboxes):
                det = Detection()
                det.bbox = np.array([bb[0], bb[1], bb[2], bb[3]])
                det.frame_id = current_frame_position
                detections.append(det)
                # DRAW DETECTIONS IN GREEN (B, G, R) = (0, 255, 0)
                cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0),
                              2)
                # per-track logging rows are assembled below from the track heads (see logging_data)
        tracker.update_tracks(detections=detections,
                              frame_id=current_frame_position)
        # create a list of the current row for logging
        logging_data = []
        # get the "head" of each track
        current_tracks = tracker.get_track_heads()
        for current_track in current_tracks:
            latest_bbox = current_track.get_latest_bb()
            # get predicted distances from bounding box
            predicted_distances = distance_predictor.predict(latest_bbox)
            uid = current_track.uid
            logging_row = [
                current_frame_position, current_time_string, uid,
                latest_bbox[0], latest_bbox[1], latest_bbox[2], latest_bbox[3],
                lidar_distance, predicted_distances[0]
            ]
            logging_data.append(logging_row)
        if settings.ENABLE_LOGGING:
            data_logger.log(logging_data)
        tracker.draw_tracks(img)
        # draw lidar distance
        cv2.putText(img, 'lidar_dist = ' + str(int(lidar_distance)) + 'ft',
                    (0, 25), 1, 1.5, (255, 0, 0), 2)
        cv2.imshow('img', img)
        key = cv2.waitKey(1)
        if key == 27:
            break
        if settings.RECORD_VIDEO:
            # rescale image back to original shape
            img = cv2.resize(img, (int(img.shape[1] / scale_factor),
                                   int(img.shape[0] / scale_factor)))
            vw.write(img)
Example #12
def launch_test_kalman_filter(save_path, distance_thresholds, min_track_len,
                              min_width, min_height, sequence, camera,
                              detector):
    save_video = False
    save_summary = False
    fps = 24
    os.makedirs(save_path, exist_ok=True)

    reader = AICityChallengeAnnotationReader(path='data/AICity_data/train/' +
                                             sequence + '/' + camera +
                                             '/gt/gt.txt')
    gt = reader.get_annotations(classes=['car'])
    reader = AICityChallengeAnnotationReader(path='data/AICity_data/train/' +
                                             sequence + '/' + camera +
                                             '/det/det_' + detector + '.txt')
    dets = reader.get_annotations(classes=['car'])

    cap = cv2.VideoCapture('data/AICity_data/train/' + sequence + '/' +
                           camera + '/vdo.avi')
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if save_video:
        writer = imageio.get_writer(os.path.join(
            save_path,
            'task1_' + sequence + '_' + camera + '_' + detector + '.gif'),
                                    fps=fps)

    tracker = Sort()
    y_true = []
    tracks = []
    max_track = 0
    video_percentage = 1
    start = 0
    end = int(n_frames * video_percentage)

    for frame in trange(start, end, desc='Tracking'):

        detections_on_frame_ = dets.get(frame, [])
        detections_on_frame = []
        for d in detections_on_frame_:
            if min_width < (d.xbr - d.xtl) and min_height < (d.ybr - d.ytl):
                detections_on_frame.append(d)

        detections_on_frame = tracker.update(
            np.array([[*d.bbox, d.score] for d in detections_on_frame]))
        detections_on_frame = [
            Detection(frame, int(d[-1]), 'car', *d[:4])
            for d in detections_on_frame
        ]

        tracks, frame_tracks, max_track = update_tracks_by_overlap(
            tracks,
            detections_on_frame,
            max_track,
            refinement=False,
            optical_flow=None)

        y_true.append(gt.get(frame, []))

    idf1s = []
    for distance_threshold in distance_thresholds:
        accumulator = MOTAcumulator()
        y_pred = []

        moving_tracks = remove_static_tracks(tracks, distance_threshold,
                                             min_track_len)
        detections = []
        for track in moving_tracks:
            detections.extend(track.detections)
        detections = group_by_frame(detections)

        for frame in trange(start, end, desc='Accumulating detections'):

            if save_video:
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
                ret, img = cap.read()

                for det in y_true[frame - start]:
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), (0, 255, 0), 6)

            frame_detections = []
            for det in detections.get(frame, []):
                frame_detections.append(det)
                if save_video:
                    # recover the detection's own track: the bare `track` here
                    # would be a stale leftover from the moving_tracks loop above
                    track = next(t for t in moving_tracks if det in t.detections)
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ybr)), track.color, 6)
                    cv2.rectangle(img, (int(det.xtl), int(det.ytl)),
                                  (int(det.xbr), int(det.ytl) - 15),
                                  track.color, -6)
                    cv2.putText(img, str(det.id), (int(det.xtl), int(det.ytl)),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 6)
                    cv2.circle(img, track.detections[-1].center, 5,
                               track.color, -1)

            y_pred.append(frame_detections)

            if save_video:
                writer.append_data(cv2.resize(img, (600, 350)))

            accumulator.update(y_true[frame - start], y_pred[-1])

        ap, prec, rec = mean_average_precision(y_true,
                                               y_pred,
                                               classes=['car'],
                                               sort_method=None)
        print(f'AP: {ap:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}')
        print('Additional metrics:')
        summary = accumulator.get_idf1()
        # As mentioned in https://github.com/cheind/py-motmetrics:
        #     FAR = FalsePos / Frames * 100
        #     MOTP = (1 - MOTP) * 100
        print(summary)

        if save_summary:
            with open(
                    os.path.join(
                        save_path, 'task1_' + sequence + '_' + camera + '_' +
                        detector + '_' + str(distance_threshold) + '.txt'),
                    'w') as f:
                f.write(str(summary))

        idf1s.append(summary['idf1']['acc'] * 100)

    cv2.destroyAllWindows()
    if save_video:
        writer.close()

    return idf1s
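
A hypothetical invocation (the thresholds below are placeholders; only the sequence/camera/detector names appear in the source paths):

idf1s = launch_test_kalman_filter(save_path='results/kalman',
                                  distance_thresholds=[550, 675],
                                  min_track_len=5,
                                  min_width=60,
                                  min_height=48,
                                  sequence='S03',
                                  camera='c010',
                                  detector='mask_rcnn')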