Example No. 1
def read_detections(path,
                    drop_detection_prob: float = 0.0,
                    add_detection_noise: float = 0.0):
    """ parses and converts MOT16 benchmark annotations to known [xmin, ymin, xmax, ymax] format """
    path = os.path.expanduser(path)
    logger.debug('reading detections from %s' % path)
    if not os.path.isfile(path):
        raise ValueError('file does not exist')

    df = pd.read_csv(path, names=COL_NAMES)

    max_frame = df.frame_idx.max()
    for frame_idx in range(max_frame):
        detections = []
        for _, row in df[df.frame_idx == frame_idx].iterrows():
            if random.random() < drop_detection_prob:
                continue

            box = [
                row.bb_left, row.bb_top, row.bb_left + row.bb_width,
                row.bb_top + row.bb_height
            ]

            if add_detection_noise > 0:
                for i in range(4):
                    box[i] += random.uniform(-add_detection_noise,
                                             add_detection_noise)

            detections.append(Detection(box=box))

        yield frame_idx, detections
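
A minimal usage sketch for the generator above (assumptions, not part of the original snippet: the motpy import, a placeholder detections file path, a 30 fps step time, and a COL_NAMES tuple matching the MOT16 CSV layout):

# hedged usage sketch; the path below is a placeholder
from motpy import MultiObjectTracker

tracker = MultiObjectTracker(dt=1 / 30.0)
for frame_idx, detections in read_detections('~/MOT16/train/MOT16-02/det/det.txt'):
    tracker.step(detections=detections)
    tracks = tracker.active_tracks(min_steps_alive=3)
    print('frame %d: %d active tracks' % (frame_idx, len(tracks)))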
Example No. 2
def detectFaceDNN(net, frame, conf_threshold=0.5):
    frameOpencvDnn = frame.copy()
    frameHeight = frameOpencvDnn.shape[0]
    frameWidth = frameOpencvDnn.shape[1]
    blob = cv2.dnn.blobFromImage(
        frameOpencvDnn,
        1.0,
        (300, 300),
        [104, 117, 123],
        False,
        False,
    )
    net.setInput(blob)
    detections = net.forward()
    bboxes = []
    out_detections = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            x1 = int(detections[0, 0, i, 3] * frameWidth)
            y1 = int(detections[0, 0, i, 4] * frameHeight)
            x2 = int(detections[0, 0, i, 5] * frameWidth)
            y2 = int(detections[0, 0, i, 6] * frameHeight)
            bboxes.append([x1, y1, x2, y2])
            out_detections.append(
                Detection(box=[x1, y1, x2, y2], score=confidence))
            cv2.rectangle(
                frameOpencvDnn,
                (x1, y1),
                (x2, y2),
                (0, 255, 0),
                int(round(frameHeight / 150)),
                8,
            )
    return frameOpencvDnn, bboxes, out_detections
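
A hedged sketch of feeding detectFaceDNN into motpy's tracker; the Caffe model file names mirror the ones used in a later example below, while the webcam index and frame rate are assumptions:

# sketch only: assumes cv2 is imported and the OpenCV res10 SSD face model files are available locally
from motpy import MultiObjectTracker

net = cv2.dnn.readNetFromCaffe("deploy.prototxt",
                               "res10_300x300_ssd_iter_140000.caffemodel")
tracker = MultiObjectTracker(dt=1 / 30.0)

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    annotated, bboxes, detections = detectFaceDNN(net, frame)
    tracker.step(detections=detections)
    for track in tracker.active_tracks():
        print(track.id, track.box)
    cv2.imshow('faces', annotated)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()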
Example No. 3
def read_detections(results,
                    drop_detection_prob: float = 0.0,
                    add_detection_noise: float = 0.0):
    """ parses and converts MOT16 benchmark annotations to known [xmin, ymin, xmax, ymax] format """
    detections = []
    for i in range(len(results)):
        if random.random() < drop_detection_prob:
            continue

        box = [
            results[i]['xmin'], results[i]['ymin'], results[i]['xmax'],
            results[i]['ymax']
        ]

        if add_detection_noise > 0:
            for j in range(4):
                box[j] += random.uniform(-add_detection_noise,
                                         add_detection_noise)

        detections.append(Detection(box=box))

    return detections
Example No. 4
    def process_image(self, image: NpImage) -> Sequence[Detection]:
        t0 = time.time()
        boxes, scores, class_ids = self._predict(image)
        elapsed = (time.time() - t0) * 1000.
        logger.debug(f'inference time: {elapsed:.3f} ms')
        return [
            Detection(box=b, score=s, class_id=l)
            for b, s, l in zip(boxes, scores, class_ids)
        ]
Example No. 5
    def bboxes2out_detections(self, bboxes: BoundingBoxes):
        out_detections = []

        for bbox in bboxes.bounding_boxes:
            out_detections.append(
                Detection(box=[bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax],
                          score=bbox.probability))

        return out_detections
Example No. 6
def run():
    # prepare multi object tracker
    model_spec = {
        'order_pos': 1,
        'dim_pos': 2,
        'order_size': 0,
        'dim_size': 2,
        'q_var_pos': 5000.,
        'r_var_pos': 0.1
    }

    dt = 1 / 15.0  # assume 15 fps
    tracker = MultiObjectTracker(dt=dt, model_spec=model_spec)

    # open camera
    cap = cv2.VideoCapture(0)

    face_detector = FaceDetector()

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, dsize=None, fx=0.5, fy=0.5)

        # run face detector on current frame
        bboxes = face_detector.process(frame)
        detections = [Detection(box=bbox) for bbox in bboxes]
        logger.debug(f'detections: {detections}')

        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=3)
        logger.debug(f'tracks: {tracks}')

        # preview the boxes on frame
        for det in detections:
            draw_detection(frame, det)

        for track in tracks:
            draw_track(frame, track)

        cv2.imshow('frame', frame)

        # stop demo by pressing 'q'
        if cv2.waitKey(int(1000 * dt)) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example No. 7
    def process_image(self, image: NpImage) -> Sequence[Detection]:
        blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123], False, False)
        self.net.setInput(blob)
        detections = self.net.forward()

        # convert output from OpenCV detector to tracker expected format [xmin, ymin, xmax, ymax]
        out_detections = []
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > self.conf_threshold:
                xmin = int(detections[0, 0, i, 3] * image.shape[1])
                ymin = int(detections[0, 0, i, 4] * image.shape[0])
                xmax = int(detections[0, 0, i, 5] * image.shape[1])
                ymax = int(detections[0, 0, i, 6] * image.shape[0])
                out_detections.append(Detection(box=[xmin, ymin, xmax, ymax], score=confidence))

        return out_detections
Example No. 8
    def run(self):
        video = self.cam
        frame_num = 0
        ret, frame = video.read()
        height, width = frame.shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out_video = cv2.VideoWriter('output_23_4.avi', fourcc, 18,
                                    (width, height))
        while True:
            print('--------------------------------')
            detection = []
            ret, frame = video.read()
            if not ret:
                break
            frame_num += 1
            if frame_num % 1 == 0:
                start = time()
                detections = self.detector.detect(frame)
                for det in detections:
                    detection.append(Detection(box=np.array(det[:4])))
                    # draw_detection(frame, Detection(box = np.array(det[:4])))

                self.tracker.step(detections=detection)
                tracks = self.tracker.active_tracks()

                self.process_trackers(frame, tracks)
                print("time : ", time() - start)
                frame = self.put_res(frame)
                frame = cv2.polylines(frame, np.array([self.polygon]), False,
                                      FINAL_LINE_COLOR, 1)
                out_video.write(frame)

                cv2.imshow('frame', frame)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
        out_video.release()
        video.release()
        cv2.destroyAllWindows()
Example No. 9
import numpy as np

from motpy import Detection, MultiObjectTracker

# create a simple bounding box with format of [xmin, ymin, xmax, ymax]
object_box = np.array([1, 1, 10, 10])

# create a multi object tracker with a specified step time of 100ms
tracker = MultiObjectTracker(dt=0.1)

for step in range(10):
    # let's simulate object movement by 1 unit (e.g. pixel)
    object_box += 1

    # update the state of the multi-object-tracker tracker
    # with the list of bounding boxes
    tracker.step(detections=[Detection(box=object_box)])

    # retrieve the active tracks from the tracker (you can customize
    # the hyperparameters of tracks filtering by passing extra arguments)
    tracks = tracker.active_tracks()

    print('MOT tracker tracks %d objects' % len(tracks))
    print('first track box: %s' % str(tracks[0].box))
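
Continuing the snippet above, Detection can also carry a confidence score and a class id, as several of the surrounding examples do; the values below are made-up illustrations:

# illustration only: score and class_id are optional fields on Detection
det = Detection(box=np.array([12, 12, 21, 21]), score=0.87, class_id=1)
tracker.step(detections=[det])
for track in tracker.active_tracks():
    print(track.id, track.box)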

Example No. 10
def run(videoName, dateVar, timeVar):

    ageProto = "/root/Project Metro/Models/age_deploy.prototxt"
    ageModel = "/root/Project Metro/Models/age_net.caffemodel"
    genderProto = "/root/Project Metro/Models/gender_deploy.prototxt"
    genderModel = "/root/Project Metro/Models/gender_net.caffemodel"
    MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
    ageList = [
        '(0-3)', '(4-6)', '(8-15)', '(15-18)', '(18-25)', '(30-45)', '(48-55)',
        '(60-100)'
    ]
    genderList = ['Male', 'Female']
    ageNet = cv2.dnn.readNet(ageModel, ageProto)
    genderNet = cv2.dnn.readNet(genderModel, genderProto)
    # initialize face detector
    #face_detector = cv2.CascadeClassifier("/root/Project Metro/Models/haarcascade_frontalface_default.xml")

    net = cv2.dnn.readNetFromCaffe("deploy.prototxt",
                                   "res10_300x300_ssd_iter_140000.caffemodel")
    (H, W) = (None, None)

    detect_interval = 1
    scale_rate = 0.75
    show_rate = 1
    colours = np.random.rand(32, 3)

    c = 0
    id_dict = {}

    wb = openpyxl.load_workbook('outputgad.xlsx')

    ws = wb.worksheets[0]
    rowno = ws.max_row + 1
    count = ws.max_row - 1

    webcam = cv2.VideoCapture(videoName)
    fps = webcam.get(cv2.CAP_PROP_FPS)

    if fps == 0:
        print("No Input Stream Detected")
        webcam.release()
        cv2.destroyAllWindows()
        return

    tracker = MultiObjectTracker(
        dt=1 / fps,
        tracker_kwargs={'max_staleness': 3},
        model_spec='constant_acceleration_and_static_box_size_2d',
        matching_fn_kwargs={'min_iou': 0.25})

    if not webcam.isOpened():
        print("No Input Stream Detected")
        webcam.release()
        cv2.destroyAllWindows()
        return

    if (videoName == 0):
        frameWidth = 500
        padding = 25
        threshold = 0.5
    else:
        frameWidth = 800
        padding = 20
        threshold = 0.8

    final_faces = []
    while (webcam.isOpened()):
        status, frame = webcam.read()

        if frame is None:
            print("Could not read frame")
            webcam.release()
            cv2.destroyAllWindows()
            break

        frame = imutils.resize(frame, width=frameWidth)

        (H, W) = frame.shape[:2]
        #frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
        if not status:
            print("Could not read frame")
            webcam.release()
            cv2.destroyAllWindows()
            break

        #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H), (104.0, 177.0, 123.0))
        net.setInput(blob)
        detections = net.forward()
        rects = []

        for i in range(0, detections.shape[2]):
            # filter out weak detections by ensuring the predicted
            # probability is greater than a minimum threshold
            if detections[0, 0, i, 2] > threshold:
                # compute the (x, y)-coordinates of the bounding box for
                # the object, then update the bounding box rectangles list
                box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
                rects.append(box.astype("int"))

        # the SSD output already holds corner coordinates, so each rect is [xmin, ymin, xmax, ymax]
        final_faces = np.array(rects)

        detections = [Detection(box=bbox) for bbox in final_faces]
        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=0)
        for track in tracks:
            d = []
            d = track.box
            d_id = track.id
            d = d.astype(np.int32)
            x = d[0]
            y = d[1]
            w = d[2]
            h = d[3]
            if d_id not in id_dict.keys():

                #face = frame[y:h,x:w]

                face = frame[max(0, y -
                                 padding):min(h + padding, frame.shape[0] - 1),
                             max(0, x -
                                 padding):min(w + padding, frame.shape[1] - 1)]

                blob = cv2.dnn.blobFromImage(face,
                                             1.0, (227, 227),
                                             MODEL_MEAN_VALUES,
                                             swapRB=False)
                genderNet.setInput(blob)
                genderPreds = genderNet.forward()
                gender = genderList[genderPreds[0].argmax()]
                print('Person :', count + 1)
                print(f'Gender: {gender}')

                ageNet.setInput(blob)
                agePreds = ageNet.forward()
                age = ageList[agePreds[0].argmax()]

                print(f'Age: {age[1:-1]} years')
                id_dict[d_id] = (gender, age)

                c1 = ws.cell(row=rowno, column=1)
                c2 = ws.cell(row=rowno, column=2)
                c3 = ws.cell(row=rowno, column=3)
                c4 = ws.cell(row=rowno, column=4)

                c1.value = dateVar
                c2.value = timeVar
                c3.value = gender
                c4.value = age[1:-1]

                wb.save('outputgad.xlsx')

                count += 1
                rowno += 1

                cv2.imwrite(
                    "{0}/{1}{2}_{3}.jpg".format("facepics", gender, age, d_id),
                    face)

            # track.box is [xmin, ymin, xmax, ymax], so (w, h) is the bottom-right corner
            cv2.rectangle(frame, (x, y), (w, h), (0, 255, 0), 2)

            gender, age = id_dict[d_id]
            cv2.putText(frame, f'{gender}, {age}', (d[0], d[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2,
                        cv2.LINE_AA)

            resultframe = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
            cv2.imshow("Detecting age and gender", resultframe)

        if cv2.waitKey(1) & 0XFF == ord('q'):
            print("Detection Stopped Manually")
            webcam.release()
            cv2.destroyAllWindows()
            break
Example No. 11
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    #create a multi object tracker
    tracker = MultiObjectTracker(dt=0.1)

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                out_detections = []
                for *xyxy, conf, cls in reversed(det):
                    object_box = np.array([
                        int(xyxy[0]),
                        int(xyxy[1]),
                        int(xyxy[2]),
                        int(xyxy[3])
                    ])
                    out_detections.append(
                        Detection(box=object_box, score=conf.to('cpu')))

                tracker.step(out_detections)
                tracks = tracker.active_tracks(3)

                for track in tracks:
                    label = f'{track.id[:5]}'
                    plot_one_box(track.box,
                                 im0,
                                 label=label,
                                 color=colors[int(cls)],
                                 line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                key = cv2.waitKey(1)  # 1 millisecond
                if key == ord('q'):
                    break

    print(f'Done. ({time.time() - t0:.3f}s)')
Example No. 12
def run(videoName, dateVar, timeVar):

    ageProto = "/root/Project Metro/Models/age_deploy.prototxt"
    ageModel = "/root/Project Metro/Models/age_net.caffemodel"
    genderProto = "/root/Project Metro/Models/gender_deploy.prototxt"
    genderModel = "/root/Project Metro/Models/gender_net.caffemodel"
    MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
    ageList = [
        '(0-3)', '(4-6)', '(8-15)', '(15-18)', '(18-25)', '(30-45)', '(48-55)',
        '(60-100)'
    ]
    genderList = ['Male', 'Female']
    ageNet = cv2.dnn.readNet(ageModel, ageProto)
    genderNet = cv2.dnn.readNet(genderModel, genderProto)
    # initialize face detector
    face_detector = cv2.CascadeClassifier(
        "/root/Project Metro/Models/haarcascade_frontalface_default.xml")

    detect_interval = 1
    scale_rate = 0.75
    show_rate = 1
    colours = np.random.rand(32, 3)

    c = 0
    id_dict = {}

    wb = openpyxl.load_workbook('outputgad.xlsx')

    ws = wb.worksheets[0]
    rowno = ws.max_row + 1
    count = ws.max_row - 1

    webcam = cv2.VideoCapture(videoName)
    fps = webcam.get(cv2.CAP_PROP_FPS)

    if fps == 0:
        print("No Input Stream Detected")
        webcam.release()
        cv2.destroyAllWindows()
        return

    tracker = MultiObjectTracker(
        dt=1 / fps,
        tracker_kwargs={'max_staleness': 3},
        model_spec='constant_acceleration_and_static_box_size_2d',
        matching_fn_kwargs={'min_iou': 0.25})

    if not webcam.isOpened():
        print("No Input Stream Detected")
        webcam.release()
        cv2.destroyAllWindows()
        return

    padding = 20

    final_faces = []
    while (webcam.isOpened()):
        status, frame = webcam.read()
        #frame = cv2.resize(frame, (0, 0), fx=scale_rate, fy=scale_rate)
        if not status:
            print("Could not read frame")
            webcam.release()
            cv2.destroyAllWindows()
            return
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if c % detect_interval == 0:
            #faces = face_detector.detectMultiScale(gray, 1.3, 5)
            faces = face_detector.detectMultiScale(gray,
                                                   scaleFactor=2,
                                                   minNeighbors=5,
                                                   minSize=(300, 300))
            faces = np.array(faces)
            face_sums = faces.shape[0]
            if face_sums > 0:
                face_list = []
                for item in faces:
                    xmin = item[0]
                    ymin = item[1]
                    xmax = item[0] + item[2]
                    ymax = item[1] + item[3]
                    face_list.append([xmin, ymin, xmax, ymax])

                final_faces = np.array(face_list)
        detections = [Detection(box=bbox) for bbox in final_faces]
        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=0)
        for track in tracks:
            d = []
            d = track.box
            d_id = track.id
            d = d.astype(np.int32)
            x = d[0]
            y = d[1]
            w = d[2]
            h = d[3]
            if d_id not in id_dict.keys():
                face = frame[max(0, y -
                                 padding):min(h + padding, frame.shape[0] - 1),
                             max(0, x -
                                 padding):min(w + padding, frame.shape[1] - 1)]

                blob = cv2.dnn.blobFromImage(face,
                                             1.0, (227, 227),
                                             MODEL_MEAN_VALUES,
                                             swapRB=False)
                genderNet.setInput(blob)
                genderPreds = genderNet.forward()
                gender = genderList[genderPreds[0].argmax()]
                print('Person :', count + 1)
                print(f'Gender: {gender}')

                ageNet.setInput(blob)
                agePreds = ageNet.forward()
                age = ageList[agePreds[0].argmax()]

                print(f'Age: {age[1:-1]} years')
                id_dict[d_id] = (gender, age)

                c1 = ws.cell(row=rowno, column=1)
                c2 = ws.cell(row=rowno, column=2)
                c3 = ws.cell(row=rowno, column=3)
                c4 = ws.cell(row=rowno, column=4)

                c1.value = dateVar
                c2.value = timeVar
                c3.value = gender
                c4.value = age[1:-1]

                wb.save('outputgad.xlsx')

                count += 1
                rowno += 1

                cv2.imwrite(
                    "{0}/{1}{2}_{3}.jpg".format("facepics", gender, age, d_id),
                    face)

            cv2.rectangle(frame, (x, y), (w, h), (0, 255, 0), 2)

            gender, age = id_dict[d_id]
            cv2.putText(frame, f'{gender}, {age}', (d[0], d[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2,
                        cv2.LINE_AA)

            resultframe = cv2.resize(frame, (0, 0), fx=show_rate, fy=show_rate)
            cv2.imshow("Detecting age and gender", resultframe)

        if cv2.waitKey(1) & 0XFF == ord('q'):
            print("Detection Stopped Manually")
            webcam.release()
            cv2.destroyAllWindows()
            return
Example No. 13
def run():
    # prepare multi object tracker
    model_spec = {
        'order_pos': 1,
        'dim_pos': 2,
        'order_size': 0,
        'dim_size': 2,
        'q_var_pos': 5000.,
        'r_var_pos': 0.1
    }

    # model_spec = {
    #         'order_pos': 1, 'dim_pos': 2, # position is a center in 2D space; under constant velocity model
    #         'order_size': 0, 'dim_size': 2, # bounding box is 2 dimensional; under constant velocity model
    #         'q_var_pos': 1000., # process noise
    #         'r_var_pos': 0.1 # measurement noise
    #     }

    # tracker = MultiObjectTracker(dt=1 / 10, model_spec=model_spec)

    dt = 1 / 15.0  # assume 15 fps
    tracker = MultiObjectTracker(dt=dt, model_spec=model_spec)
    input_video = args.input_video

    # open camera
    cap = cv2.VideoCapture(input_video)

    # vid = imageio.get_reader(input_video, 'ffmpeg')

    people_detector = PeopleDetector()

    while (True):
        ret, frame = cap.read()
        if not ret:
            break

        # frame = cv2.resize(frame, dsize=None, fx=0.5, fy=0.5)

        # run people detector on current frame
        bboxes = people_detector.process(frame, args.confidence)
        detections = [Detection(box=bbox) for bbox in bboxes]
        logger.debug(f'detections: {detections}')

        tracker.step(detections)
        tracks = tracker.active_tracks(min_steps_alive=3)
        logger.debug(f'tracks: {tracks}')

        # preview the boxes on frame
        for det in detections:
            draw_detection(frame, det)

        for track in tracks:
            draw_track(frame, track)

        if cv2.waitKey(1) & 0xFF == ord('q') or ret == False:
            cap.release()
            cv2.destroyAllWindows()
            break
        cv2.imshow('frame', frame)

        # stop demo by pressing 'q'
        if cv2.waitKey(int(1000 * dt)) & 0xFF == ord('q'):
            break

    # cap.release()
    cv2.destroyAllWindows()
Example No. 14
def main():
    ID_only = []

    verbose = args.verbose

    if args.model == 'yolov3':
        CONFIG_PATH, WEIGHTS_PATH = 'yolov3.cfg', 'yolov3.weights'

    if not os.path.isfile(WEIGHTS_PATH):
        logger.debug('downloading model...')
        urlretrieve('https://pjreddie.com/media/files/yolov3.weights', WEIGHTS_PATH)

    if args.input_video == 'mall':
        input_video = 'sample_mall_vid.mp4'
        fx, fy = 1, 1
        x1_loc = 140
        y1_loc = 240
        x2_loc = 340
        y2_loc = 250

    elif args.input_video == 'shop':
        input_video = 'sample_shop_vid.mp4'
        fx, fy = 0.7, 0.7
        x1_loc = 600
        y1_loc = 500
        x2_loc = 740
        y2_loc = 390

    update_text_font = ImageFont.truetype("arial.ttf", 15)

    # Load names of classes and get random colors
    classes = open('coco.names').read().strip().split('\n')

    accepted_classes = ['person', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase']
    accessory_ref_lst = ['backpack', 'umbrella', 'handbag', 'tie', 'suitcase']
    inner_keys = ['object', 'time', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase']


    idx_accepted = [0, 24, 25, 26, 27, 28]

    np.random.seed(42)

    colors = np.random.randint(0, 255, size=(len(classes), 3), dtype='uint8')

    # Give the configuration and weight files for the model and load the network.
    net = cv.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
    net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    # net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    if verbose: print('model loaded')

    # determine the output layer
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]


    # open camera
    cap = cv2.VideoCapture(input_video)
    dt = 1 / 8.0  # assume 8 fps

    # prepare multi object tracker
    model_spec = {'order_pos': 1, 'dim_pos': 2,
                    'order_size': 0, 'dim_size': 2,
                    'q_var_pos': 5000., 'r_var_pos': 0.1}

    # prepare tracking
    tracker = MultiObjectTracker(dt=dt, model_spec=model_spec)

    # python dictionary to track people
    d = {
        'ID': 
            {'object': 'value_1', 'time': 'value_2', 'backpack': 'value_3', 'umbrella': 'value_4', 'handbag': 'value_5', 'tie': 'value_6', 'suitcase': 'value_7'}
    }

    d_bbox = {'ID': 
            {'x1': 'value_1', 'y1': 'value_2', 'x2': 'value_3', 'y2': 'value_4'}
    }

    arr_d = []

    ctr = 0
    clear_ctr = 0
    img_array = []
    while True:
        # the shop example is frozen for roughly the first 45 frames, so skip them
        if args.input_video == 'shop' and ctr < 45:
            ret, img = cap.read()
            ctr += 1
            continue
        ret, img = cap.read()
        clear_ctr += 1
        # save and exit at end of video file
        if img is None:
            if args.save_bool:
                save_video(args.input_video, img_array, size)
            break

        img = cv2.resize(img, dsize=None, fx=fx, fy=fy)
        size = (img.shape[1], img.shape[0])

        # construct a blob from the image
        blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)

        net.setInput(blob)
        if verbose: t0 = time.time()
        outputs = net.forward(ln)
        if verbose: t = time.time()
        if verbose: print('time=', t-t0)

        boxes = []
        confidences = []
        classIDs = []
        h, w = img.shape[:2]

        for output in outputs:
            for detection in output:
                scores = detection[5:]
                classID = np.argmax(scores)
                # ignore if not in classes we want
                if classID not in idx_accepted:
                    continue
                # logger.debug(f'class: {classes[classID]}')
                confidence = scores[classID]

                if confidence > 0.5:
                    box = detection[:4] * np.array([w, h, w, h])
                    (centerX, centerY, width, height) = box.astype("int")
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    box = [x, y, int(width), int(height)]
                    boxes.append(box)
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        indices = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        class_lst = []

        bboxes = []
        if len(indices) > 0:
            for i in indices.flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                # # old version of boxes without ID tracking
                # color = [int(c) for c in colors[classIDs[i]]]
                # cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
                # text = "{}: {:.4f}".format(classes[classIDs[i]], confidences[i])
                # cv.putText(img, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
                class_lst.append(classes[classIDs[i]])
                # getting the boundaries of the box for tracking
                xmin = int(x)
                ymin = int(y)
                xmax = int(x + w)
                ymax = int(y + h)
                bboxes.append([xmin, ymin, xmax, ymax])
        
        # if empty list
        if not class_lst:
            continue

        ''' detection adapted from https://learnopencv.com/goturn-deep-learning-based-object-tracking/ '''
        detections = [Detection(box=bbox) for bbox in bboxes]
        if verbose: logger.debug(f'detections: {detections}')
        
        # edited MOTPY tracker source code
        
        tracker.step(detections, class_lst)

        tracks = tracker.active_tracks(min_steps_alive=-1)
        if verbose: logger.debug(f'tracks: {tracks}')

        # prepare text for each person detected
        # text_arr = []



        # # preview the boxes on frame
        # for det in detections:
        #     draw_detection(img, det)

        u_x_p = []
        u_y_p = []
        u_x_a = []
        u_y_a = []

        people_track_lst = []
        accessories_track_lst = []
        for idx, track in enumerate(tracks):
            bound_box = track[1]

            ID = track[0].split('-')[0]
            class_ID = track[0].split('-')[1]
            
            # append to sort 
            if class_ID == 'person':
                people_track_lst.append(track)
                u_x_p.append(mean([bound_box[0], bound_box[2]]))
                u_y_p.append(mean([bound_box[1], bound_box[3]]))
                custom_draw_track(img, track, 'person')

            else:
                accessories_track_lst.append(track)
                u_x_a.append(mean([bound_box[0], bound_box[2]]))
                u_y_a.append(mean([bound_box[1], bound_box[3]]))
                custom_draw_track(img, track, 'accessory')

            # custom_draw_track(img, track, text_arr[idx])

        time_stamp = time.strftime("%Y%m%d%H%M%S")

        # combine the track list, but accessories ordered last
        track_list = people_track_lst + accessories_track_lst
        ux = u_x_p + u_x_a
        uy = u_y_p + u_y_a

        # determine how many people detected
        if len(indices) > 0:
            # process bag and count people
            for idx, track in enumerate(track_list):
                bound_box = track[1]

                ID = track[0].split('-')[0]
                class_ID = track[0].split('-')[1]

                bp_curr = None
                ub_curr = None
                hb_curr = None
                t_curr = None
                sc_curr = None
                status_stamp = None
                px_h = None

                # if accessory 
                if class_ID != 'person':
                    # calculate a list of distances between the people and this point
                    person_index, img = distance_2d(ux[idx], uy[idx], u_x_p, u_y_p, img)

                    # if it was not registered as an accessory yet
                    # if exists(ID, arr_d) is False:
                    # Check if key exist in dictionary using any()
                    if any(ID in d_t.values() for d_t in d.values()) is False:
                        # index of the person...
                        curr_person = people_track_lst[person_index]
                        owner_ID = curr_person[0].split('-')[0]

                        # set the new value into the dictionary
                        d[owner_ID][class_ID] = ID

                # add to dictionary (changed to list) if it doesn't exist
                # elif exists(ID, arr_d) is False:
                elif ID not in d.keys():
                    d.update({
                        ID: {'object': class_ID, 'time': time_stamp, 'status': status_stamp, 'height': px_h, 'backpack': bp_curr, 'umbrella': ub_curr, 'handbag': hb_curr, 'tie': t_curr, 'suitcase': sc_curr}
                    })

                    d_bbox.update({
                        ID: {'x1': [round(bound_box[0])], 'y1': [round(bound_box[1])], 'x2': [round(bound_box[2])], 'y2': [round(bound_box[3])]}
                    })
                # every other frame, append the latest position
                elif clear_ctr % 2:
                    # it's already in the list, we append the position
                    d_bbox[ID]['x1'].append(round(bound_box[0]))
                    d_bbox[ID]['y1'].append(round(bound_box[1]))
                    d_bbox[ID]['x2'].append(round(bound_box[2]))
                    d_bbox[ID]['y2'].append(round(bound_box[3]))

                    # arr_d.append([ID, class_ID, time_stamp, bp_curr, ub_curr, hb_curr, t_curr, sc_curr])
                    # ID_only.append(ID)

        # print(d_bbox)
        # every other frame, remove idle-status objects from the dictionaries
        if clear_ctr % 2:
            d, d_bbox = clean_bbox_dict(d, d_bbox)

        # print(d)
        # print(ID_only)
        num_people = len(people_track_lst)

        # get time stamp
        img = write_stats(img, num_people, time_stamp, update_text_font)

        if verbose: logger.debug(f'number of people: {num_people}, time of day: {time_stamp}')

        # draw line for people counting
        img = draw_line(img, x1_loc, y1_loc, x2_loc, y2_loc, (0, 0, 255), 5)

        cv.imshow('window', img)
        # stop demo by pressing 'q'
        if cv2.waitKey(int(1000*dt)) & 0xFF == ord('q'):
            break

        img_array.append(img)

        if args.SlowMode:
            input("Press Enter to continue...")
            with open('shop.json', 'w') as json_file:
                json.dump(d, json_file, indent=4)
        
        # uncomment to route!
        if args.flask_bool: return Response(response=str(d), status=200,mimetype="application/json")
Example No. 15
def track_hoa_df(
    hoa_dets,
    dt=0.02,
    start_frame=0,
    end_frame=100,
    video_id=None,
    verbose=True,
    object_only=False,
    keep_longest=True,
):
    """
    Args:
        keep_longest (bool): find longest object track sequence
    """
    # Initialize track lists and tracker
    obj_tracker = MultiObjectTracker(dt=dt)
    tracked_obj = []

    if not object_only:
        lh_tracker = MultiObjectTracker(dt=dt)
        rh_tracker = MultiObjectTracker(dt=dt)

        # Initialize tracked dicts
        tracked_lh = []
        tracked_rh = []

    # Last non-empty df
    for frame_idx in tqdm(range(start_frame, end_frame)):
        hoa_df = hoa_dets[hoa_dets.frame == frame_idx]
        obj_df = hoa_df[hoa_df.det_type == "object"]
        obj_dets = [
            Detection(gethoa.row2box(row)) for _, row in obj_df.iterrows()
        ]
        obj_tracker.step(detections=obj_dets)
        tracked_obj.extend(
            trackconv.track2dicts(
                obj_tracker.active_tracks(),
                frame_idx,
                video_id=video_id,
                det_type="object",
            )
        )
        if not object_only:
            lh_df = hoa_df[
                (hoa_df.det_type == "hand") & (hoa_df.side == "left")
            ]
            rh_df = hoa_df[
                (hoa_df.det_type == "hand") & (hoa_df.side == "right")
            ]
            lh_dets = [
                Detection(gethoa.row2box(row)) for _, row in lh_df.iterrows()
            ]
            rh_dets = [
                Detection(gethoa.row2box(row)) for _, row in rh_df.iterrows()
            ]
            lh_tracker.step(detections=lh_dets)
            rh_tracker.step(detections=rh_dets)
            tracked_lh.extend(
                trackconv.track2dicts(
                    lh_tracker.active_tracks(),
                    frame_idx,
                    video_id=video_id,
                    det_type="hand",
                    side="left",
                )
            )
            tracked_rh.extend(
                trackconv.track2dicts(
                    rh_tracker.active_tracks(),
                    frame_idx,
                    video_id=video_id,
                    det_type="hand",
                    side="right",
                )
            )
    if verbose:
        obj_tracks = pd.DataFrame(tracked_obj)
        if keep_longest:
            longest_track_idx = (
                obj_tracks.groupby("track_id").frame.nunique().idxmax()
            )
            # Filter object which has longest track
            tracked_obj = obj_tracks[obj_tracks.track_id == longest_track_idx]
        print_track_info(tracked_obj)
        if not object_only:
            lh_tracks = pd.DataFrame(tracked_lh)
            rh_tracks = pd.DataFrame(tracked_rh)
            print_track_info(lh_tracks, track_type="left hand")
            print_track_info(rh_tracks, track_type="right hand")
            tracked_hoa = pd.DataFrame(
                tracked_obj.to_dict("records") + tracked_lh + tracked_rh
            )
        else:
            tracked_hoa = pd.DataFrame(tracked_obj)
        if keep_longest:
            start_track_frame = tracked_obj.frame.min()
            end_track_frame = tracked_obj.frame.max()
            # Keep only region that focuses on longest track
            tracked_hoa = tracked_hoa[
                (tracked_hoa.frame >= start_track_frame)
                & (tracked_hoa.frame <= end_track_frame)
            ]
    return tracked_hoa