Example No. 1
    def detection_gpu(self, model, frame, cnt):
#        print('Detecting...')
        start_time = time.time()
        frame_tensor = cv_image2tensor(frame, self.input_size).unsqueeze(0)
        frame_tensor = Variable(frame_tensor)

        if self.cuda:
            frame_tensor = frame_tensor.cuda()

        detections = self.model(frame_tensor, self.cuda).cpu()
        detections = process_result(detections, self.confidence, self.nms_thresh)
        print("number of detected objects: ", len(detections))
        a = [[] for _ in range(len(detections))]
        if len(detections) != 0:
            detections = transform_result(detections, [frame], self.input_size)
#            for detection in detections:
            for idx, detection in enumerate(detections):
                a[idx].append(float(detection[6])) # prediction score
                a[idx].append(self.classes[int(detection[-1])]) # prediction class
                a[idx].append(int(detection[1])) # x1
                a[idx].append(int(detection[2])) # y1
                a[idx].append(int(detection[3])) # x2 
                a[idx].append(int(detection[4])) # y2
#                print(a)
                self.draw_bbox([frame], detection, self.colors, self.classes)
        # save frames if you need to.
#        cv2.imwrite('frame'+cnt+'.jpg', frame)
        end_time = time.time()
        print("[INFO] detection done. It took "+str(end_time-start_time)+" seconds")
        self.logfile3.write(str(cnt)+"\t"+str(len(detections))+"\t"+str(a)+"\t"+str(end_time-start_time)+"\n")
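detection_gpu reads its configuration from the enclosing object rather than from arguments, so it only works inside a class that exposes model, cuda, input_size, confidence, nms_thresh, classes, colors and logfile3 (plus a draw_bbox method). Below is a minimal, hypothetical host-class sketch; the values are illustrative, and the assumption that the model object exposes net_info is taken from the other examples on this page, not from this snippet itself.

import torch

# Hypothetical host object for the detection_gpu method above; the method and
# its draw_bbox helper would be defined on this class.
class DetectorHost:
    def __init__(self, model, classes, colors, log_path, cuda=True):
        self.model = model                        # loaded Darknet-style model (assumed to expose net_info)
        self.cuda = cuda and torch.cuda.is_available()
        self.input_size = [int(model.net_info['height']),
                           int(model.net_info['width'])]
        self.confidence = 0.5                     # objectness threshold
        self.nms_thresh = 0.4                     # NMS IoU threshold
        self.classes = classes                    # list of class names
        self.colors = colors                      # bbox colors
        self.logfile3 = open(log_path, 'w')       # tab-separated detection log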
Example No. 2
def detect_image(model, args):

    print('Loading input image(s)...')
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    batch_size = 1  #int(model.net_info['batch'])

    imlist, imgs = load_images(args.input)
    print('Input image(s) loaded')
    print("Loaded {} images".format(len(imgs)))

    img_batches = create_batches(imgs, batch_size)

    # load colors and classes
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("cfg/garb.names")

    if not osp.exists(args.outdir):
        os.makedirs(args.outdir)

    start_time = datetime.now()
    print('Detecting...')

    detected_trush = []

    for batchi, img_batch in tqdm(enumerate(img_batches)):
        img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
        img_tensors = torch.stack(img_tensors)
        img_tensors = Variable(img_tensors)
        if args.cuda:
            img_tensors = img_tensors.cuda()
        detections = model(img_tensors, args.cuda).cpu()
        detections = process_result(detections, args.obj_thresh,
                                    args.nms_thresh)

        detected_trush.append(len(detections))
        if len(detections) == 0:
            continue

        detections = transform_result(detections, img_batch, input_size)

        for detection in detections:
            draw_bbox(img_batch, detection, colors, classes, 0, args.outdir)

        for i, img in enumerate(img_batch):
            save_path = osp.join(args.outdir,
                                 osp.basename(imlist[batchi * batch_size + i]))
            cv2.imwrite(save_path, img)
            print(save_path, 'saved')

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))

    return detected_trush
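A hypothetical way to call detect_image from a script: the function only reads input, outdir, cuda, obj_thresh and nms_thresh from args, so a SimpleNamespace is enough. The paths and thresholds below are illustrative, and model loading is repo-specific, so it is left as a comment.

from types import SimpleNamespace

# Hypothetical driver for detect_image above (values are illustrative).
args = SimpleNamespace(
    input='samples/',        # image file or directory
    outdir='detections/',
    cuda=True,
    obj_thresh=0.5,
    nms_thresh=0.4,
)
# counts = detect_image(model, args)   # 'model' must be loaded beforehand
# print('detections per batch:', counts)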
Example No. 3
def detect_image(model, args):

    print('Loading input image(s)...')
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    batch_size = int(model.net_info['batch'])

    imlist, imgs = load_images(args.input)
    print('Input image(s) loaded')

    img_batches = create_batches(imgs, batch_size)

    # load colors and classes
    colors = pkl.load(
        open(
            "/home/autonomous-car/Desktop/Autonomous_car_ros2/src/stereo_yolo3/stereo_yolo3/pallete",
            "rb"))
    classes = load_classes(
        "/home/autonomous-car/Desktop/Autonomous_car_ros2/src/stereo_yolo3/stereo_yolo3/data/coco.names"
    )

    if not osp.exists(args.outdir):
        os.makedirs(args.outdir)

    start_time = datetime.now()
    print('Detecting...')
    for batchi, img_batch in enumerate(img_batches):
        img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
        img_tensors = torch.stack(img_tensors)
        img_tensors = Variable(img_tensors)
        if args.cuda:
            img_tensors = img_tensors.cuda()
        detections = model(img_tensors, args.cuda).cpu()
        detections = process_result(detections, args.obj_thresh,
                                    args.nms_thresh)
        if len(detections) == 0:
            continue

        detections = transform_result(detections, img_batch, input_size)
        for detection in detections:
            draw_bbox(img_batch, detection, colors, classes)

        for i, img in enumerate(img_batch):
            save_path = osp.join(
                args.outdir,
                'dete_' + osp.basename(imlist[batchi * batch_size + i]))
            cv2.imwrite(save_path, img)

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))

    return
Example No. 4
    def detect_image(self,path,colors=[(39, 129, 113), (164, 80, 133), (83, 122, 114)],classes=['container_small', 'garbage_bag', 'cardboard']):

        print('Loading input image(s)...')
        input_size = [int(self.model.net_info['height']), int(self.model.net_info['width'])]
        batch_size = int(self.model.net_info['batch'])

        imlist, imgs = load_images(path)
        print('Input image(s) loaded')

        img_batches = create_batches(imgs, batch_size)


        start_time = datetime.now()
        print('Detecting...')

        all_images_attributes = []

        for batchi, img_batch in enumerate(img_batches):
            img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
            img_tensors = torch.stack(img_tensors)
            img_tensors = Variable(img_tensors)
            if self.cuda:
                img_tensors = img_tensors.cuda()
            detections = self.model(img_tensors, self.cuda).cpu()
            detections = process_result(detections, self.obj_thresh, self.nms_thresh)
            if len(detections) == 0:
                continue

            detections = transform_result(detections, img_batch, input_size)

            boxes = []
            for detection in detections:
                boxes.append(create_output_json(img_batch, detection, colors, classes))

            images_attributes = {}
            images_attributes['image_meta'] = {'width':input_size[1],'height':input_size[0]}
            images_attributes['boxes'] = boxes

            images_attributes['counts'] = {x:0 for x in classes}
            images_attributes['counts']['total'] = 0
            
            for box in boxes:
                images_attributes['counts'][box['class']] +=1
                images_attributes['counts']['total'] +=1

            all_images_attributes.append(images_attributes)

        end_time = datetime.now()
        print('Detection finished in %s' % (end_time - start_time))
        return all_images_attributes
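For reference, each element of the returned all_images_attributes list is a per-batch dictionary. The sketch below shows its shape with made-up values; the per-box fields depend on create_output_json (not shown here), so only the 'class' key, which the counting loop above relies on, is certain, and the other box fields are assumptions.

# Illustrative shape of one entry of all_images_attributes (values are made up).
example_entry = {
    'image_meta': {'width': 416, 'height': 416},
    'boxes': [
        {'class': 'garbage_bag', 'score': 0.87, 'box': [120, 44, 260, 198]},  # 'score'/'box' are assumed fields
    ],
    'counts': {'container_small': 0, 'garbage_bag': 1, 'cardboard': 0, 'total': 1},
}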
Example No. 5
    def detection_gpu_return(self, model, frame, cnt):
#        print('Detecting...')
        personcnt =0
        start_time = time.time()
        frame_tensor = cv_image2tensor(frame, self.input_size).unsqueeze(0)
        frame_tensor = Variable(frame_tensor)

        if self.cuda:
            frame_tensor = frame_tensor.cuda()

        detections = self.model(frame_tensor, self.cuda).cpu()
        detections = process_result(detections, self.confidence, self.nms_thresh)
        
        if len(detections) != 0:
            detections = transform_result(detections, [frame], self.input_size)
#            for detection in detections:
            for idx, detection in enumerate(detections):
                if(self.classes[int(detection[-1])]=="person"):
                    personcnt +=1
        print(personcnt)
        return personcnt
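The counting loop above can be factored into a small helper; this is just a sketch under the same assumption the method makes, namely that detection[-1] holds the class index.

def count_class(detections, classes, target="person"):
    # Count detections whose class name equals target.
    return sum(1 for detection in detections
               if classes[int(detection[-1])] == target)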
Example No. 6
def detect_frame(model, frame):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]

    frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
    frame_tensor = Variable(frame_tensor)

    frame_tensor = frame_tensor.cuda()

    detections = model(frame_tensor, True).cpu()

    #print(detections.shape)

    #processresult changes the variable 'detections'
    detections = process_result(detections, 0.5, 0.4)
    cls_conf = detections[:, 6].cpu().data.numpy()
    cls_ids = detections[:, 7].cpu().data.numpy()
    #print(detections)
    # print(cls_conf.cpu().data.numpy(), "\n",cls_ids.cpu().data.numpy(),"\n" ,detections)

    # print('Getting here')

    if len(detections) != 0:
        detections = transform_result(detections, [frame], input_size)

    xywh = detections[:, 1:5]
    xywh[:, 0] = (detections[:, 1] + detections[:, 3]) / 2
    xywh[:, 1] = (detections[:, 2] + detections[:, 4]) / 2

    # TODO: width and height are double what they're supposed to be; see the note after this example
    xywh[:, 2] = abs(detections[:, 3] - detections[:, 1]) * 2
    xywh[:, 3] = abs(detections[:, 2] - detections[:, 4]) * 2
    xywh = xywh.cpu().data.numpy()  # -> the final bounding boxes that can be fed into DeepSORT
    ######################################################
    #print(xywh)
    return xywh, cls_conf, cls_ids
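About the TODO above: the mystery factor of two most likely comes from aliasing. detections[:, 1:5] is a view, so writing the box centers into xywh[:, 0] and xywh[:, 1] also overwrites detections[:, 1] and detections[:, 2]; the subsequent width/height lines then measure the distance from a corner to a center (half the true size), and the *2 restores the full size. A sketch that avoids the aliasing by cloning first, assuming the column layout used in this example:

import torch

def corners_to_xywh(detections):
    # Convert columns 1:5 ([x1, y1, x2, y2]) to [cx, cy, w, h] without
    # writing back into the source tensor.
    boxes = detections[:, 1:5].clone()              # clone() breaks the view/aliasing
    xywh = torch.empty_like(boxes)
    xywh[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2    # center x
    xywh[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2    # center y
    xywh[:, 2] = (boxes[:, 2] - boxes[:, 0]).abs()  # width
    xywh[:, 3] = (boxes[:, 3] - boxes[:, 1]).abs()  # height
    return xywh.cpu().numpy()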
Example No. 7
def detect_video(model, args):

    input_size = [int(model.net_info['height']), int(model.net_info['width'])]

    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(
            args.outdir,
            'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')

    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
        cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    read_frames = 0

    start_time = datetime.now()
    print('Detecting...')
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)

            if args.cuda:
                frame_tensor = frame_tensor.cuda()

            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh,
                                        args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                for detection in detections:
                    draw_bbox([frame], detection, colors, classes)

            if not args.no_show:
                cv2.imshow('frame', frame)
            out.write(frame)
            if read_frames % 2 == 0:
                print('Number of frames processed:', read_frames)
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    if not args.no_show:
        cv2.destroyAllWindows()

    print('Detected video saved to ' + output_path)

    return
Example No. 8
def detect_video(model):
    kinect = PyKinectRuntime.PyKinectRuntime(PyKinectV2.FrameSourceTypes_Depth)

    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("yolov3-master/pallete", "rb"))
    classes = load_classes("yolov3-master/data/coco.names")
    colors = [colors[1]]

    # cap is the video captured by camera
    # for surface 0 is front 1 is back 2 is kinect
    cap = cv2.VideoCapture(1 + cv2.CAP_DSHOW)

    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
        cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fps = cap.get(cv2.CAP_PROP_FPS)
    #print(fps)

    read_frames = 0

    start_time = datetime.now()
    #print('Start Detect')
    #set times as 1
    #num=0
    #sample array
    #s=[[] for i in range(20)]
    #s=[]
    #!while loop!
    locationx = []
    locationy = []
    locationd = []
    while cap.isOpened():

        #print('Detecting')
        retflag, frame = cap.read()
        '''
        frame needs to be cropped and then resized to 512*424
        '''

        cv2.circle(frame, (960, 540), 5, [0, 255, 255], thickness=-1)
        #2 feet-41
        #3 feet-41
        #4 feet-37
        #x=154
        y = 0
        h = 1080
        #h=1272
        #w=1611
        w = 1920 / 84.1 * 70.6
        w = int(w)
        x = (1920 - w) / 2 + 140
        x = int(x)
        dim = (512, 380)
        frame = frame[y:y + h, x:x + w]
        frame = cv2.resize(frame, dim)

        read_frames += 1
        if retflag:
            '''
            get depth frame
            '''
            if kinect.has_new_depth_frame():
                Dframe = kinect.get_last_depth_frame()
                frameD = kinect._depth_frame_data
                Dframe = Dframe.astype(np.uint8)
                #print(frame)

                Dframe = np.reshape(Dframe, (424, 512))
                dx = 0
                dy = 22
                dh = 380
                dw = 512
                dim = (512, 380)
                Dframe = Dframe[dy:dy + dh, dx:dx + dw]
                frame = cv2.resize(frame, dim)

                Dframe = cv2.cvtColor(Dframe, cv2.COLOR_GRAY2RGB)

                def click_event(event, x, y, flags, param):
                    if event == cv2.EVENT_RBUTTONDOWN:
                        print(x, y)
                    if event == cv2.EVENT_LBUTTONDOWN:
                        Pixel_Depth = frameD[(((22 + y) * 512) + x)]
                        print("x ", x, "y ", y, "Depth", Pixel_Depth)

                '''
                get RGB frame
                '''
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)

            #if torch.cuda.is_available:
            frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, True).cpu()
            #orange order
            #flag0=0
            flag1 = 0
            detections = process_result(detections, 0.5, 0.4)
            if len(detections) != 0:
                #3.3
                global flag
                print(flag)
                time.sleep(0.1)  # brief pause (0.1 s is an assumed value)
                detections = transform_result(detections, [frame], input_size)
                num = len(detections)
                for detection in detections:
                    Label = int(detection[-1])
                    #flag=flag+1
                    if Label == 49:
                        flag1 = flag1 + 1
                        #detection=[]
                        center = draw_bbox([frame], detection, colors, classes)
                        #print(Label)
                        #print('cc is',center)
                        Dcenter = draw_bbox([Dframe], detection, colors,
                                            classes)
                        #import k
                        #k=0.105
                        #x,y,d=get_depth(center,kinect,k)
                        #redraw the boundary box
                        img = Dframe
                        nx, x, y, d = get_depth(center, kinect)
                        cv2.circle(img, (nx, y), 5, [0, 0, 255], thickness=-1)
                        #send to robot
                        x = center[0]
                        y = center[1]
                        x = x - 512 / 2 + 40
                        y = y - 318 / 2 - 35
                        y = -y
                        x = -x
                        #position
                        print("x ", x, "y ", y, "d ", d)
                        l = len(locationx)
                        #if True:
                        #pick up the orange
                        #for the nth orange(n==flag)
                        if flag1 == flag:
                            # appropriate depth
                            if d > 500 and d < 1050:
                                locationx.append(x)
                                locationy.append(y)
                                locationd.append(d)
                                print(locationx)
                                print(l)
                                if l > 6:
                                    x1 = locationx[l - 6:l - 1]
                                    y1 = locationy[l - 6:l - 1]
                                    d1 = locationd[l - 6:l - 1]
                                    diff = np.var(x1) + np.var(y1) + np.var(d1)
                                    print(x1)
                                    print(y1)
                                    print(d1)
                                    print(diff)
                                    # less fluctuation
                                    if diff < 20:
                                        print("get position")
                                        x = np.average(x1)
                                        y = np.average(y1)
                                        d = np.average(d1)
                                        #scale x,y
                                        k1 = 1.48
                                        x = x * k1 * d / 512
                                        y = y * k1 * d / 318 * 0.72
                                        #x=x*k1
                                        #y=y*k1
                                        #actual position
                                        print(flag, x, y, d)
                                        # send to Raspberry Pi
                                        flag = ClientSocket(num, flag, x, y, d)
                                        #reset the data
                                        locationx = []
                                        locationy = []
                                        locationd = []

                        # choose a stable one as the result
                        #sample function()
                        #num=num+1

            cv2.imshow('RGBFrame', frame)
            cv2.imshow('DepthFrame', Dframe)

            #print("x: ", x ,"y: ", y)
            #print("_______________________")

            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)

                #print('average FPS',float(read_frames/datetime.now()))
            #if flag0:
            #locationx=[]
            #locationy=[]
            #locationd=[]

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()

    cv2.destroyAllWindows()

    return
Example No. 9
def detect_video(model, args):

    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    #!2020.1.5 change direction
    colors = pkl.load(open("yolov3-master/pallete", "rb"))
    classes = load_classes("yolov3-master/data/coco.names")
    colors = [colors[1]]
    #1.30
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input)+ cv2.CAP_DSHOW)
        #0 is front cam, 1 is back cam
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(args.outdir, 'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')
    #width height
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("wid: ",width,"hei:", height)
    #2020.1.10 FPS
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    read_frames = 0

    start_time = datetime.now()
    print('Start Detect')
    #!while loop!
    while cap.isOpened():
        ##print('Detecting')
        #print(fps)
        retflag, frame = cap.read()
        dim = (512, 424)
        frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
        print(frame.shape)

        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)

            if args.cuda:
                frame_tensor = frame_tensor.cuda()

            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh, args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                for detection in detections:
                    
                    draw_bbox([frame], detection, colors, classes)
            #1.30
            #if not args.no_show:
                #cv2.imshow('frame', frame)
            cv2.imshow('frame',frame)
            out.write(frame)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
                
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    if not args.no_show:
        cv2.destroyAllWindows()

    print('Detected video saved to ' + output_path)

    return
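One thing to watch in the function above: every frame is resized to (512, 424) before detection, but the VideoWriter was opened with the capture's original width and height, and OpenCV silently skips frames whose size does not match the writer, so the saved video can end up empty. A minimal sketch of a consistent setup, assuming the same resize target:

import cv2

dim = (512, 424)                                    # (width, height) used for cv2.resize
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('det_webcam.avi', fourcc, 30.0, dim)  # illustrative path and FPS

# inside the capture loop:
#     frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
#     out.write(frame)   # frame size now matches the writer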
Example No. 10
def detect_video(model, args):

    # draw_bbox([frame], detection, colors, classes)
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]

    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(
            args.outdir,
            'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')

    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
        cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #TODO: Change output path?
    out = cv2.VideoWriter("output.avi", fourcc, fps, (width, height))
    read_frames = 0

    mot_tracker = Sort()

    start_time = datetime.now()
    print('Detecting...')
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)

            if args.cuda:
                frame_tensor = frame_tensor.cuda()

            detections = model(frame_tensor, args.cuda).cpu()

            #print(detections.shape)

            #processresult changes the variable 'detections'
            # print(detections)
            #print(args.obj_thresh)
            #print(args.nms_thresh)
            detections = process_result(detections, 0.5, 0.4)
            cls_confs = detections[:, 6].cpu().data.numpy()
            cls_ids = detections[:, 7].cpu().data.numpy()

            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                #for detection in detections:

                xywh = detections[:, 1:5]
                '''
                xywh[:, 0] = (detections[:, 1] + detections[:, 3]) / 2
                xywh[:, 1] = (detections[:, 2] + detections[:, 4]) / 2
                
                # TODO: width and hieght are double what they're supposed to be and dunno why
                xywh[:, 2] = abs(detections[:, 3] - detections[:, 1]) *2
                xywh[:, 3] = abs(detections[:, 2] - detections[:, 4]) *2
                '''
                xywh = xywh.cpu().data.numpy()  # -> the final bounding boxes that can be fed into DeepSORT
                ######################################################
                #print(xywh.shape)
                #print(cls_confs.shape)
                #num_dets, temp = xywh.shape
                #Convert to MOT format
                xs = xywh[:, 0]
                ys = xywh[:, 1]
                ws = xywh[:, 2]
                hs = xywh[:, 3]
                #new_xs = xs - ws/2
                #new_ys = ys - hs/2

                MOT16_bbox = np.empty((0, 5))

                for cls_id, cls_conf, x, y, w, h in zip(
                        cls_ids, cls_confs, xs, ys, ws, hs):
                    #MOT16_temp = [read_frames, cls_id, x, y, w, h, cls_conf, -1, -1, -1]
                    MOT16_temp = [x, y, w, h, cls_conf]
                    np.set_printoptions(precision=2, linewidth=150)
                    MOT16_bbox = np.append(MOT16_bbox, [MOT16_temp], axis=0)
                """ 
                for i in range(num_dets):
                    # what exactly is read_frames
                    MOT16_temp = [xywh[i][0], xywh[i][1], xywh[i][2], xywh[i][3]]
                """
                #print("bboxinput: ", MOT16_bbox)
                tracking_boxes = mot_tracker.update(MOT16_bbox)
                #print("output: ", tracking_boxes)
                #print("-------------------NEW BOX-------------------------")
                for tracking_box in tracking_boxes:
                    #print(tracking_box)
                    draw_mot_bbox(frame, torch.from_numpy(tracking_box),
                                  colors, classes)
                #print("------------------END BOX--------------------------")
            out.write(frame)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
        else:
            break

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    # print('MOT16_bbox: \n', MOT16_bbox)
    cap.release()
    out.release()

    print('Detected video saved to ' + output_path)

    return
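Note on the tracker input: because the center/size conversion is commented out, xywh above still holds the raw corner coordinates, so each row pushed into MOT16_bbox is effectively [x1, y1, x2, y2, score], which is the format the reference SORT implementation's update() expects. A minimal, hypothetical round-trip (the import path is an assumption):

import numpy as np
from sort import Sort   # reference implementation by Bewley et al. (assumed import path)

mot_tracker = Sort()

# Each row is [x1, y1, x2, y2, score]; the values are illustrative.
dets = np.array([
    [100.0,  80.0, 220.0, 310.0, 0.91],
    [400.0,  60.0, 470.0, 140.0, 0.58],
])
tracks = mot_tracker.update(dets)
for x1, y1, x2, y2, track_id in tracks:   # last column is the track id
    print(int(track_id), x1, y1, x2, y2)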
Example No. 11
def detect_image(model, args):
    countbin = 0
    countgar = 0
    counttrash = 0
    a=[]
    print('Loading input image(s)...')
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    batch_size = int(model.net_info['batch'])

    imlist, imgs = load_images(args.input)
    print('Input image(s) loaded')

    img_batches = create_batches(imgs, batch_size)

    # load colors and classes
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("cfg/obj.names")

    if not osp.exists(args.outdir):
        os.makedirs(args.outdir)

    start_time = datetime.now()
    print('Detecting...')
    for batchi, img_batch in enumerate(img_batches):
        img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
        img_tensors = torch.stack(img_tensors)
        img_tensors = Variable(img_tensors)
        if args.cuda:
            img_tensors = img_tensors.cuda()
        detections = model(img_tensors, args.cuda).cpu()
        detections = process_result(detections, args.obj_thresh, args.nms_thresh)
        if len(detections) == 0:
            continue

        detections = transform_result(detections, img_batch, input_size)
        for detection in detections:
            draw_bbox(img_batch, detection, colors, classes)

            #la = classes[int(detection[-1])]
            #if la == 'bin':
                #countbin = countbin +1
            #if (la == 'plastic_bag' ):
                #countgar = countgar +1
            #if (la == 'trash'):
                #counttrash = counttrash+1
            if (countgar >= 3 or counttrash >= 5):
                p1 = tuple(detection[1:3].int())
                p2 = tuple(detection[3:5].int())
                combined_array=np.append(p1, p2)
                combined_list=combined_array.tolist()
                a.append(combined_list)
                result=cv2.groupRectangles(a,1,0.85)
                for (x,y,w,h) in result[0]:
  
                    img = img_batch[int(detection[0])]
     
        #print("---------Sum Amount-----------" )        
        #print("bin = " , countbin)
        #print("plastic_bag = " , countgar)
        #print("counttrash = " , counttrash)


        for i, img in enumerate(img_batch):
            save_path = osp.join(args.outdir, 'det_' + osp.basename(imlist[batchi*batch_size + i]))
            cv2.imwrite(save_path, img)
            print(save_path, 'saved')

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))

    return
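A caveat on the grouping step above: cv2.groupRectangles expects rectangles as (x, y, w, h), while the list a is built from corner pairs (x1, y1, x2, y2). A small sketch of the conversion the OpenCV call assumes (coordinates are illustrative):

import cv2

corner_boxes = [(120, 40, 260, 200), (125, 44, 255, 205)]          # (x1, y1, x2, y2)
rects = [(x1, y1, x2 - x1, y2 - y1) for x1, y1, x2, y2 in corner_boxes]
grouped, weights = cv2.groupRectangles(rects, 1, 0.85)
# Clusters with groupThreshold or fewer rectangles are discarded, so a lone
# detection would be dropped at groupThreshold=1.
for x, y, w, h in grouped:
    print(x, y, w, h)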
Example No. 12
def detect_video(model, args):

    # draw_bbox([frame], detection, colors, classes)
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]

    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(
            args.outdir,
            'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')

    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
        cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    read_frames = 0

    start_time = datetime.now()
    print('Detecting...')
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)

            if args.cuda:
                frame_tensor = frame_tensor.cuda()

            detections = model(frame_tensor, args.cuda).cpu()

            print(detections.shape)

            #processresult changes the variable 'detections'
            # print(detections)
            #print(args.obj_thresh)
            #print(args.nms_thresh)
            detections = process_result(detections, args.obj_thresh,
                                        args.nms_thresh)

            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                for detection in detections:
                    draw_bbox([frame], detection, colors, classes)
                    #print(detection)
                xywh = detections[:, 1:5]
                xywh[:, 0] = (detections[:, 1] + detections[:, 3]) / 2
                xywh[:, 1] = (detections[:, 2] + detections[:, 4]) / 2

                # TODO: width and height are double what they're supposed to be; not sure why
                xywh[:, 2] = abs(detections[:, 3] - detections[:, 1])  #*2
                xywh[:, 3] = abs(detections[:, 2] - detections[:, 4])  #*2
                xywh = xywh.cpu().data.numpy()  # -> the final bounding boxes that can be fed into DeepSORT
            ######################################################
            #print ("xy: \n", xywh)
            out.write(frame)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
        else:
            break

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()

    print('Detected video saved to ' + output_path)

    return
Example No. 13
def detect_video(model, args):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]

    colors = pkl.load(
        open(
            "/home/autonomous-car/Desktop/Autonomous_car_ros2/src/stereo_yolo3/stereo_yolo3/pallete",
            "rb"))
    classes = load_classes(
        "/home/autonomous-car/Desktop/Autonomous_car_ros2/src/stereo_yolo3/stereo_yolo3/data/coco.names"
    )
    colors = [colors[1]]
    if args.webcam:
        cap1 = cv2.VideoCapture(int(args.input))
        cap2 = cv2.VideoCapture(int(args.input) + 2)
        output_path = osp.join(args.outdir, 'dete_webcam.avi')
    else:
        cap1 = cv2.VideoCapture(int(args.input))
        cap2 = cv2.VideoCapture(int(args.input) + 2)
        output_path = osp.join(
            args.outdir,
            'dete_' + osp.basename(args.input).rsplit('.')[0] + '.avi')

    cap1.set(3, desired_width)
    cap1.set(4, desired_height)
    cap2.set(3, desired_width)
    cap2.set(4, desired_height)
    width, height = int(cap1.get(3)) * 2, int(cap1.get(4))
    fps = cap1.get(cv2.CAP_PROP_FPS)
    print(width)
    print(height)

    print('Detecting...')

    while cap1.isOpened():
        retflag, frame1 = cap1.read()
        retflag, frame2 = cap2.read()
        frame = np.concatenate((frame1, frame2), axis=1)

        # edge detection
        edge = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edge = cv2.Canny(edge, 100, 200)

        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)

            if args.cuda:
                frame_tensor = frame_tensor.cuda()

            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh,
                                        args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                for detection in detections:
                    draw_bbox([frame], detection, colors, classes)

            if not args.no_show:
                cv2.imshow('frame', frame)

            gray_scale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            stereo_image_pub.image_pub(gray_scale, edge)

            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    cap1.release()
    cap2.release()
    if not args.no_show:
        cv2.destroyAllWindows()

    return
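The function above relies on module-level names that are not shown: desired_width, desired_height and a stereo_image_pub publisher. The sketch below is purely a placeholder inferred from the call sites; the real publisher in this ROS 2 package will look different.

# Hypothetical module-level setup assumed by the function above.
desired_width = 640
desired_height = 480

class StereoImagePub:
    # Placeholder for the ROS 2 publisher wrapper used as stereo_image_pub.
    def image_pub(self, gray_scale, edge):
        # The real node would convert the arrays (e.g. with cv_bridge) and
        # publish them; this stub only mirrors the call signature used above.
        pass

stereo_image_pub = StereoImagePub()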
Example No. 14
    def image_dequeue_proc(self):
        framecnt = 0
        endcnt =0 # if idle for 2 minutes, save and quit.
        frame_start_time = time.time()
        self.encode_param = [int(cv2.IMWRITE_JPEG_QUALITY),99]
        while (True):
             if (self.imgq.empty()):
                if(endcnt >= 120):
                    self.logfile.close()
                    self.logfile2.close()
                    print("byebye")
                    sys.exit(0)
                else:
                    print('nothing in q, sleeping..')
                    time.sleep(1)
                    endcnt += 1

             else: # lets get 10 to speed up the process. not now though...
                device_name = self.dev_nameq.get()
                counter = int(self.framecntq.get())
                frame = self.imgq.get()
                ty = self.typeq.get()
                if self.cuda:
                    if ty  == 'img': # just detection, nothing else
                        self.detection_gpu(self.model, frame, counter)

                    elif ty == 'img_tracking':# for tracking
                        self.width = frame.shape[0]
                        self.height = frame.shape[1]
                        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        positions = []
                        if int(counter) % self.frame_skip == 0:
                            self.trackers = []
                            frame_tensor = cv_image2tensor(frame, self.input_size).unsqueeze(0)
                            frame_tensor = Variable(frame_tensor)

                            if self.cuda:
                                frame_tensor = frame_tensor.cuda()

                            detections = self.model(frame_tensor, self.cuda).cpu()
                            detections = process_result(detections, self.confidence, self.nms_thresh)
        
                            if len(detections) != 0:
                                detections = transform_result(detections, [frame], self.input_size)
                            #for detection in detections:
                                for idx, detection in enumerate(detections):
                                    if (self.classes[int(detection[-1])]=="person"):
                                        if float(detection[6]) > self.confidence:
                                            print("foound!")
                                            pre_score = (float(detection[6])) # prediction score
                                            pre_class = (self.classes[int(detection[-1])]) # prediction class
                                            pre_x1 = (int(detection[1])) # x1
                                            pre_y1 = (int(detection[2])) # y1
                                            pre_x2 = (int(detection[3])) # x2 
                                            pre_y2 = (int(detection[4])) # y2

                                            #self.draw_bbox([frame], detection, self.colors, self.classes)
                                            tracker = dlib.correlation_tracker()
                                            rect = dlib.rectangle(pre_x1, pre_y1, pre_x2, pre_y2)
                                            tracker.start_track(rgb, rect)
                                            self.trackers.append(tracker)
                        else: 
                            for tracker in self.trackers: 
                                tracker.update(rgb)
                                pos = tracker.get_position()
                                startX = int(pos.left())
                                startY = int(pos.top())
                                endX = int(pos.right())
                                endY = int(pos.bottom())
                                positions.append((startX, startY, endX, endY))

                        objects = self.ct.update(positions)

                        for (objectID, centroid) in objects.items():
                            to = self.trobs.get(objectID, None)
                            #self.ct.getall()
                            if (self.ct.checknumberofexisting()):
                                self.sumframebytes+=sys.getsizeof(frame)
                            #self.ct.predict(objectID, 30)

                            if to is None:
                                to = trackableobject.TrackableObject(objectID, centroid)
                            else:

                                cv2.circle(frame, (centroid[0],centroid[1]),4,(255,255,255),-1)
                                y = [c[1] for c in to.centroids]
                                x = [c[0] for c in to.centroids]
                                dirY = centroid[1] - np.mean(y)
                                dirX = centroid[0] - np.mean(x)
                                to.centroids.append(centroid)
                    
                                if not to.counted:
                                    if (self.tr == "dr"):
                                        prex, prey = self.ct.predict(objectID, 30)
                                        print("predicted obj movement..x,y: ", prex, prey)
                                        if(self.checkboundary_dir(prex, prey)=="R"):
                                            print("we need to send msg to right")
                                            p = self.ct.get_object_rect_by_id(objectID) # x1, y1, x2, y2
                                            if (p[0]<=0 or p[1] <= 0 or p[2] <= 0 or p[3] <=0):
                                                pass
                                            else:
                                                cv2.rectangle(frame, (p[0], p[1]), (p[2], p[3]), (255,0,0),2)
                                                croppedimg = frame[p[1]:p[3], p[0]:p[2]]
                                                jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, self.encode_param, device_name, self.timegap)
                                                #self.msg_bus.send_message_str(self.center_device_ip_int, self.center_device_port, jsonified_data)

                                        # send message
                                        elif(self.checkboundary_dir(prex, prey)=="L"):
                                            print("we need to send msg to left")
                                            p = self.ct.get_object_rect_by_id(objectID) # x1, y1, x2, y2
                                            if (p[0]<=0 or p[1] <= 0 or p[2] <= 0 or p[3] <=0):
                                                pass
                                            else:
                                
                                                cv2.rectangle(frame, (p[0], p[1]), (p[2], p[3]), (255,0,0),2)
                                                croppedimg = frame[p[1]:p[3], p[0]:p[2]]
                                                print((p[0], p[1]), (p[2], p[3]))
                                                #cv2.imwrite(str(counter)+".jpg", croppedimg)
                                                jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, self.encode_param, device_name, self.timegap)
                                                #self.msg_bus.send_message_str(self.left_device_ip_int, self.left_device_port, jsonified_data)

                                        elif(self.checkboundary_dir(prex, prey)=="D"):
                                            print("we need to send msg to down")
                                        elif(self.checkboundary_dir(prex, prey)=="U"):
                                            print("we need to send msg to up")

                                    elif (self.tr == "bc"):
                                        self.checkboundary(centroid, objectID)
                                        #print("loc of object in frame: ", objectID, self.boundary[objectID])

                                        self.checkdir(dirX, dirY, objectID)
                                        #print("moving direction of the object: ", objectID, self.objstatus[objectID])

                                        self.where[objectID] = self.checkhandoff(objectID)
                                        # send hand off msg here
                                        if(self.where[objectID] == "RIGHT"):
                                            print("we need to send msg to right")
                                            #print("just throw in the cropped img template")
                                            #print("upon receiving the cropped img, do the template matching & add to tracking dlib queue")
                                            p = self.ct.get_object_rect_by_id(objectID) # x1, y1, x2, y2
                                            #t = self.ct.objects[objectID]  #centroid x, y
                                            #print("type p: ", type(p)) # rect
                                            #print("t: ", t) # centroid
                                            cv2.rectangle(frame, (p[0], p[1]), (p[2], p[3]), (255,0,0),2)
                                            #croppedimg = frame[y1:y2, x1: x2]
                                            croppedimg = frame[p[1]:p[3], p[0]:p[2]]
                                            jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, self.encode_param, device_name, self.timegap)
                                            self.msg_bus.send_message_str(self.center_device_ip_int, self.center_device_port, jsonified_data)

                            
                                        elif (self.where[objectID]== "LEFT"):
                                            print("we need to send msg to left")
                                            p = self.ct.get_object_rect_by_id(objectID) # x1, y1, x2, y2
                                            #t = self.ct.objects[objectID]  #centroid x, y
                                            #print("type p: ", type(p)) # rect
                                            #print("t: ", t) # centroid
                                            cv2.rectangle(frame, (p[0], p[1]), (p[2], p[3]), (255,0,0),2)
                                            #croppedimg = frame[y1:y2, x1: x2]
                                            croppedimg = frame[p[1]:p[3], p[0]:p[2]]
                                            jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, self.encode_param, device_name, self.timegap)
                                            #jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, encode_param, device_name, self.timegap)
                                            self.msg_bus.send_message_str(self.right_device_ip_int, self.right_device_port, jsonified_data)
                                        elif (self.where[objectID]== "TOP"):
                                            print("we need to send msg to top")
                                        elif (self.where[objectID]== "BOTTOM"):
                                            print("we need to send msg to bottom")
                                        else:
                                            print("nothing is happinging")

                            self.trobs[objectID] = to
                            text = "ID {}".format(objectID) +" "+ str(centroid[0]) +" "+ str(centroid[1])
                            cv2.putText(frame, text, (centroid[0]-10, centroid[1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255),2) # ID of the object 
                            
                        sendablecnt = 0
#                        cv2.imwrite('frame'+str(counter)+'.jpg', frame)
                        if self.display == "on":
                            cv2.imshow('NCS live inference', frame)
                        if(cv2.waitKey(3) & 0xFF == ord('q')):
                            break

                    elif ty  == 'img_tracking_check':# dummy here
                        #thing = self.detection_gpu_return(self.model, frame, cnt)
                        print('not imp')
                else: # no GPU enabled
                    if ty == 'img':
                        self.detection(frame)
                    elif ty == 'img_tracking':
                        print ("NOT IMP YET")
                        # self.detection_tracking (self.model, self.imgq.get(), cnt)
                    elif ty  == 'img_tracking_check':
                        print ("NOT IMP YET")
                        # thing = self.detection_gpu_return(self.model, self.imgq.get(), cnt)
                curT = datetime.utcnow().strftime('%H:%M:%S.%f') # string format
                decay = datetime.strptime(curT, '%H:%M:%S.%f') - self.timerq.get()

                frame_end_time = time.time()
                cumlative_fps = int(counter) / (frame_end_time - frame_start_time)
                print("estimated dequeue speed: ", str(cumlative_fps)) # i don't think its needed here but in processing thread
                print(str(counter)+"\t"+str(device_name)+"\t"+str(decay.total_seconds()))
Example No. 15
def detect_video(model, args):

    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    mot_tracker = Sort(min_hits=args.min_hits, max_age=args.max_age)
    colors = pkl.load(open("cfg/pallete", "rb"))
    classes = load_classes("cfg/coco.names")

    # TODO: Turn this into an external config file (relevant classes and mapping)
    relevant_classes = [
        "boat"
    ]
    relevant_classes_indices = [classes.index(cls) for cls in relevant_classes]

    # If you want to merge classes together
    class_mapping = {
        classes.index("boat"): [classes.index(cls) for cls in []]
    }

    if not osp.isdir(args.outdir):
        os.mkdir(args.outdir)

    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(args.outdir, 'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')

    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_path = output_path.replace('.avi', '.mp4')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    read_frames = 0

    start_time = datetime.now()
    print('Detecting...')

    lod = []
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)

            if args.cuda:
                frame_tensor = frame_tensor.cuda()

            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh, args.nms_thresh,
                                        relevant_classes_indices, class_mapping)

            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)

            if len(detections) == 0:
                tracked_objects = mot_tracker.update(detections.cpu().detach().numpy())
            else:
                tracked_objects = mot_tracker.update(detections[:, 1:].cpu().detach().numpy())

            if args.debug_trackers:
                for n, tracker in enumerate(mot_tracker.trackers):
                    tracker_bb = tracker.predict()[0]
                    tracker_id = tracker.id
                    draw_simple_bbox(frame, tracker_bb, f"{tracker_id}")
            else:
                if len(tracked_objects) != 0:
                    for obj in tracked_objects:
                        bbox = obj[:4]
                        uid = int(obj[4])
                        cls_ind = int(obj[5])
                        draw_bbox([frame], bbox, uid, cls_ind, colors, classes)
                        lod.append(format_output(bbox, uid, cls_ind, classes, read_frames, output_path, fps))

            if not args.no_show:
                cv2.imshow('frame', frame)
            out.write(frame)
            if read_frames % 30 == 0:
                print(f'Frames processed: {read_frames/total_frames*100:0.2f}%')
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    end_time = datetime.now()
    print(f'Detection finished in {end_time - start_time}')
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    if not args.no_show:
        cv2.destroyAllWindows()

    print('Detected video saved to ' + output_path)

    name = output_path.replace('.mp4', '.csv')
    pd.DataFrame(lod).to_csv(name, index=False)
    print('Detected meta data saved as ' + name)
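This variant expects a richer args object than the earlier video examples: besides the usual input/output and threshold fields it reads min_hits, max_age, debug_trackers and no_show. A hypothetical invocation with only those attributes (values are illustrative):

from types import SimpleNamespace

args = SimpleNamespace(
    input='videos/harbour.mp4',   # illustrative path
    outdir='runs/',
    webcam=False,
    cuda=True,
    obj_thresh=0.5,
    nms_thresh=0.4,
    min_hits=3,                   # frames before a SORT track is reported
    max_age=10,                   # frames a track survives without a match
    debug_trackers=False,
    no_show=True,
)
# detect_video(model, args)       # model loading is repo-specific and omitted here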
Example No. 16
def detect_video(model, args):

    input_size = [int(model.net_info['height']), int(model.net_info['width'])]

    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'gpu.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(
            args.outdir,
            'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')

    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(
        cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    cap.set(3, 1280)
    cap.set(4, 720)

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    read_frames = 0

    start_time = datetime.now()
    print('Detecting...')
    while cap.isOpened():
        retflag, frame = cap.read()
        original_frame = frame.copy() if retflag else frame  # copy so drawing on frame does not also alter the original
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)

            if args.cuda:
                frame_tensor = frame_tensor.cuda()

            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh,
                                        args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                labels = []
                for detection in detections:
                    labels.append(classes[int(detection[-1])])
                state = state_trigger(labels)
                if state == 0:
                    for detection in detections:
                        draw_bbox([frame], detection, colors, classes)
                    cv2.imshow('frame', frame)
                elif state == 1:
                    frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
                    cv2.putText(frame, 'PHONE DETECTED',
                                bottomLeftCornerOfText, font, fontScale,
                                fontColor, lineType)
                    for detection in detections:
                        if classes[int(detection[-1])] == "cell phone":
                            draw_bbox([frame], detection, colors, classes)
                    cv2.imshow('frame', frame)
                    #sleep(0.65)
                else:
                    cv2.imshow('frame', original_frame)

            #if not args.no_show:
            #cv2.imshow('frame', frame)
            out.write(frame)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    if not args.no_show:
        cv2.destroyAllWindows()

    print('Detected video saved to ' + output_path)

    return
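The last example calls a state_trigger(labels) helper and uses several text-drawing constants (bottomLeftCornerOfText, font, fontScale, fontColor, lineType) that live elsewhere in its module. The sketch below is an assumed stand-in inferred only from how those names are used above; the real decision logic is not shown in the snippet.

import cv2

# Hypothetical module-level definitions assumed by the example above.
bottomLeftCornerOfText = (10, 40)
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1.0
fontColor = (0, 0, 255)
lineType = 2

def state_trigger(labels):
    # Illustrative stand-in: 1 when a phone is among the detected labels,
    # 0 for other detections, 2 otherwise.
    if "cell phone" in labels:
        return 1
    if labels:
        return 0
    return 2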