Example #1
def init_model(args):
    scales = args.scales

    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    start = 0

    num_classes = 80
    classes = load_classes('yolo/data/coco.names')
    print("classes")
    print(classes)

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()
    return model
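A minimal call-site sketch for Example #1. init_model also relies on a module-level CUDA flag and the load_classes/Darknet imports from the snippet's context; the attribute names below are exactly the ones it reads, while the paths and values are illustrative assumptions:

from types import SimpleNamespace

# illustrative argument namespace; init_model only reads these attributes
args = SimpleNamespace(scales="1,2,3",
                       images="imgs/",
                       bs="1",
                       confidence="0.5",
                       nms_thresh="0.4",
                       cfgfile="yolo/cfg/yolov3.cfg",      # illustrative path
                       weightsfile="yolo/yolov3.weights",  # illustrative path
                       reso="416")                         # must be a multiple of 32 and > 32
model = init_model(args)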
Example #2
class ObjectDetection(object):
    def __init__(self, batchSize=1):
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.eval()

        self.stopped = False
        self.batchSize = batchSize

    def process(self, img, orig_img, im_name, im_dim_list):
        with torch.no_grad():
            # Human Detection
            prediction = self.det_model(img, CUDA=False)
            # NMS process
            dets = dynamic_write_results(prediction,
                                         opt.confidence,
                                         opt.num_classes,
                                         nms=True,
                                         nms_conf=opt.nms_thesh)

            if isinstance(dets, int) or dets.shape[0] == 0:
                return orig_img[0], im_name[0], None, None, None, None, None

            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                       1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                im_dim_list[:, 1].view(-1, 1)) / 2

            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                              im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                              im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

        boxes_k = boxes[dets[:, 0] == 0]
        if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
            return orig_img[0], im_name[0], None, None, None, None, None
        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)
        scores_k = scores[dets[:, 0] == 0]
        return orig_img[0], im_name[0], boxes_k, scores_k, inps, pt1, pt2
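The coordinate-transfer block above undoes the letterbox preprocessing: the frame was scaled by min(inp_dim/w, inp_dim/h) and padded symmetrically to a square, so the padding offset is subtracted before dividing by the scale and clamping to the frame. A standalone sketch of the same arithmetic on plain floats (names are illustrative):

def letterbox_to_original(x1, y1, x2, y2, inp_dim, w, h):
    # scale used when the (w, h) frame was fit into the inp_dim square
    sf = min(inp_dim / w, inp_dim / h)
    # symmetric padding added on each axis
    pad_x = (inp_dim - sf * w) / 2
    pad_y = (inp_dim - sf * h) / 2
    # remove padding, undo the scaling, clamp to the frame bounds
    x1 = max(0.0, min((x1 - pad_x) / sf, w))
    x2 = max(0.0, min((x2 - pad_x) / sf, w))
    y1 = max(0.0, min((y1 - pad_y) / sf, h))
    y2 = max(0.0, min((y2 - pad_y) / sf, h))
    return x1, y1, x2, y2

# a 608x608 network-space box mapped back onto a 1280x720 frame
print(letterbox_to_original(100, 200, 300, 400, 608, 1280, 720))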
Example #3
class YoloLoader():
    def __init__(self):
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()
Example #4
    def __init__(self):
        det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        det_model.load_weights('models/yolo/yolov3-spp.weights')
        det_model.net_info['height'] = args.inp_dim
        det_inp_dim = int(det_model.net_info['height'])
        assert det_inp_dim % 32 == 0
        assert det_inp_dim > 32
        self.det_inp_dim = det_inp_dim
        det_model.cuda()
        det_model.eval()
        self.det_model = det_model
Example #5
def load_yolo_model(args):
    print('loading yolo model ...')
    det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
    det_model.load_weights('models/yolo/yolov3-spp.weights')
    det_model.net_info['height'] = args.inp_dim
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32
    det_model.cuda()
    det_model.eval()
    return det_model, det_inp_dim
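A hedged call-site sketch for Example #5: inp_dim is shown as a string because the function casts it with int, and the cfg/weights paths are hard-coded inside the function:

from types import SimpleNamespace

args = SimpleNamespace(inp_dim="608")  # must be a multiple of 32 and > 32
det_model, det_inp_dim = load_yolo_model(args)  # CUDA model in eval mode, plus its input size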
Example #6
class DetectionLoader2:
    def __init__(self):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("./yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('./models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda(torchCuda)
        self.det_model.eval()

    def load(self, img, orig_img, im_dim_list):
        with torch.no_grad():
            # Human Detection
            img = img.cuda(torchCuda)
            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence,
                                         opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

        # only the first image of the batch is used here
        k = 0
        boxes_k = boxes[dets[:, 0] == k]
        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)

        return (orig_img[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #7
def load_model(opt):
    pose_dataset = Mscoco()
    pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)

    det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
    det_model.load_weights('models/yolo/yolov3-spp.weights')
    det_model.net_info['height'] = opt.inp_dim
    pose_model.cuda()
    pose_model.eval()
    det_model.cuda()
    det_model.eval()

    return det_model, pose_model
Example #8
def set_yolo(args):
    labelsPath = os.path.sep.join([args["yolo"], "coco.names"])
    labels = load_classes(labelsPath)

    weightsPath = os.path.sep.join([args["yolo"], "yolov3.weights"])
    configPath = os.path.sep.join([args["yolo"], "yolov3.cfg"])

    # load our YOLO object detector trained on COCO dataset (80 classes)
    # and determine only the *output* layer names that we need from YOLO
    print("[INFO] loading YOLO from disk...")
    model = Darknet(configPath)
    model.load_weights(weightsPath)
    model.net_info["height"] = 320
    model.cuda()
    model.eval()
    return labels, model
Example #9
class DarknetModel(object):
    def __init__(self):
        self.scales = "1,2,3"
        self.batch_size = 1
        self.confidence = 0.5
        self.nms_thresh = 0.4
        self.reso = 416
        self.CUDA = False
        self.num_classes = 80
        self.classes = load_classes('data/coco.names') 
        self.colors = load_colors('data/pallete')
        self.model = Darknet('cfg/yolov3.cfg', self.reso)
        self.model.load_state_dict(torch.load('yolov3.pkl'))
        self.inp_dim = self.reso
        assert self.inp_dim % 32 == 0 
        assert self.inp_dim > 32
        if self.CUDA:
            self.model.cuda()
        self.model.eval()
    def predict(self, filename):
        image = cv2.imread(filename)
        img, orig_im, dim = prep_image(image, self.inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)
        if self.CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        output = self.model(img)
        output = sift_results(output, self.confidence, self.num_classes,
                              nms=True, nms_conf=self.nms_thresh)
        # normalize coordinates to [0, 1], then scale back to the original image size
        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim
        output[:, [1, 3]] *= image.shape[1]
        output[:, [2, 4]] *= image.shape[0]

        list(map(lambda x: write(x, orig_im, self.classes, self.colors), output))
        return orig_im
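A minimal usage sketch for Example #9, assuming the cfg/weights/class files referenced in __init__ are present and that write (from the snippet's context) draws each detection onto orig_im in place:

import cv2

detector = DarknetModel()
annotated = detector.predict('example.jpg')  # illustrative image path
cv2.imwrite('example_annotated.jpg', annotated)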
Example #10
class DetectionLoader:
    def __init__(self, batchSize=1, queueSize=1, size=100, device=0):

        ## camera stream
        self.stream = cv2.VideoCapture(device)
        assert self.stream.isOpened(), 'Cannot capture from camera'
        self.stream.set(cv2.CAP_PROP_BUFFERSIZE, 1)
        self.inp_dim = int(opt.inp_dim)

        ## yolo model
        self.det_model = Darknet("joints_detectors/Alphapose/yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('joints_detectors/Alphapose/models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()
        self.batchSize = batchSize
        self.datalen = 1
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        ## alphapose model
        fast_inference = True
        pose_dataset = Mscoco()
        if fast_inference:
            self.pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
        else:
            self.pose_model = InferenNet(4 * 1 + 1, pose_dataset)
        
        self.pose_model.cuda()
        self.pose_model.eval() 

        ## 2d plotting
        self.fig_in = plt.figure(figsize=(size, size))
        self.ax_in = self.fig_in.add_subplot(1, 1, 1)
        self.ax_in.get_xaxis().set_visible(False)
        self.ax_in.get_yaxis().set_visible(False)
        self.ax_in.set_axis_off()
        self.ax_in.set_title('Input')
        self.initialized = False
        self.size = size
        thismanager = get_current_fig_manager()
        thismanager.window.wm_geometry("+0-1000")
        


    def update(self):

        time1 = time.time()

        _, frame = self.stream.read()
        # frame = cv2.resize(frame, (frame.shape[1]//2,frame.shape[0]//2))

        #TODO TESTING
        # frame[:,:200,:]=0
        # frame[:,450:,:]=0


        img_k, self.orig_img, im_dim_list_k = prep_frame(frame, self.inp_dim)
        
        img = [img_k]
        im_name = ["im_name"]
        im_dim_list = [im_dim_list_k] 

        img = torch.cat(img)
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

        time2 = time.time()


        with torch.no_grad():
            ### detector 
            #########################
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence,
                                        opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:   
                self.visualize2dnoperson()
                return None
                
            
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            boxes_k = boxes[dets[:, 0] == 0]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                self.visualize2dnoperson()
                raise NotImplementedError
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)

            time3 = time.time()


            ### processor 
            #########################
            inp = im_to_torch(cv2.cvtColor(self.orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = self.crop_from_dets(inp, boxes, inps, pt1, pt2)

            ### generator
            #########################            
            self.orig_img = np.array(self.orig_img, dtype=np.uint8)
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)

            datalen = inps.size(0)
            batchSize = 20 #args.posebatch()
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []

            time4 = time.time()

            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = self.pose_model(inps_j)
                hm.append(hm_j)
            
            
            hm = torch.cat(hm)
            hm = hm.cpu().data

            preds_hm, preds_img, preds_scores = getPrediction(
                hm, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
            result = pose_nms(
                boxes, scores, preds_img, preds_scores)

            time5 = time.time()

            if not result:  # No people
                self.visualize2dnoperson()
                return None
            else:
                self.kpt = max(
                    result,
                    key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']))['keypoints']
                self.visualize2d()
                return self.kpt
            

##########################################################################################
##########################################################################################


    def crop_from_dets(self,img, boxes, inps, pt1, pt2):
        '''
        Crop humans from the original image according to detection results
        '''
        imght = img.size(1)
        imgwidth = img.size(2)
        tmp_img = img
        # channel-wise mean subtraction (note: tmp_img aliases img, so img is edited in place)
        tmp_img[0].add_(-0.406)
        tmp_img[1].add_(-0.457)
        tmp_img[2].add_(-0.480)
        for i, box in enumerate(boxes):
            upLeft = torch.Tensor(
                (float(box[0]), float(box[1])))
            bottomRight = torch.Tensor(
                (float(box[2]), float(box[3])))

            ht = bottomRight[1] - upLeft[1]
            width = bottomRight[0] - upLeft[0]

            scaleRate = 0.3

            upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
            upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
            bottomRight[0] = max(
                min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2), upLeft[0] + 5)
            bottomRight[1] = max(
                min(imght - 1, bottomRight[1] + ht * scaleRate / 2), upLeft[1] + 5)

            try:
                inps[i] = cropBox(tmp_img.clone(), upLeft, bottomRight, opt.inputResH, opt.inputResW)
            except IndexError:
                print(tmp_img.shape)
                print(upLeft)
                print(bottomRight)
                print('===')
            pt1[i] = upLeft
            pt2[i] = bottomRight

        return inps, pt1, pt2


##########################################################################################
##########################################################################################


    def visualize2d(self):
        if not self.initialized:
            self.image = self.ax_in.imshow(self.orig_img, aspect='equal')
            self.point = self.ax_in.scatter(*self.kpt.T, 5, color='red', edgecolors='white', zorder=10)
            self.initialized = True
        else:
            self.image.set_data(self.orig_img)
            self.point.set_offsets(self.kpt)


    def visualize2dnoperson(self): #TODO
        # Update 2D poses
        if not self.initialized:
            self.image = self.ax_in.imshow(self.orig_img, aspect='equal')
        else:
            self.image.set_data(self.orig_img)
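crop_from_dets in Example #10 pads every detection by scaleRate = 0.3 (15% of the box size on each side) before cropping, clamps to the frame, and forces at least a 5 px box. The same arithmetic on one illustrative box:

# pad the box (x1, y1, x2, y2) = (100, 100, 200, 300) inside a 640x480 frame
x1, y1, x2, y2 = 100.0, 100.0, 200.0, 300.0
width, ht, scaleRate = x2 - x1, y2 - y1, 0.3
x1 = max(0, x1 - width * scaleRate / 2)                     # 85.0
y1 = max(0, y1 - ht * scaleRate / 2)                        # 70.0
x2 = max(min(640 - 1, x2 + width * scaleRate / 2), x1 + 5)  # 215.0
y2 = max(min(480 - 1, y2 + ht * scaleRate / 2), y1 + 5)     # 330.0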
Example #11
class DetectionLoader:
    def __init__(self, dataloder, obj_id, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        cfg_path = "yolo/cfg/yolov3-single.cfg"
        weights_path = 'models/yolo/{:02d}.weights'.format(obj_id)
        self.det_model = Darknet(cfg_path, reso=int(opt.inp_dim))
        self.det_model.load_weights(weights_path)
        print("Loading YOLO cfg from", cfg_path)
        print("Loading YOLO weights from", weights_path)
        self.det_model.net_info['height'] = opt.inp_dim  # input dimension
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                img = img.cuda()
                # Critical, use yolo to do object detection here!
                prediction = self.det_model(img)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()

                # Scale for SIXD dataset

                reso = self.det_inp_dim
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                w, h = im_dim_list[:, 0], im_dim_list[:, 1]
                w_ratio = w / reso
                h_ratio = h / reso
                boxes = dets[:, 1:5]
                boxes[:, 0] = boxes[:, 0] * w_ratio
                boxes[:, 1] = boxes[:, 1] * h_ratio
                boxes[:, 2] = boxes[:, 2] * w_ratio
                boxes[:, 3] = boxes[:, 3] * h_ratio
                scores = dets[:, 5:6]

                # im_dim_list = torch.index_select(im_dim_list,0, dets[:, 0].long())
                # scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # # coordinate transfer
                # dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                # dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                # dets[:, 1:5] /= scaling_factor
                # for j in range(dets.shape[0]):
                #     dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                #     dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                # boxes = dets[:, 1:5]
                # scores = dets[:, 5:6]

            img = Image.open(im_name[0])
            draw = ImageDraw.Draw(img)
            for i in range(boxes.shape[0]):
                x1, y1, x2, y2 = boxes[i, 0], boxes[i, 1], boxes[i, 2], boxes[i, 3]
                objectness = 'conf: %.2f' % scores[i]  # computed but never drawn
                draw.rectangle((x1, y1, x2, y2), outline='red')

            # img.save(im_name[0].replace('rgb', 'results'))

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
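Unlike the letterboxed examples, Example #11 maps boxes back with independent width and height ratios (the symmetric-padding variant is left commented out below it). That mapping is only correct if prep_image stretched the frame to reso x reso without padding; as a standalone sketch:

def stretch_to_original(box, reso, w, h):
    # box: (x1, y1, x2, y2) in network coordinates, assuming a plain
    # (non-letterbox) resize of the (w, h) frame to reso x reso
    x1, y1, x2, y2 = box
    return (x1 * w / reso, y1 * h / reso, x2 * w / reso, y2 * h / reso)

print(stretch_to_original((100, 200, 300, 400), 416, 1280, 720))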
Example #12
                             'AlphaPose_webcam' + webcam + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    # Load YOLO model
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_model = Darknet("yolo/cfg/yolov3.cfg")
    det_model.load_weights('models/yolo/yolov3.weights')
    det_model.net_info['height'] = args.inp_dim
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32
    det_model.cuda()
    det_model.eval()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'ld': [], 'dt': [], 'dn': [], 'pt': [], 'pn': []}

    print('Starting webcam demo, press Ctrl + C to terminate...')
    sys.stdout.flush()
    im_names_desc = tqdm(loop())
Example #13
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        # initialize the queue used to store frames read from
        # the video file
        self.Q = LifoQueue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping the whole dataset
        while True:
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            with self.dataloder.Q.mutex:
                self.dataloder.Q.queue.clear()
            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #14
class YOLODetector(BaseDetector):
    def __init__(self, cfg, opt=None):
        super(YOLODetector, self).__init__()

        self.detector_cfg = cfg
        self.detector_opt = opt
        self.model_cfg = cfg.get('CONFIG', 'detector/yolo/cfg/yolov3-spp.cfg')
        self.model_weights = cfg.get('WEIGHTS',
                                     'detector/yolo/data/yolov3-spp.weights')
        self.inp_dim = cfg.get('INP_DIM', 608)
        self.nms_thres = cfg.get('NMS_THRES', 0.6)
        self.confidence = cfg.get('CONFIDENCE', 0.05)
        self.num_classes = cfg.get('NUM_CLASSES', 80)
        self.model = None
        self.load_model()

    def load_model(self):
        args = self.detector_opt

        print('Loading YOLO model..')
        self.model = Darknet(self.model_cfg)
        self.model.load_weights(self.model_weights)
        self.model.net_info['height'] = self.inp_dim
        print("Network successfully loaded")

        if args:
            if len(args.gpus) > 1:
                self.model = torch.nn.DataParallel(self.model,
                                                   device_ids=args.gpus).to(
                                                       args.device)
            else:
                self.model.to(args.device)
        else:
            self.model.cuda()
        self.model.eval()

    def image_preprocess(self, img_source):
        """
        Pre-process the img before fed to the object detection network
        Input: image name(str) or raw image data(ndarray or torch.Tensor,channel GBR)
        Output: pre-processed image data(torch.FloatTensor,(1,3,h,w))
        """
        if isinstance(img_source, str):
            img, orig_img, im_dim_list = prep_image(img_source, self.inp_dim)
        elif isinstance(img_source, torch.Tensor) or isinstance(
                img_source, np.ndarray):
            img, orig_img, im_dim_list = prep_frame(img_source, self.inp_dim)
        else:
            raise IOError('Unknown image source type: {}'.format(
                type(img_source)))

        return img

    def images_detection(self, imgs, orig_dim_list):
        """
        Feed the img data into object detection network and 
        collect bbox w.r.t original image size
        Input: imgs(torch.FloatTensor,(b,3,h,w)): pre-processed mini-batch image input
               orig_dim_list(torch.FloatTensor, (b,(w,h,w,h))): original mini-batch image size
        Output: dets(torch.cuda.FloatTensor,(n,(batch_idx,x1,y1,x2,y2,c,s,idx of cls))): object detection results
        """
        args = self.detector_opt
        _CUDA = True
        if args:
            if args.gpus[0] < 0:
                _CUDA = False
        if not self.model:
            self.load_model()
        with torch.no_grad():
            imgs = imgs.to(args.device) if args else imgs.cuda()
            prediction = self.model(imgs, args=args)
            #do nms to the detection results, only human category is left
            dets = self.dynamic_write_results(prediction,
                                              self.confidence,
                                              self.num_classes,
                                              nms=True,
                                              nms_conf=self.nms_thres)

            if isinstance(dets, int) or dets.shape[0] == 0:
                return 0
            dets = dets.cpu()

            orig_dim_list = torch.index_select(orig_dim_list, 0,
                                               dets[:, 0].long())
            scaling_factor = torch.min(self.inp_dim / orig_dim_list,
                                       1)[0].view(-1, 1)
            dets[:, [1, 3]] -= (self.inp_dim - scaling_factor *
                                orig_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.inp_dim - scaling_factor *
                                orig_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for i in range(dets.shape[0]):
                dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0,
                                              orig_dim_list[i, 0])
                dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0,
                                              orig_dim_list[i, 1])

            return dets

    def dynamic_write_results(self,
                              prediction,
                              confidence,
                              num_classes,
                              nms=True,
                              nms_conf=0.4):
        prediction_bak = prediction.clone()
        dets = self.write_results(prediction.clone(), confidence, num_classes,
                                  nms, nms_conf)
        if isinstance(dets, int):
            return dets

        if dets.shape[0] > 100:
            nms_conf -= 0.05
            dets = self.write_results(prediction_bak.clone(), confidence,
                                      num_classes, nms, nms_conf)

        return dets

    def write_results(self,
                      prediction,
                      confidence,
                      num_classes,
                      nms=True,
                      nms_conf=0.4):
        args = self.detector_opt
        #prediction: (batchsize, num of objects, (xc,yc,w,h,box confidence, 80 class scores))
        conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
        prediction = prediction * conf_mask

        try:
            ind_nz = torch.nonzero(prediction[:, :, 4],
                                   as_tuple=False).transpose(0,
                                                             1).contiguous()
        except:
            return 0

        #the 3rd channel of prediction: (xc,yc,w,h)->(x1,y1,x2,y2)
        box_a = prediction.new(prediction.shape)
        box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
        box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
        box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
        box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
        prediction[:, :, :4] = box_a[:, :, :4]

        batch_size = prediction.size(0)

        output = prediction.new(1, prediction.size(2) + 1)
        write = False
        num = 0
        for ind in range(batch_size):
            #select the image from the batch
            image_pred = prediction[ind]

            #Get the class having maximum score, and the index of that class
            #Get rid of num_classes softmax scores
            #Add the class index and the class score of class having maximum score
            max_conf, max_conf_score = torch.max(
                image_pred[:, 5:5 + num_classes], 1)
            max_conf = max_conf.float().unsqueeze(1)
            max_conf_score = max_conf_score.float().unsqueeze(1)
            seq = (image_pred[:, :5], max_conf, max_conf_score)
            #image_pred:(n,(x1,y1,x2,y2,c,s,idx of cls))
            image_pred = torch.cat(seq, 1)

            #Get rid of the zero entries
            non_zero_ind = (torch.nonzero(image_pred[:, 4], as_tuple=False))

            image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

            #Get the various classes detected in the image
            try:
                img_classes = unique(image_pred_[:, -1])
            except:
                continue

            #We do NMS classwise
            for cls in img_classes:
                if cls == 0:
                    continue
                #get the detections with one particular class
                cls_mask = image_pred_ * (image_pred_[:, -1]
                                          == cls).float().unsqueeze(1)
                class_mask_ind = torch.nonzero(cls_mask[:, -2],
                                               as_tuple=False).squeeze()

                image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

                #sort the detections such that the entry with the maximum objectness
                #confidence is at the top
                conf_sort_index = torch.sort(image_pred_class[:, 4],
                                             descending=True)[1]
                image_pred_class = image_pred_class[conf_sort_index]
                idx = image_pred_class.size(0)

                #if nms has to be done
                if nms:
                    if platform.system() != 'Windows':
                        #We use faster rcnn implementation of nms (soft nms is optional)
                        nms_op = getattr(nms_wrapper, 'nms')
                        #nms_op input:(n,(x1,y1,x2,y2,c))
                        #nms_op output: input[inds,:], inds
                        _, inds = nms_op(image_pred_class[:, :5], nms_conf)

                        image_pred_class = image_pred_class[inds]
                    else:
                        # Perform non-maximum suppression
                        max_detections = []
                        while image_pred_class.size(0):
                            # Get detection with highest confidence and save as max detection
                            max_detections.append(
                                image_pred_class[0].unsqueeze(0))
                            # Stop if we're at the last detection
                            if len(image_pred_class) == 1:
                                break
                            # Get the IOUs for all boxes with lower confidence
                            ious = bbox_iou(max_detections[-1],
                                            image_pred_class[1:], args)
                            # Remove detections with IoU >= NMS threshold
                            image_pred_class = image_pred_class[1:][
                                ious < nms_conf]

                        image_pred_class = torch.cat(max_detections).data

                #Concatenate the batch_id of the image to the detection
                #this helps us identify which image the detection corresponds to
                #We use a linear structure to hold ALL the detections from the batch;
                #the batch dim is flattened and each row is identified by an extra batch column

                batch_ind = image_pred_class.new(image_pred_class.size(0),
                                                 1).fill_(ind)
                seq = batch_ind, image_pred_class
                if not write:
                    output = torch.cat(seq, 1)
                    write = True
                else:
                    out = torch.cat(seq, 1)
                    output = torch.cat((output, out))
                num += 1

        if not num:
            return 0
        #output:(n,(batch_ind,x1,y1,x2,y2,c,s,idx of cls))
        return output

    def detect_one_img(self, img_name):
        """
        Detect bboxs in one image
        Input: 'str', full path of image
        Output: '[{"category_id":1,"score":float,"bbox":[x,y,w,h],"image_id":str},...]',
        The output results are similar with coco results type, except that image_id uses full path str
        instead of coco %012d id for generalization. 
        """
        args = self.detector_opt
        _CUDA = True
        if args:
            if args.gpus[0] < 0:
                _CUDA = False
        if not self.model:
            self.load_model()
        if isinstance(self.model, torch.nn.DataParallel):
            self.model = self.model.module
        dets_results = []
        #pre-process(scale, normalize, ...) the image
        img, orig_img, img_dim_list = prep_image(img_name, self.inp_dim)
        with torch.no_grad():
            img_dim_list = torch.FloatTensor([img_dim_list]).repeat(1, 2)
            img = img.to(args.device) if args else img.cuda()
            prediction = self.model(img, args=args)
            #do nms to the detection results, only human category is left
            dets = self.dynamic_write_results(prediction,
                                              self.confidence,
                                              self.num_classes,
                                              nms=True,
                                              nms_conf=self.nms_thres)
            if isinstance(dets, int) or dets.shape[0] == 0:
                return None
            dets = dets.cpu()

            img_dim_list = torch.index_select(img_dim_list, 0, dets[:,
                                                                    0].long())
            scaling_factor = torch.min(self.inp_dim / img_dim_list,
                                       1)[0].view(-1, 1)
            dets[:, [1, 3]] -= (self.inp_dim - scaling_factor *
                                img_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.inp_dim - scaling_factor *
                                img_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for i in range(dets.shape[0]):
                dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0,
                                              img_dim_list[i, 0])
                dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0,
                                              img_dim_list[i, 1])

                #write results
                det_dict = {}
                x = float(dets[i, 1])
                y = float(dets[i, 2])
                w = float(dets[i, 3] - dets[i, 1])
                h = float(dets[i, 4] - dets[i, 2])
                det_dict["category_id"] = 1
                det_dict["score"] = float(dets[i, 5])
                det_dict["bbox"] = [x, y, w, h]
                det_dict["image_id"] = int(
                    os.path.basename(img_name).split('.')[0])
                dets_results.append(det_dict)

            return dets_results
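The Windows branch of write_results in Example #14 is plain greedy NMS: keep the highest-confidence box, drop every remaining box whose IoU with it reaches nms_conf, repeat. A self-contained PyTorch sketch of that loop, with bbox_iou replaced by an inline IoU so it runs on its own:

import torch

def greedy_nms(boxes, scores, iou_thresh=0.4):
    """boxes: (n, 4) as (x1, y1, x2, y2); scores: (n,); returns kept row indices."""
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0]
        keep.append(i.item())
        if order.numel() == 1:
            break
        rest = order[1:]
        # IoU of the top box against the remaining candidates
        xy1 = torch.max(boxes[i, :2], boxes[rest, :2])
        xy2 = torch.min(boxes[i, 2:], boxes[rest, 2:])
        inter = (xy2 - xy1).clamp(min=0).prod(dim=1)
        area_i = (boxes[i, 2:] - boxes[i, :2]).prod()
        area_r = (boxes[rest, 2:] - boxes[rest, :2]).prod(dim=1)
        iou = inter / (area_i + area_r - inter)
        order = rest[iou < iou_thresh]
    return keep

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(greedy_nms(boxes, scores))  # [0, 2]: the second box overlaps the first too heavily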
Example #15
class WebcamDetectionLoader:
    def __init__(self, webcam=0, batchSize=1, queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stream = cv2.VideoCapture(int(webcam))
        assert self.stream.isOpened(), 'Cannot open webcam'
        self.stopped = False
        self.batchSize = batchSize

        # initialize the queue used to store frames read from
        # the video file
        self.Q = LifoQueue(maxsize=queueSize)

    def len(self):
        return self.Q.qsize()

    def start(self):
        # start a thread to read frames from the file video stream
        t = threading.Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        print(
            f'WebcamDetectionLoader_update_thread: {threading.current_thread().name}'
        )
        # keep looping
        while True:
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(self.batchSize):
                (grabbed, frame) = self.stream.read()
                # skip the frame before touching it if the read failed
                if not grabbed:
                    continue
                h, w, c = frame.shape
                # frame = cv2.resize(frame, (int(w / 4), int(h / 4)), interpolation=cv2.INTER_CUBIC)
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                inp_k = im_to_torch(orig_img_k)

                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                img = Variable(torch.cat(img)).cuda()
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list = im_dim_list.cuda()

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        if self.Q.full():
                            with self.Q.mutex:
                                self.Q.queue.clear()
                        self.Q.put((inp[k], orig_img[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                if self.Q.full():
                    with self.Q.mutex:
                        self.Q.queue.clear()
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k],
                            scores[dets[:, 0] == k]))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
Example #16
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()
        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        # initialize the queue used to store frames read from
        # the video file
        self.Q = LifoQueue(maxsize=queueSize)
        pose_dataset = Mscoco()
        if opt.fast_inference:
            self.pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
        else:
            self.pose_model = InferenNet(4 * 1 + 1, pose_dataset)
        self.pose_model.cuda()
        self.pose_model.eval()

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        while True:
            (img, orig_img, im_name, im_dim_list) = self.dataloder.getitem()

            with self.dataloder.Q.mutex:
                self.dataloder.Q.queue.clear()
            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # im_dim_list = im_dim_list.cuda()
                frame_id = int(im_name.split('.')[0])
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put(
                        (orig_img, frame_id, None, None, None, None, None))
                    continue

                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])

                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]
                if boxes is None:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put(
                        (orig_img, frame_id, None, None, None, None, None))
                    continue
                # Pose Estimation
                inp = im_to_torch(orig_img)
                inps = torch.zeros(boxes.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes.size(0), 2)
                pt2 = torch.zeros(boxes.size(0), 2)
                inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
                inps = Variable(inps.cuda())
                hm = self.pose_model(inps)
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm.cpu(), pt1, pt2, opt.inputResH, opt.inputResW,
                    opt.outputResH, opt.outputResW)
                bbox, b_score, kp, kp_score, roi = pose_nms(
                    orig_img, boxes, scores, preds_img, preds_scores)
                # result = {
                #     'imgname': im_name,
                #     'result': result,
                #     'orig_img': orig_img
                # }

                if self.Q.full():
                    time.sleep(2)
                #self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:,0]==k], inps, pt1, pt2))
                #self.Q.put((result, orig_img, im_name))
                self.Q.put(
                    (orig_img, frame_id, bbox, b_score, kp, kp_score, roi))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #17
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()
        self.dataloader = dataloder
        self.stopped = False
        self.batchSize = batchSize
        # frames arrive via the Q_load / Q_det queues passed to forward()

    def forward(self, Q_load, Q_det):
        # keep looping the whole dataset

        while True:
            #print(Q_load.qsize(), Q_det.qsize())
            img, orig_img, im_dim_list = Q_load.get()

            with torch.no_grad():
                # Human Detection
                img = img.cuda()

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:

                    for k in range(len(orig_img)):
                        if Q_det.full():
                            time.sleep(0.1)
                            #print("detectionloaderQ1 full ")
                        #Q_det.put((orig_img[k],  None, None, None, None, None))
                        Q_det.put((None, orig_img[k], None, None, None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)

                inp = im_to_torch(cv2.cvtColor(orig_img[k], cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes_k, inps, pt1, pt2)

                if Q_det.full():
                    time.sleep(0.1)
                    #print("detectionloaderQ3 full ")
                #Q_det.put((orig_img[k],  boxes_k, scores[dets[:,0]==k], inps, pt1, pt2))
                Q_det.put((inps, orig_img[k], boxes_k, scores[dets[:, 0] == k],
                           pt1, pt2))
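
The coordinate-transfer block above inverts the letterbox preprocessing: each frame is resized by min(inp_dim/w, inp_dim/h) and padded to a square network input, so a predicted box must be shifted by half the padding and divided by the scale. A self-contained sketch of the same mapping on one made-up box (all numbers are illustrative):

import torch

det_inp_dim = 608                        # square network input, as in this loader
im_dim = torch.tensor([[1280., 720.]])   # one original frame: (w, h)

# one scale factor per image, same formula as above (here 608/1280 = 0.475)
scaling_factor = torch.min(det_inp_dim / im_dim, 1)[0].view(-1, 1)

# a box predicted in 608x608 letterbox space: (x1, y1, x2, y2)
box = torch.tensor([[100., 200., 300., 400.]])
box[:, [0, 2]] -= (det_inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
box[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
box /= scaling_factor
box[:, [0, 2]] = box[:, [0, 2]].clamp(0.0, float(im_dim[0, 0]))
box[:, [1, 3]] = box[:, [1, 3]].clamp(0.0, float(im_dim[0, 1]))
print(box)  # the box in original 1280x720 pixel coordinates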
Example #18
0
class PeopleDetector:
    def __init__(self,
                 confidence=0.5,
                 nms_thresh=0.4,
                 resolution=416,
                 weights_path='yolo/weights/yolov3.weights',
                 cfg_path='yolo/cfg/yolov3.cfg',
                 num_classes=80,
                 names_path='yolo/data/coco.names'):
        self.confidence = confidence
        self.nms_thresh = nms_thresh
        self.weightsfile = weights_path
        self.cfgfile = cfg_path
        self.CUDA = torch.cuda.is_available()
        self.num_classes = num_classes
        self.classes = load_classes(names_path)
        self.model = Darknet(self.cfgfile)
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = resolution
        self.inp_dim = int(self.model.net_info["height"])
        # Check that the resolution is a multiple of 32
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        # If there's a GPU available, put the model on GPU
        if self.CUDA:
            self.model.cuda()
        # Set model in evaluation mode
        self.model.eval()

    def prep_image(self, img):
        """
        Prepare the image (resize) for input to the neural network.
        """
        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = cv2.resize(orig_im, (self.inp_dim, self.inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        return img_, orig_im, dim

    def writeSingleLabel(self, x, img, paintBoundingBoxes, color=(0, 0, 255)):
        """
        Put a label on the image if it's not inside a painting bounding box.
        Default label color: red.
        """
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())

        # check if the person's box is inside a painting's bounding box
        isInside = False
        for box in paintBoundingBoxes:
            isInside = (box[0] <= c1[0] < c2[0] <= (box[0] + box[2])
                        and box[1] <= c1[1] < c2[1] <= (box[1] + box[3]))
            if isInside:
                break
        if isInside:
            return
        cls = int(x[-1])
        label = "{0}".format(self.classes[cls])
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    def writeLabels(self, origin_im, netOutput, paintBoundingBoxes):
        """
        Put all labels on the image if they are not inside a painting bounding box.
        """
        for x in netOutput:
            self.writeSingleLabel(x, origin_im, paintBoundingBoxes)
        return origin_im

    def detectPeopleFromFrame(self, frame):
        """
        Detect people inside a frame and return bounding boxes
        """
        # Prepare the image as a PyTorch tensor
        img, orig_im, dim = self.prep_image(frame)

        #Load img on GPU if available
        if self.CUDA:
            img = img.cuda()

        # Run inference
        with torch.no_grad():
            output = self.model(Variable(img), self.CUDA)

        # Collect the 3-scale predictions into a single tensor
        output = write_results(output,
                               self.confidence,
                               self.num_classes,
                               nms=True,
                               nms_conf=self.nms_thresh)

        # If no detections...
        if isinstance(output, int):
            return None

        # If we have detections, keep only people (class id == 0)
        output = output[output[:, -1] < 1]

        # Rescale boxes to the input frame dimensions
        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(
            self.inp_dim)) / self.inp_dim
        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]

        if output.shape[1] != 8:
            return None

        return output
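
A minimal usage sketch for this class, assuming the default cfg/weights/names paths exist on disk; the input filename is a placeholder:

import cv2

detector = PeopleDetector()              # uses the default paths above
frame = cv2.imread('frame.jpg')          # hypothetical input frame
people = detector.detectPeopleFromFrame(frame)
if people is not None:
    # no painting boxes in this sketch, so no label is suppressed
    annotated = detector.writeLabels(frame, people, [])
    cv2.imwrite('frame_annotated.jpg', annotated)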
Example #19
0
class DetectionLoader:
    def __init__(self, dataloder):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder

    def detect_image(self, im_path):
        im, ori_im, im_name, im_dim_list = self.dataloder.getitem_yolo(im_path)

        with torch.no_grad():
            im = im.cuda()
            prediction = self.det_model(im, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction,
                                         opt.confidence,
                                         opt.num_classes,
                                         nms=True,
                                         nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            return (ori_im[0], im_name[0], None, None, None, None, None)

        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0] \
            .view(-1, 1)
        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim -
                            scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim -
                            scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

        dets[:, 1:5] /= scaling_factor
        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j,
                                                                            0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j,
                                                                            1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]

        if boxes.shape[0] > 1:
            # keep only the highest-scoring detection
            best = scores.argmax()
            boxes = boxes[best].unsqueeze(0)
            scores = scores[best].unsqueeze(0)
            dets = dets[best].unsqueeze(0)
        # len(ori_im) == 1
        for k in range(len(ori_im)):

            boxes_k = boxes[dets[:, 0] == k]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                return (ori_im[k], im_name[k], None, None, None, None, None)
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                               opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)
            return (ori_im[k], im_name[k], boxes_k, scores[dets[:, 0] == k],
                    inps, pt1, pt2)
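
A sketch of driving this single-image variant; my_image_loader stands in for whatever object provides the getitem_yolo used above, and the image path is a placeholder:

loader = DetectionLoader(my_image_loader)   # hypothetical loader with getitem_yolo
ori_im, im_name, boxes, scores, inps, pt1, pt2 = loader.detect_image('person.jpg')
if boxes is None:
    print('no detection in', im_name)
else:
    # only the highest-scoring box survives the argmax filter above
    print(im_name, boxes[0].tolist(), 'score', float(scores[0]))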
Example #20
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        # inp_dim must be greater than 32 and divisible by 32
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()

            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                # index_select: arg 1 is the tensor to index, arg 2 the dimension (0 = rows, 1 = cols), arg 3 the indices
                # keep the (w, h, w, h) rows of im_dim_list for images that produced detections
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                # each element of scaling_factor is the factor used when resizing that image to the network input
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    # clamp each box coordinate to [0, w] / [0, h] of its image and write back the result
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
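
A sketch of the intended producer/consumer flow, assuming a dataloader exposing getitem() and length() as used above:

det_loader = DetectionLoader(my_dataloader, batchSize=1).start()   # hypothetical source
for _ in range(det_loader.datalen):
    orig_img, im_name, boxes, scores, inps, pt1, pt2 = det_loader.read()
    if boxes is None:
        continue
    # hand (inps, boxes, scores, pt1, pt2) to the pose network from here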
Example #21
0
class Alphapose_skeleton:
    def __init__(self, cuda_id=0, fast_yolo=False):

        self.time_det = 0.0
        self.time_run = 0.0

        self.cuda_id = cuda_id
        self.target_kps = [5, 6, 7, 8, 9, 10]

        # Load yolo detection model
        print('Loading YOLO model..')
        if fast_yolo:
            self.det_model = Darknet('./AlphaPose/yolo/cfg/yolov3-tiny.cfg', self.cuda_id)
            self.det_model.load_weights('./AlphaPose/models/yolo/yolov3-tiny.weights')
        else:
            self.det_model = Darknet('./AlphaPose/yolo/cfg/yolov3.cfg', self.cuda_id)
            self.det_model.load_weights('./AlphaPose/models/yolo/yolov3.weights')
            
        self.det_model.cuda(self.cuda_id)
        self.det_model.eval()

        # Load pose model
        print('Loading Alphapose pose model..')
        pose_dataset = Mscoco()
        if args.fast_inference:
            self.pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
        else:
            self.pose_model = InferenNet(4 * 1 + 1, pose_dataset)
        self.pose_model.cuda(self.cuda_id)
        self.pose_model.eval()


    def run(self, folder_or_imglist, sample_rate):
        time_run_start = time.time()

        if isinstance(folder_or_imglist, str):
            inputpath = folder_or_imglist
            print(inputpath)
            args.inputpath = inputpath

            # Load input images
            im_names = [img for img in sorted(os.listdir(inputpath)) if img.endswith('jpg')]
            N = len(im_names)
            dataset = Image_loader(im_names, format='yolo')
        else:
            N = len(folder_or_imglist)
            imglist = [img for i, img in enumerate(folder_or_imglist) if i % sample_rate == 0]
            dataset = Image_loader_from_images(imglist, format='yolo')

        # Load detection loader
        test_loader = DetectionLoader(dataset, self.det_model, self.cuda_id).start()
        skeleton_result_list = []
        for i in range(dataset.__len__()):
            with torch.no_grad():
                (inp, orig_img, im_name, boxes, scores) = test_loader.read()

                if boxes is None or boxes.nelement() == 0:
                    skeleton_result = None
                else:
                    # Pose Estimation
                    time_det_start = time.time()
                    inps, pt1, pt2 = crop_from_dets(inp, boxes)
                    inps = Variable(inps.cuda(self.cuda_id))

                    hm = self.pose_model(inps)
                    hm_data = hm.cpu().data

                    preds_hm, preds_img, preds_scores = getPrediction(
                            hm_data, pt1, pt2, args.inputResH, args.inputResW, args.outputResH, args.outputResW)

                    skeleton_result = pose_nms(boxes, scores, preds_img, preds_scores)
                    self.time_det += (time.time() - time_det_start)

                skeleton_result_list.append(skeleton_result)

        skeleton_list = []
        j = 0
        for i in range(N):
            im_name = 'image_{:05d}.jpg'.format(i+1)

            if (i == sample_rate * (1+j)):
                j += 1
            skeleton_result = skeleton_result_list[j]

            skeleton_list.append([im_name.split('/')[-1]])
            if skeleton_result is not None:
                for human in skeleton_result:
                    kp_preds = human['keypoints']
                    kp_scores = human['kp_score']

                    # ## remove small hand 
                    # if float(kp_scores[9]) < 0.2 and float(kp_scores[10]) < 0.2:
                    #     continue

                    for n in range(kp_scores.shape[0]):
                        skeleton_list[-1].append(int(kp_preds[n, 0]))
                        skeleton_list[-1].append(int(kp_preds[n, 1]))
                        skeleton_list[-1].append(round(float(kp_scores[n]), 2))

        self.time_run += (time.time() - time_run_start)
        return skeleton_list

    def runtime(self):
        return self.time_det, self.time_run

    def save_skeleton(self, skeleton_list, outputpath):

        if not os.path.exists(outputpath):
            os.mkdir(outputpath)

        out_file = open(os.path.join(outputpath, 'skeleton.txt'), 'w')
        for skeleton in skeleton_list:
            out_file.write(' '.join(str(x) for x in skeleton))
            out_file.write('\n')
        out_file.close()
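
A sketch of running this wrapper on an in-memory frame list; the AlphaPose cfg/weight paths in __init__ must exist, and the frame files are placeholders:

import cv2

skel = Alphapose_skeleton(cuda_id=0)
frames = [cv2.imread(p) for p in ('f0001.jpg', 'f0002.jpg')]   # hypothetical frames
skeleton_list = skel.run(frames, sample_rate=1)
skel.save_skeleton(skeleton_list, './skeleton_out')
print('detection time / total time:', skel.runtime())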
Example #22
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet(
            "joints_detectors/Alphapose/yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights(
            'joints_detectors/Alphapose/models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, name="DetectionLoader", args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=(), daemon=True)
            # p = mp.Process(target=self.update, args=())
            # p.daemon = True
            p.start()
        return self

    def update(self):
        while True:
            sys.stdout.flush()
            print("detection loader len : " + str(self.Q.qsize()))

            # keep looping over the whole dataset
            #for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:

                    # if self.Q.full():
                    #     time.sleep(2)
                    self.Q.put((orig_img[0], im_name[0], None, None, None,
                                None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            #for k in range(len(orig_img)):
            boxes_k = boxes[dets[:, 0] == 0]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                # if self.Q.full():
                #     time.sleep(2)
                self.Q.put(
                    (orig_img[0], im_name[0], None, None, None, None, None))
                continue
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                               opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)
            # if self.Q.full():
            #     time.sleep(2)

            self.Q.put((orig_img[0], im_name[0], boxes_k,
                        scores[dets[:, 0] == 0], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #23
0
class DetectionNetwork(object):
    def __init__(self):
        self.confidence = 0.7
        self.nms_thesh = 0.4
        self.resolution = 640
        self.scales = "1,2,3"

        self.CUDA = torch.cuda.is_available()
        self.num_classes = 80
        self.classes = load_classes('yolo/data/coco.names')
        print("Loading network.....")
        self.model_detect = Darknet('cfg/yolov3.cfg')
        self.model_detect.load_weights('yolo/yolov3.weights')
        print("Network successfully loaded")
        self.model_detect.net_info["height"] = self.resolution
        self.inp_dim = int(self.model_detect.net_info["height"])
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        if self.CUDA:
            self.model_detect.cuda()

        self.model_detect.eval()
        self.colors = pkl.load(open("yolo/pallete", "rb"))

    def write(self, x, org_img):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = org_img
        cls = int(x[-1])
        label = "{0}".format(self.classes[cls])
        color = random.choice(self.colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    def detect(self, image, im_dim_list):

        # Detection inference
        prediction = self.model_detect(image, self.CUDA)
        prediction = write_results(prediction,
                                   self.confidence,
                                   self.num_classes,
                                   nms=True,
                                   nms_conf=self.nms_thesh)
        output = prediction

        # Check if anything was found
        if isinstance(output, int):
            return None

        objs = [self.classes[int(x[-1])] for x in output]
        print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
        print("----------------------------------------------------------")

        # Scaling, considering the original input resolution
        im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())

        scaling_factor = torch.min(self.inp_dim / im_dim_list,
                                   1)[0].view(-1, 1)

        output[:, [1, 3]] -= (self.inp_dim -
                              scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self.inp_dim -
                              scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                            im_dim_list[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                            im_dim_list[i, 1])

        return output

    def visualize_outputs(self, detect_output, draw_image):
        # Draw every bounding box iteratively
        for n_f in range(detect_output.size(0)):
            draw_image = self.write(detect_output[n_f, ...], draw_image)
        return draw_image
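
A sketch of feeding one frame through this network; prep_image is assumed to be the usual letterbox helper from this codebase (returning the network tensor, the original image, and its (w, h)), and the filenames are placeholders:

import cv2
import torch

net = DetectionNetwork()
frame = cv2.imread('street.jpg')                      # hypothetical frame
img, orig_im, dim = prep_image(frame, net.inp_dim)    # assumed helper, as in other examples
im_dim = torch.FloatTensor(dim).repeat(1, 2)
if net.CUDA:
    img, im_dim = img.cuda(), im_dim.cuda()
with torch.no_grad():
    output = net.detect(img, im_dim)
if output is not None:
    cv2.imwrite('street_annotated.jpg', net.visualize_outputs(output, orig_im))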
Example #24
0
class VideoDetectionLoader:
    def __init__(self, path, batchSize=4, queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stream = cv2.VideoCapture(path)
        assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False
        self.batchSize = batchSize
        self.datalen = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def length(self):
        return self.datalen

    def len(self):
        return self.Q.qsize()

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping over the whole video
        for i in range(self.num_batches):
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                (grabbed, frame) = self.stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.stop()
                    return
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                inp_k = im_to_torch(orig_img_k)

                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                img = Variable(torch.cat(img)).cuda()
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list = im_dim_list.cuda()

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        while self.Q.full():
                            time.sleep(0.2)
                        self.Q.put((inp[k], orig_img[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k],
                            scores[dets[:, 0] == k]))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
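
A sketch of draining this loader; the clip path is a placeholder, and reading exactly length() frames assumes the capture does not end early:

video_loader = VideoDetectionLoader('clip.mp4', batchSize=4).start()
fourcc, fps, frame_size = video_loader.videoinfo()
print('fps:', fps, 'size:', frame_size)
for _ in range(video_loader.length()):
    inp, orig_img, boxes, scores = video_loader.read()
    if boxes is None:
        continue
    # boxes holds (x1, y1, x2, y2) rows in original frame coordinates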
Example #25
0
class DetectionLoader:
    def __init__(self,
                 dataset,
                 det_model=None,
                 cuda_id=None,
                 batchSize=4,
                 queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        if det_model is None:
            self.det_model = Darknet('yolo/cfg/yolov3.cfg')
            self.det_model.load_weights('models/yolo/yolov3.weights')
            self.det_model.net_info['height'] = opt.inp_dim
            self.det_inp_dim = int(self.det_model.net_info['height'])
            assert self.det_inp_dim % 32 == 0
            assert self.det_inp_dim > 32
            self.det_model.cuda()
            self.det_model.eval()
        else:
            self.det_model = det_model
            self.det_model.net_info['height'] = opt.inp_dim
            self.det_inp_dim = int(self.det_model.net_info['height'])
            assert self.det_inp_dim % 32 == 0
            assert self.det_inp_dim > 32

        self.cuda_id = cuda_id

        self.stopped = False
        self.dataset = dataset
        self.batchSize = batchSize
        self.datalen = self.dataset.__len__()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        for i in range(self.num_batches):
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                img_k, inp_k, orig_img_k, im_name_k, im_dim_list_k = self.dataset.__getitem__(
                    k)
                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                if self.cuda_id is None:
                    img = Variable(torch.cat(img)).cuda()
                else:
                    img = Variable(torch.cat(img)).cuda(self.cuda_id)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                if self.cuda_id is None:
                    im_dim_list = im_dim_list.cuda()
                else:
                    im_dim_list = im_dim_list.cuda(self.cuda_id)

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh,
                                             cuda_id=self.cuda_id)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        while self.Q.full():
                            time.sleep(0.2)
                        self.Q.put(
                            (inp[k], orig_img[k], im_name[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], im_name[k],
                            boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #26
0
class MSRApose_skeleton():
    def __init__(self, cuda_id=0, fast_yolo=False):

        self.time_det = 0.0
        self.time_run = 0.0

        self.num_joints = 17
        self.target_kps = [5, 6, 7, 8, 9, 10]

        # Load yolo detection model
        print('Loading YOLO model..')
        if fast_yolo:
            self.det_model = Darknet('./AlphaPose/yolo/cfg/yolov3-tiny.cfg')
            self.det_model.load_weights(
                './AlphaPose/models/yolo/yolov3-tiny.weights')
        else:
            self.det_model = Darknet("./AlphaPose/yolo/cfg/yolov3.cfg")
            self.det_model.load_weights(
                './AlphaPose/models/yolo/yolov3.weights')

        self.det_model.cuda()
        self.det_model.eval()

        cfg_file = 'MSRAPose/experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3.yaml'
        model_file = 'MSRAPose/models/pytorch/pose_coco/pose_resnet_50_256x192.pth.tar'

        # update config
        update_config(cfg_file)
        config.TEST.MODEL_FILE = model_file

        # cudnn related setting
        cudnn.benchmark = config.CUDNN.BENCHMARK
        torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
        torch.backends.cudnn.enabled = config.CUDNN.ENABLED

        # load pre-trained model
        self.model = eval('models_msra.' + config.MODEL.NAME +
                          '.get_pose_net')(config, is_train=False)
        print('Loading MSRA pose model..')
        print('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        self.model.load_state_dict(torch.load(config.TEST.MODEL_FILE))

        gpus = [int(i) for i in config.GPUS.split(',')]
        self.model = torch.nn.DataParallel(self.model, device_ids=gpus).cuda()
        self.model.eval()

        # image transform
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    def _box2cs(self, box, image_width, image_height):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h, image_width, image_height)

    def _xywh2cs(self, x, y, w, h, image_width, image_height):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        aspect_ratio = image_width * 1.0 / image_height
        pixel_std = 200

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array([w * 1.0 / pixel_std, h * 1.0 / pixel_std],
                         dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def detect_skeleton_on_single_human(self, image, box):
        '''
        input: image read by OpenCV (cv2)
        '''

        data_numpy = image.copy()

        # object detection box
        if box is None:
            box = [0, 0, data_numpy.shape[0], data_numpy.shape[1]]
        c, s = self._box2cs(box, data_numpy.shape[0], data_numpy.shape[1])
        r = 0

        trans = get_affine_transform(c, s, r, config.MODEL.IMAGE_SIZE)
        input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(config.MODEL.IMAGE_SIZE[0]), int(config.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)

        input = self.transform(input).unsqueeze(0)

        with torch.no_grad():
            # compute output heatmap
            output = self.model(input)
            output = output.clone().cpu().numpy()

            # heatmap = output
            # heatmap_hand = heatmap[0][self.target_kps[0]]
            # print(heatmap.shape)
            # for kk in self.target_kps[1:]:
            #     heatmap_hand += heatmap[0][kk]
            # cv2.imshow('skeletons', heatmap_hand)
            # cv2.waitKey()

            # compute coordinate
            preds, maxvals = get_final_preds(config, output, np.asarray([c]),
                                             np.asarray([s]))

            return preds[0]

    def run(self, folder_or_imglist, sample_rate):
        time_run_start = time.time()

        if isinstance(folder_or_imglist, str):
            inputpath = folder_or_imglist
            print(inputpath)
            args.inputpath = inputpath

            # Load input images
            im_names = [
                img for img in sorted(os.listdir(inputpath))
                if img.endswith('jpg')
            ]
            dataset = Image_loader(im_names, format='yolo')
        else:
            imglist = folder_or_imglist
            dataset = Image_loader_from_images(imglist, format='yolo')

        # Load detection loader
        test_loader = DetectionLoader(dataset, self.det_model).start()

        skeleton_list = []
        # final_result = []
        for i in range(dataset.__len__()):
            with torch.no_grad():
                (inp, orig_img, im_name, boxes, scores) = test_loader.read()

                skeleton_result = []
                if boxes is None or boxes.nelement() == 0:
                    skeleton_result = None
                else:
                    # Pose Estimation
                    time_det_start = time.time()
                    for box in boxes.tolist():
                        x1, y1, x2, y2 = int(box[0]), int(box[1]), int(
                            box[2]), int(box[3])
                        box = [x1, y1, x2 - x1, y2 - y1]
                        skeleton_result.append(
                            self.detect_skeleton_on_single_human(
                                orig_img, box))
                    self.time_det += (time.time() - time_det_start)

                skeleton_list.append([im_name.split('/')[-1]])
                if skeleton_result is not None:
                    for human in skeleton_result:
                        for mat in human:
                            skeleton_list[-1].append(int(mat[0]))
                            skeleton_list[-1].append(int(mat[1]))
                            skeleton_list[-1].append(0.8)

        self.time_run += (time.time() - time_run_start)
        return skeleton_list

    def runtime(self):
        return self.time_det, self.time_run

    def generate_target_points(self, joints, image_size, sigma):
        '''
        :param joints: [num_joints, 3]
        :return: target heatmaps, one Gaussian per joint
        '''
        # target_weight = np.ones((self.num_joints, 1), dtype=np.float32)
        # target_weight[:, 0] = joints_vis[:, 0]

        target = np.zeros((self.num_joints, image_size[1], image_size[0]),
                          dtype=np.float32)

        tmp_size = sigma * 3

        for joint_id in range(self.num_joints):
            feat_stride = [1, 1]  # image_size / heatmap_size, equal here
            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
            # Check that any part of the gaussian is in-bounds
            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            if ul[0] >= image_size[0] or ul[1] >= image_size[1] \
                    or br[0] < 0 or br[1] < 0:
                # If not, just return the image as is
                # target_weight[joint_id] = 0
                continue

            # # Generate gaussian
            size = 2 * tmp_size + 1
            x = np.arange(0, size, 1, np.float32)
            y = x[:, np.newaxis]
            x0 = y0 = size // 2
            # The gaussian is not normalized, we want the center value to equal 1
            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

            # Usable gaussian range
            g_x = max(0, -ul[0]), min(br[0], image_size[0]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], image_size[1]) - ul[1]
            # Image range
            img_x = max(0, ul[0]), min(br[0], image_size[0])
            img_y = max(0, ul[1]), min(br[1], image_size[1])

            v = 1  #target_weight[joint_id]
            if v > 0.5:
                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                    g[g_y[0]:g_y[1], g_x[0]:g_x[1]]

        return target  #, target_weight

    def generate_target_lines(self, joints, image_size, target_kps):

        l_pair = [
            (0, 1),
            (0, 2),
            (1, 3),
            (2, 4),  # Head
            (5, 6),
            (5, 7),
            (7, 9),
            (6, 8),
            (8, 10),  # Hand
            (17, 11),
            (17, 12),  # Body
            (11, 13),
            (12, 14),
            (13, 15),
            (14, 16)
        ]  # Leg

        line_color = [(0, 215, 255), (0, 255, 204), (0, 134, 255),
                      (0, 255, 50), (77, 255, 222), (77, 196, 255),
                      (77, 135, 255), (191, 255, 77), (77, 255, 77),
                      (77, 222, 255), (255, 156, 127), (0, 127, 255),
                      (255, 127, 77), (0, 77, 255), (255, 77, 36)]

        # Nose, LEye, REye, LEar, REar
        # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist
        # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck
        p_color = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255),
                   (0, 255, 0), (77, 255, 255), (77, 255, 204), (77, 204, 255),
                   (191, 255, 77), (77, 191, 255), (191, 255, 77),
                   (204, 77, 255), (77, 255, 204), (191, 77, 255),
                   (77, 255, 191), (127, 77, 255), (77, 255, 127),
                   (0, 255, 255)]

        img = np.zeros(shape=image_size, dtype='uint8')
        part_line = {}
        for n in range(self.num_joints):
            # if float(kp_scores_h[n]) <= 0.05:
            #     continue

            cor_x, cor_y = int(joints[n][0]), int(joints[n][1])
            part_line[n] = (cor_x, cor_y)
            # cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1)

        # Draw limbs
        for i, (start_p, end_p) in enumerate(l_pair):
            if i not in target_kps:
                continue

            if start_p in part_line and end_p in part_line:
                start_xy = part_line[start_p]
                end_xy = part_line[end_p]
                cv2.line(img, start_xy, end_xy, line_color[i], 5)
        return img
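
The _xywh2cs conversion above follows the MSRA simple-baselines convention: the box is widened (or heightened) to the model aspect ratio, divided by a fixed pixel_std of 200, then inflated by 1.25. A standalone restatement for a quick numeric check (it drops the center[0] != -1 guard, which is always true here):

import numpy as np

def xywh2cs(x, y, w, h, image_width, image_height, pixel_std=200):
    # same arithmetic as MSRApose_skeleton._xywh2cs, minus the -1 guard
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    aspect_ratio = image_width / image_height
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    return center, np.array([w, h], dtype=np.float32) / pixel_std * 1.25

print(xywh2cs(50, 40, 100, 300, image_width=192, image_height=256))
# -> center (100., 190.), scale (1.40625, 1.875): w grew to 225 to match the 0.75 ratio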
Example #27
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        # self.det_inp_dim = int(self.det_model.net_info['height'])
        self.det_inp_dim = int(opt.inp_dim)
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        from mtcnn.mtcnn import MTCNN
        detector = MTCNN()
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                if self.dataloder.format == 'yolo':
                    # Human Detection
                    img = img.cuda()
                    prediction = self.det_model(img, CUDA=True)
                    # NMS process
                    dets = dynamic_write_results(prediction,
                                                 opt.confidence,
                                                 opt.num_classes,
                                                 nms=True,
                                                 nms_conf=opt.nms_thesh)

                elif self.dataloder.format == 'mtcnn':
                    # Face detection
                    imgs_np = img.float().mul(255.0).cpu().numpy()
                    imgs_np = np.squeeze(imgs_np, axis=0)
                    imgs_np = np.transpose(imgs_np, (1, 2, 0))
                    dets = detector.detect_faces(imgs_np)
                    fac_det = []
                    for det in dets:
                        fac_det.append([
                            0, det["box"][0], det["box"][1],
                            det["box"][0] + det["box"][2],
                            det["box"][1] + det["box"][3], det["confidence"],
                            0.99, 0
                        ])
                    dets = torch.tensor(fac_det)

                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
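
The 'mtcnn' branch above repacks every face dict into the same 8-column row the YOLO path emits (batch index, x1, y1, x2, y2, box confidence, class score, class id). A standalone check with one fabricated face dict:

import torch

det = {"box": [30, 40, 50, 60], "confidence": 0.97}   # fabricated MTCNN-style output
row = [0, det["box"][0], det["box"][1],
       det["box"][0] + det["box"][2],    # x2 = x + w
       det["box"][1] + det["box"][3],    # y2 = y + h
       det["confidence"], 0.99, 0]
print(torch.tensor([row]))   # [[0., 30., 40., 80., 100., 0.97, 0.99, 0.]]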
Example #28
0
    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(args.cfgfile, height=args.reso)
    model.load_state_dict(torch.load(args.weightsfile))

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    cap = cv2.VideoCapture(args.video)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()
Example #29
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        for i in range(self.num_batches):

            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()

            # img = (batch, frames)
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return
            start_time = getTime()
            with torch.no_grad():
                # Human Detection

                img = img.cuda()  # image ( B, 3, 608,608 )
                prediction = self.det_model(img, CUDA=True)

                # ( B, 22743, 85 ) = ( batchsize, proposal boxes, xywh+cls)
                # predictions for each B image.

                # NMS process
                carperson = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True,
                                                  nms_conf=opt.nms_thesh)
                if isinstance(carperson, int) or carperson.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(0.5)
                        self.Q.put((orig_img[k], im_name[k], None, None, None, None, None, None))  # 8 elements
                    continue

                ckpt_time, det_time = getTime(start_time)

                carperson = carperson.cpu()  # (1) k-th image , (7) x,y,w,h,c, cls_score, cls_index
                im_dim_list = torch.index_select(im_dim_list, 0, carperson[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # coordinate transfer
                carperson[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                carperson[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                carperson[:, 1:5] /= scaling_factor
                for j in range(carperson.shape[0]):
                    carperson[j, [1, 3]] = torch.clamp(carperson[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    carperson[j, [2, 4]] = torch.clamp(carperson[j, [2, 4]], 0.0, im_dim_list[j, 1])

                cls_car_mask = carperson * (carperson[:, -1] == 2).float().unsqueeze(1)  # car
                cls_car_mask_ind = torch.nonzero(cls_car_mask[:, -2]).squeeze()
                car_dets = carperson[cls_car_mask_ind].view(-1, 8)

                cls_person_mask = carperson * (carperson[:, -1] == 0).float().unsqueeze(1)  # person
                cls_person_mask_ind = torch.nonzero(cls_person_mask[:, -2]).squeeze()
                hm_dets = carperson[cls_person_mask_ind].view(-1, 8)
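                # (class indices follow COCO ordering: 0 = person, 2 = car)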

                ckpt_time, masking_time = getTime(ckpt_time)

            hm_boxes, hm_scores = None, None

            if hm_dets.size(0) > 0:
                hm_boxes = hm_dets[:, 1:5]
                hm_scores = hm_dets[:, 5:6]

            car_box_conf = None
            if car_dets.size(0) > 0:
                car_box_conf = car_dets

            for k in range(len(orig_img)):  # for k-th image detection.

                if car_box_conf is None:
                    car_k = None
                else:
                    car_k = car_box_conf[car_box_conf[:, 0] == k].numpy()
                    car_k = car_k[np.where(car_k[:, 5] > 0.2)]  # TODO check here, cls or bg/fg confidence?
                    # car_k = non_max_suppression_fast(car_k, overlapThresh=0.3)  # TODO check here, NMS

                if hm_boxes is not None:
                    hm_boxes_k = hm_boxes[hm_dets[:, 0] == k]
                    hm_scores_k = hm_scores[hm_dets[:, 0] == k]
                    inps = torch.zeros(hm_boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                    pt1 = torch.zeros(hm_boxes_k.size(0), 2)
                    pt2 = torch.zeros(hm_boxes_k.size(0), 2)
                    item = (orig_img[k], im_name[k], hm_boxes_k, hm_scores_k, inps, pt1, pt2, car_k)
                    # print('video processor ', 'image', im_name[k], 'hm box ', hm_boxes_k.size())
                else:
                    item = (orig_img[k], im_name[k], None, None, None, None, None, car_k)  # 8 elements

                if self.Q.full():
                    time.sleep(0.5)
                self.Q.put(item)

            ckpt_time, distribute_time = getTime(ckpt_time)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
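
A consumer of the loader above simply drains the queue until it meets the all-None sentinel. A minimal sketch of that pattern, assuming `loader` is an instance of the class above and `handle_detections` is a hypothetical downstream step (neither name comes from the original example):

loader.start()
while True:
    # read() blocks until the producer has queued the next 8-element item
    orig_img, im_name, hm_boxes, hm_scores, inps, pt1, pt2, car_k = loader.read()
    if orig_img is None:  # all-None sentinel: the whole dataset has been processed
        break
    if hm_boxes is not None:
        handle_detections(orig_img, hm_boxes, hm_scores, car_k)  # hypothetical helper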
Example #30
0
def main():
    global args
    args = parser.parse_args()

    # Yolo
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(args.config_file)
    model.load_weights(args.weights_file)

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    # Connect
    client = paho.Client()
    host, port = args.broker_url.split(':')
    client.connect(host, int(port))

    # subscribe a system messages
    client.message_callback_add("$SYS/#", system_message)
    client.subscribe("$SYS/#")
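    # "$SYS/#" is the broker's reserved topic tree for its own status
    # messages; subscribing to it here is purely for diagnostics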

    # Open rtsp stream
    cap = cv2.VideoCapture(args.input_url)

    assert cap.isOpened(), 'Cannot capture source {}'.format(args.input_url)

    # Inspect input stream
    input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    input_fps = cap.get(cv2.CAP_PROP_FPS)
    print("[input stream] width: {}, height: {}, fps: {}".format(
        input_width, input_height, input_fps))

    # Open output stream
    output_command = stream_factory(args.output_url, input_width, input_height,
                                    input_fps)
    print(output_command)
    output_stream = sp.Popen(output_command, stdin=sp.PIPE, stderr=sp.PIPE)

    # Load class names and box colours once, before the frame loop
    classes = load_classes('yolo/data/coco.names')
    colors = pkl.load(open("yolo/pallete", "rb"))

    frames = 0
    start = time.time()

    while cap.isOpened():
        ret, frame = cap.read()  # frame size: 640x360x3(=691200)
        if ret:
            # Our detect operations on the frame come here

            img, orig_im, im_dim = prep_image(frame, inp_dim)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thresh)

            if isinstance(output, int):
                frames += 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim

            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]
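            # Note: this maps boxes from the inp_dim square straight to
            # frame coordinates; if prep_image letterboxes the input, the
            # padding offset is ignored and boxes can drift for
            # non-square frames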

            # Overlay on screen
            list(map(lambda x: write(x, orig_im, classes, colors), output))
            # Send the bounding boxes (not implemented in this example)

            # Display the resulting frame
            cv2.imshow("frame", orig_im)
            frames += 1
            print("FPS of the video is {:5.2f}, size: {}".format(
                frames / (time.time() - start), orig_im.size))

            # Write rtmp stream (note: `frame` is the raw input; pass
            # orig_im instead to stream the frames with boxes drawn)
            output_stream.stdin.write(frame.tobytes())
        else:
            break
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Close
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
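
The `stream_factory` helper used above is not shown in this example. A plausible sketch, assuming it returns an ffmpeg argument list that reads raw BGR24 frames on stdin and publishes them to the RTMP output URL (the exact flags are an assumption, not the original helper):

def stream_factory(output_url, width, height, fps):
    # Hypothetical sketch: build an ffmpeg command that consumes raw
    # BGR24 frames on stdin and pushes an FLV/RTMP stream to output_url.
    return ['ffmpeg', '-y',
            '-f', 'rawvideo',
            '-vcodec', 'rawvideo',
            '-pix_fmt', 'bgr24',
            '-s', '{}x{}'.format(int(width), int(height)),
            '-r', str(fps),
            '-i', '-',  # frames arrive on stdin
            '-c:v', 'libx264',
            '-pix_fmt', 'yuv420p',
            '-preset', 'ultrafast',
            '-f', 'flv',
            output_url]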