Example #1
def init_model(args):
    scales = args.scales

    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    start = 0

    num_classes = 80
    classes = load_classes('yolo/data/coco.names')
    print("classes")
    print(classes)

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()
    return model
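A minimal call-site sketch for Example #1. init_model also relies on a module-level CUDA flag and the load_classes/Darknet imports from the snippet's context; the attribute names below are exactly the ones it reads, while the paths and values are illustrative assumptions:

from types import SimpleNamespace

# illustrative argument namespace; init_model only reads these attributes
args = SimpleNamespace(scales="1,2,3",
                       images="imgs/",
                       bs="1",
                       confidence="0.5",
                       nms_thresh="0.4",
                       cfgfile="yolo/cfg/yolov3.cfg",      # illustrative path
                       weightsfile="yolo/yolov3.weights",  # illustrative path
                       reso="416")                         # must be a multiple of 32 and > 32
model = init_model(args)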
Example #2
class ObjectDetection(object):
    def __init__(self, batchSize=1):
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.eval()

        self.stopped = False
        self.batchSize = batchSize

    def process(self, img, orig_img, im_name, im_dim_list):
        with torch.no_grad():
            # Human Detection
            prediction = self.det_model(img, CUDA=False)
            # NMS process
            dets = dynamic_write_results(prediction,
                                         opt.confidence,
                                         opt.num_classes,
                                         nms=True,
                                         nms_conf=opt.nms_thesh)

            if isinstance(dets, int) or dets.shape[0] == 0:
                return orig_img[0], im_name[0], None, None, None, None, None

            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                       1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                im_dim_list[:, 1].view(-1, 1)) / 2

            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                              im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                              im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

        boxes_k = boxes[dets[:, 0] == 0]
        if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
            return orig_img[0], im_name[0], None, None, None, None, None
        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)
        scores_k = scores[dets[:, 0] == 0]
        return orig_img[0], im_name[0], boxes_k, scores_k, inps, pt1, pt2
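The coordinate-transfer block above undoes the letterbox preprocessing: the frame was scaled by min(inp_dim/w, inp_dim/h) and padded symmetrically to a square, so the padding offset is subtracted before dividing by the scale and clamping to the frame. A standalone sketch of the same arithmetic on plain floats (names are illustrative):

def letterbox_to_original(x1, y1, x2, y2, inp_dim, w, h):
    # scale used when the (w, h) frame was fit into the inp_dim square
    sf = min(inp_dim / w, inp_dim / h)
    # symmetric padding added on each axis
    pad_x = (inp_dim - sf * w) / 2
    pad_y = (inp_dim - sf * h) / 2
    # remove padding, undo the scaling, clamp to the frame bounds
    x1 = max(0.0, min((x1 - pad_x) / sf, w))
    x2 = max(0.0, min((x2 - pad_x) / sf, w))
    y1 = max(0.0, min((y1 - pad_y) / sf, h))
    y2 = max(0.0, min((y2 - pad_y) / sf, h))
    return x1, y1, x2, y2

# a 608x608 network-space box mapped back onto a 1280x720 frame
print(letterbox_to_original(100, 200, 300, 400, 608, 1280, 720))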
Example #3
class YoloLoader():
    def __init__(self):
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()
Example #4
    def __init__(self):
        det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        det_model.load_weights('models/yolo/yolov3-spp.weights')
        det_model.net_info['height'] = args.inp_dim
        det_inp_dim = int(det_model.net_info['height'])
        assert det_inp_dim % 32 == 0
        assert det_inp_dim > 32
        self.det_inp_dim = det_inp_dim
        det_model.cuda()
        det_model.eval()
        self.det_model = det_model
Example #5
def load_yolo_model(args):
    print('loading yolo model ...')
    det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
    det_model.load_weights('models/yolo/yolov3-spp.weights')
    det_model.net_info['height'] = args.inp_dim
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32
    det_model.cuda()
    det_model.eval()
    return det_model, det_inp_dim
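A hedged call-site sketch for Example #5: inp_dim is shown as a string because the function casts it with int, and the cfg/weights paths are hard-coded inside the function:

from types import SimpleNamespace

args = SimpleNamespace(inp_dim="608")  # must be a multiple of 32 and > 32
det_model, det_inp_dim = load_yolo_model(args)  # CUDA model in eval mode, plus its input size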
Example #6
class DetectionLoader2:
    def __init__(self):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("./yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('./models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda(torchCuda)
        self.det_model.eval()

    def load(self, img, orig_img, im_dim_list):
        with torch.no_grad():
            # Human Detection
            img = img.cuda(torchCuda)
            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence,
                                         opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

        # only the first image of the batch is used here
        k = 0
        boxes_k = boxes[dets[:, 0] == k]
        inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
        pt1 = torch.zeros(boxes_k.size(0), 2)
        pt2 = torch.zeros(boxes_k.size(0), 2)

        return (orig_img[k], boxes_k, scores[dets[:, 0] == k], inps, pt1, pt2)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #7
def load_model(opt):
    pose_dataset = Mscoco()
    pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)

    det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
    det_model.load_weights('models/yolo/yolov3-spp.weights')
    det_model.net_info['height'] = opt.inp_dim
    pose_model.cuda()
    pose_model.eval()
    det_model.cuda()
    det_model.eval()

    return det_model, pose_model
Example #8
def set_yolo(args):
    labelsPath = os.path.sep.join([args["yolo"], "coco.names"])
    labels = load_classes(labelsPath)

    weightsPath = os.path.sep.join([args["yolo"], "yolov3.weights"])
    configPath = os.path.sep.join([args["yolo"], "yolov3.cfg"])

    # load our YOLO object detector trained on COCO dataset (80 classes)
    # and determine only the *output* layer names that we need from YOLO
    print("[INFO] loading YOLO from disk...")
    model = Darknet(configPath)
    model.load_weights(weightsPath)
    model.net_info["height"] = 320
    model.cuda()
    model.eval()
    return labels, model
Example #9
class DarknetModel(object):
    def __init__(self):
        self.scales = "1,2,3"
        self.batch_size = 1
        self.confidence = 0.5
        self.nms_thresh = 0.4
        self.reso = 416
        self.CUDA = False
        self.num_classes = 80
        self.classes = load_classes('data/coco.names') 
        self.colors = load_colors('data/pallete')
        self.model = Darknet('cfg/yolov3.cfg', self.reso)
        self.model.load_state_dict(torch.load('yolov3.pkl'))
        self.inp_dim = self.reso
        assert self.inp_dim % 32 == 0 
        assert self.inp_dim > 32
        if self.CUDA:
            self.model.cuda()
        self.model.eval()
    def predict(self, filename):
        image = cv2.imread(filename)
        img, orig_im, dim = prep_image(image, self.inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)
        if self.CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        output = self.model(img)
        output = sift_results(output, self.confidence, self.num_classes,
                              nms=True, nms_conf=self.nms_thresh)
        # normalize coordinates to [0, 1], then scale back to the original image size
        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim
        output[:, [1, 3]] *= image.shape[1]
        output[:, [2, 4]] *= image.shape[0]

        list(map(lambda x: write(x, orig_im, self.classes, self.colors), output))
        return orig_im
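A minimal usage sketch for Example #9, assuming the cfg/weights/class files referenced in __init__ are present and that write (from the snippet's context) draws each detection onto orig_im in place:

import cv2

detector = DarknetModel()
annotated = detector.predict('example.jpg')  # illustrative image path
cv2.imwrite('example_annotated.jpg', annotated)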
Example #10
class DetectionLoader:
    def __init__(self, batchSize=1, queueSize=1, size=100, device=0):

        ## camera stream
        self.stream = cv2.VideoCapture(device)
        assert self.stream.isOpened(), 'Cannot capture from camera'
        self.stream.set(cv2.CAP_PROP_BUFFERSIZE, 1)
        self.inp_dim = int(opt.inp_dim)

        ## yolo model
        self.det_model = Darknet("joints_detectors/Alphapose/yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('joints_detectors/Alphapose/models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()
        self.batchSize = batchSize
        self.datalen = 1
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        ## alphapose model
        fast_inference = True
        pose_dataset = Mscoco()
        if fast_inference:
            self.pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
        else:
            self.pose_model = InferenNet(4 * 1 + 1, pose_dataset)
        
        self.pose_model.cuda()
        self.pose_model.eval() 

        ## 2d plotting
        self.fig_in = plt.figure(figsize=(size, size))
        self.ax_in = self.fig_in.add_subplot(1, 1, 1)
        self.ax_in.get_xaxis().set_visible(False)
        self.ax_in.get_yaxis().set_visible(False)
        self.ax_in.set_axis_off()
        self.ax_in.set_title('Input')
        self.initialized = False
        self.size = size
        thismanager = get_current_fig_manager()
        thismanager.window.wm_geometry("+0-1000")
        


    def update(self):

        time1 = time.time()

        _, frame = self.stream.read()
        # frame = cv2.resize(frame, (frame.shape[1]//2,frame.shape[0]//2))

        #TODO TESTING
        # frame[:,:200,:]=0
        # frame[:,450:,:]=0


        img_k, self.orig_img, im_dim_list_k = prep_frame(frame, self.inp_dim)
        
        img = [img_k]
        im_name = ["im_name"]
        im_dim_list = [im_dim_list_k] 

        img = torch.cat(img)
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

        time2 = time.time()


        with torch.no_grad():
            ### detector 
            #########################
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence,
                                        opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:   
                self.visualize2dnoperson()
                return None
                
            
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # coordinate transfer
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            boxes_k = boxes[dets[:, 0] == 0]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                self.visualize2dnoperson()
                raise NotImplementedError
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)

            time3 = time.time()


            ### processor 
            #########################
            inp = im_to_torch(cv2.cvtColor(self.orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = self.crop_from_dets(inp, boxes, inps, pt1, pt2)

            ### generator
            #########################            
            self.orig_img = np.array(self.orig_img, dtype=np.uint8)
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)

            datalen = inps.size(0)
            batchSize = 20 #args.posebatch()
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []

            time4 = time.time()

            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = self.pose_model(inps_j)
                hm.append(hm_j)
            
            
            hm = torch.cat(hm)
            hm = hm.cpu().data

            preds_hm, preds_img, preds_scores = getPrediction(
                hm, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
            result = pose_nms(
                boxes, scores, preds_img, preds_scores)

            time5 = time.time()

            if not result:  # No people
                self.visualize2dnoperson()
                return None
            else:
                self.kpt = max(
                    result,
                    key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']))['keypoints']
                self.visualize2d()
                return self.kpt
            

##########################################################################################
##########################################################################################


    def crop_from_dets(self,img, boxes, inps, pt1, pt2):
        '''
        Crop humans from the original image according to detection results
        '''
        imght = img.size(1)
        imgwidth = img.size(2)
        tmp_img = img
        # channel-wise mean subtraction (note: tmp_img aliases img, so img is edited in place)
        tmp_img[0].add_(-0.406)
        tmp_img[1].add_(-0.457)
        tmp_img[2].add_(-0.480)
        for i, box in enumerate(boxes):
            upLeft = torch.Tensor(
                (float(box[0]), float(box[1])))
            bottomRight = torch.Tensor(
                (float(box[2]), float(box[3])))

            ht = bottomRight[1] - upLeft[1]
            width = bottomRight[0] - upLeft[0]

            scaleRate = 0.3

            upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
            upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
            bottomRight[0] = max(
                min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2), upLeft[0] + 5)
            bottomRight[1] = max(
                min(imght - 1, bottomRight[1] + ht * scaleRate / 2), upLeft[1] + 5)

            try:
                inps[i] = cropBox(tmp_img.clone(), upLeft, bottomRight, opt.inputResH, opt.inputResW)
            except IndexError:
                print(tmp_img.shape)
                print(upLeft)
                print(bottomRight)
                print('===')
            pt1[i] = upLeft
            pt2[i] = bottomRight

        return inps, pt1, pt2


##########################################################################################
##########################################################################################


    def visualize2d(self):
        if not self.initialized:
            self.image = self.ax_in.imshow(self.orig_img, aspect='equal')
            self.point = self.ax_in.scatter(*self.kpt.T, 5, color='red', edgecolors='white', zorder=10)
            self.initialized = True
        else:
            self.image.set_data(self.orig_img)
            self.point.set_offsets(self.kpt)


    def visualize2dnoperson(self): #TODO
        # Update 2D poses
        if not self.initialized:
            self.image = self.ax_in.imshow(self.orig_img, aspect='equal')
        else:
            self.image.set_data(self.orig_img)
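crop_from_dets in Example #10 pads every detection by scaleRate = 0.3 (15% of the box size on each side) before cropping, clamps to the frame, and forces at least a 5 px box. The same arithmetic on one illustrative box:

# pad the box (x1, y1, x2, y2) = (100, 100, 200, 300) inside a 640x480 frame
x1, y1, x2, y2 = 100.0, 100.0, 200.0, 300.0
width, ht, scaleRate = x2 - x1, y2 - y1, 0.3
x1 = max(0, x1 - width * scaleRate / 2)                     # 85.0
y1 = max(0, y1 - ht * scaleRate / 2)                        # 70.0
x2 = max(min(640 - 1, x2 + width * scaleRate / 2), x1 + 5)  # 215.0
y2 = max(min(480 - 1, y2 + ht * scaleRate / 2), y1 + 5)     # 330.0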
Example #11
class DetectionLoader:
    def __init__(self, dataloder, obj_id, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        cfg_path = "yolo/cfg/yolov3-single.cfg"
        weights_path = 'models/yolo/{:02d}.weights'.format(obj_id)
        self.det_model = Darknet(cfg_path, reso=int(opt.inp_dim))
        self.det_model.load_weights(weights_path)
        print("Loading YOLO cfg from", cfg_path)
        print("Loading YOLO weights from", weights_path)
        self.det_model.net_info['height'] = opt.inp_dim  # input dimension
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                img = img.cuda()
                # Critical, use yolo to do object detection here!
                prediction = self.det_model(img)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()

                # Scale for SIXD dataset

                reso = self.det_inp_dim
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                w, h = im_dim_list[:, 0], im_dim_list[:, 1]
                w_ratio = w / reso
                h_ratio = h / reso
                boxes = dets[:, 1:5]
                boxes[:, 0] = boxes[:, 0] * w_ratio
                boxes[:, 1] = boxes[:, 1] * h_ratio
                boxes[:, 2] = boxes[:, 2] * w_ratio
                boxes[:, 3] = boxes[:, 3] * h_ratio
                scores = dets[:, 5:6]

                # im_dim_list = torch.index_select(im_dim_list,0, dets[:, 0].long())
                # scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # # coordinate transfer
                # dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                # dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                # dets[:, 1:5] /= scaling_factor
                # for j in range(dets.shape[0]):
                #     dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                #     dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                # boxes = dets[:, 1:5]
                # scores = dets[:, 5:6]

            img = Image.open(im_name[0])
            draw = ImageDraw.Draw(img)
            for i in range(boxes.shape[0]):
                x1, y1, x2, y2 = boxes[i, 0], boxes[i, 1], boxes[i, 2], boxes[i, 3]
                objectness = 'conf: %.2f' % scores[i]  # computed but never drawn
                draw.rectangle((x1, y1, x2, y2), outline='red')

            # img.save(im_name[0].replace('rgb', 'results'))

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
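Unlike the letterboxed examples, Example #11 maps boxes back with independent width and height ratios (the symmetric-padding variant is left commented out below it). That mapping is only correct if prep_image stretched the frame to reso x reso without padding; as a standalone sketch:

def stretch_to_original(box, reso, w, h):
    # box: (x1, y1, x2, y2) in network coordinates, assuming a plain
    # (non-letterbox) resize of the (w, h) frame to reso x reso
    x1, y1, x2, y2 = box
    return (x1 * w / reso, y1 * h / reso, x2 * w / reso, y2 * h / reso)

print(stretch_to_original((100, 200, 300, 400), 416, 1280, 720))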
Example #12
                             'AlphaPose_webcam' + webcam + '.avi')
    writer = DataWriter(args.save_video, save_path,
                        cv2.VideoWriter_fourcc(*'XVID'), fps,
                        frameSize).start()

    # Load YOLO model
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_model = Darknet("yolo/cfg/yolov3.cfg")
    det_model.load_weights('models/yolo/yolov3.weights')
    det_model.net_info['height'] = args.inp_dim
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32
    det_model.cuda()
    det_model.eval()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'ld': [], 'dt': [], 'dn': [], 'pt': [], 'pn': []}

    print('Starting webcam demo, press Ctrl + C to terminate...')
    sys.stdout.flush()
    im_names_desc = tqdm(loop())
Example #13
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        # initialize the queue used to store frames read from
        # the video file
        self.Q = LifoQueue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping the whole dataset
        while True:
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            with self.dataloder.Q.mutex:
                self.dataloder.Q.queue.clear()
            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #14
class YOLODetector(BaseDetector):
    def __init__(self, cfg, opt=None):
        super(YOLODetector, self).__init__()

        self.detector_cfg = cfg
        self.detector_opt = opt
        self.model_cfg = cfg.get('CONFIG', 'detector/yolo/cfg/yolov3-spp.cfg')
        self.model_weights = cfg.get('WEIGHTS',
                                     'detector/yolo/data/yolov3-spp.weights')
        self.inp_dim = cfg.get('INP_DIM', 608)
        self.nms_thres = cfg.get('NMS_THRES', 0.6)
        self.confidence = cfg.get('CONFIDENCE', 0.05)
        self.num_classes = cfg.get('NUM_CLASSES', 80)
        self.model = None
        self.load_model()

    def load_model(self):
        args = self.detector_opt

        print('Loading YOLO model..')
        self.model = Darknet(self.model_cfg)
        self.model.load_weights(self.model_weights)
        self.model.net_info['height'] = self.inp_dim
        print("Network successfully loaded")

        if args:
            if len(args.gpus) > 1:
                self.model = torch.nn.DataParallel(self.model,
                                                   device_ids=args.gpus).to(
                                                       args.device)
            else:
                self.model.to(args.device)
        else:
            self.model.cuda()
        self.model.eval()

    def image_preprocess(self, img_source):
        """
        Pre-process the img before fed to the object detection network
        Input: image name(str) or raw image data(ndarray or torch.Tensor,channel GBR)
        Output: pre-processed image data(torch.FloatTensor,(1,3,h,w))
        """
        if isinstance(img_source, str):
            img, orig_img, im_dim_list = prep_image(img_source, self.inp_dim)
        elif isinstance(img_source, torch.Tensor) or isinstance(
                img_source, np.ndarray):
            img, orig_img, im_dim_list = prep_frame(img_source, self.inp_dim)
        else:
            raise IOError('Unknown image source type: {}'.format(
                type(img_source)))

        return img

    def images_detection(self, imgs, orig_dim_list):
        """
        Feed the img data into object detection network and 
        collect bbox w.r.t original image size
        Input: imgs(torch.FloatTensor,(b,3,h,w)): pre-processed mini-batch image input
               orig_dim_list(torch.FloatTensor, (b,(w,h,w,h))): original mini-batch image size
        Output: dets(torch.cuda.FloatTensor,(n,(batch_idx,x1,y1,x2,y2,c,s,idx of cls))): object detection results
        """
        args = self.detector_opt
        _CUDA = True
        if args:
            if args.gpus[0] < 0:
                _CUDA = False
        if not self.model:
            self.load_model()
        with torch.no_grad():
            imgs = imgs.to(args.device) if args else imgs.cuda()
            prediction = self.model(imgs, args=args)
            #do nms to the detection results, only human category is left
            dets = self.dynamic_write_results(prediction,
                                              self.confidence,
                                              self.num_classes,
                                              nms=True,
                                              nms_conf=self.nms_thres)

            if isinstance(dets, int) or dets.shape[0] == 0:
                return 0
            dets = dets.cpu()

            orig_dim_list = torch.index_select(orig_dim_list, 0,
                                               dets[:, 0].long())
            scaling_factor = torch.min(self.inp_dim / orig_dim_list,
                                       1)[0].view(-1, 1)
            dets[:, [1, 3]] -= (self.inp_dim - scaling_factor *
                                orig_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.inp_dim - scaling_factor *
                                orig_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for i in range(dets.shape[0]):
                dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0,
                                              orig_dim_list[i, 0])
                dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0,
                                              orig_dim_list[i, 1])

            return dets

    def dynamic_write_results(self,
                              prediction,
                              confidence,
                              num_classes,
                              nms=True,
                              nms_conf=0.4):
        prediction_bak = prediction.clone()
        dets = self.write_results(prediction.clone(), confidence, num_classes,
                                  nms, nms_conf)
        if isinstance(dets, int):
            return dets

        if dets.shape[0] > 100:
            nms_conf -= 0.05
            dets = self.write_results(prediction_bak.clone(), confidence,
                                      num_classes, nms, nms_conf)

        return dets

    def write_results(self,
                      prediction,
                      confidence,
                      num_classes,
                      nms=True,
                      nms_conf=0.4):
        args = self.detector_opt
        #prediction: (batchsize, num of objects, (xc,yc,w,h,box confidence, 80 class scores))
        conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
        prediction = prediction * conf_mask

        try:
            ind_nz = torch.nonzero(prediction[:, :, 4],
                                   as_tuple=False).transpose(0,
                                                             1).contiguous()
        except:
            return 0

        #the 3rd channel of prediction: (xc,yc,w,h)->(x1,y1,x2,y2)
        box_a = prediction.new(prediction.shape)
        box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
        box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
        box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
        box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
        prediction[:, :, :4] = box_a[:, :, :4]

        batch_size = prediction.size(0)

        output = prediction.new(1, prediction.size(2) + 1)
        write = False
        num = 0
        for ind in range(batch_size):
            #select the image from the batch
            image_pred = prediction[ind]

            #Get the class having maximum score, and the index of that class
            #Get rid of num_classes softmax scores
            #Add the class index and the class score of class having maximum score
            max_conf, max_conf_score = torch.max(
                image_pred[:, 5:5 + num_classes], 1)
            max_conf = max_conf.float().unsqueeze(1)
            max_conf_score = max_conf_score.float().unsqueeze(1)
            seq = (image_pred[:, :5], max_conf, max_conf_score)
            #image_pred:(n,(x1,y1,x2,y2,c,s,idx of cls))
            image_pred = torch.cat(seq, 1)

            #Get rid of the zero entries
            non_zero_ind = (torch.nonzero(image_pred[:, 4], as_tuple=False))

            image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

            #Get the various classes detected in the image
            try:
                img_classes = unique(image_pred_[:, -1])
            except:
                continue

            #We do NMS classwise
            for cls in img_classes:
                if cls == 0:
                    continue
                #get the detections with one particular class
                cls_mask = image_pred_ * (image_pred_[:, -1]
                                          == cls).float().unsqueeze(1)
                class_mask_ind = torch.nonzero(cls_mask[:, -2],
                                               as_tuple=False).squeeze()

                image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

                #sort the detections such that the entry with the maximum objectness
                #confidence is at the top
                conf_sort_index = torch.sort(image_pred_class[:, 4],
                                             descending=True)[1]
                image_pred_class = image_pred_class[conf_sort_index]
                idx = image_pred_class.size(0)

                #if nms has to be done
                if nms:
                    if platform.system() != 'Windows':
                        #We use faster rcnn implementation of nms (soft nms is optional)
                        nms_op = getattr(nms_wrapper, 'nms')
                        #nms_op input:(n,(x1,y1,x2,y2,c))
                        #nms_op output: input[inds,:], inds
                        _, inds = nms_op(image_pred_class[:, :5], nms_conf)

                        image_pred_class = image_pred_class[inds]
                    else:
                        # Perform non-maximum suppression
                        max_detections = []
                        while image_pred_class.size(0):
                            # Get detection with highest confidence and save as max detection
                            max_detections.append(
                                image_pred_class[0].unsqueeze(0))
                            # Stop if we're at the last detection
                            if len(image_pred_class) == 1:
                                break
                            # Get the IOUs for all boxes with lower confidence
                            ious = bbox_iou(max_detections[-1],
                                            image_pred_class[1:], args)
                            # Remove detections with IoU >= NMS threshold
                            image_pred_class = image_pred_class[1:][
                                ious < nms_conf]

                        image_pred_class = torch.cat(max_detections).data

                #Concatenate the batch_id of the image to the detection
                #this helps us identify which image the detection corresponds to
                #We use a linear structure to hold ALL the detections from the batch;
                #the batch dim is flattened and each row is identified by an extra batch column

                batch_ind = image_pred_class.new(image_pred_class.size(0),
                                                 1).fill_(ind)
                seq = batch_ind, image_pred_class
                if not write:
                    output = torch.cat(seq, 1)
                    write = True
                else:
                    out = torch.cat(seq, 1)
                    output = torch.cat((output, out))
                num += 1

        if not num:
            return 0
        #output:(n,(batch_ind,x1,y1,x2,y2,c,s,idx of cls))
        return output

    def detect_one_img(self, img_name):
        """
        Detect bboxs in one image
        Input: 'str', full path of image
        Output: '[{"category_id":1,"score":float,"bbox":[x,y,w,h],"image_id":str},...]',
        The output results are similar with coco results type, except that image_id uses full path str
        instead of coco %012d id for generalization. 
        """
        args = self.detector_opt
        _CUDA = True
        if args:
            if args.gpus[0] < 0:
                _CUDA = False
        if not self.model:
            self.load_model()
        if isinstance(self.model, torch.nn.DataParallel):
            self.model = self.model.module
        dets_results = []
        #pre-process(scale, normalize, ...) the image
        img, orig_img, img_dim_list = prep_image(img_name, self.inp_dim)
        with torch.no_grad():
            img_dim_list = torch.FloatTensor([img_dim_list]).repeat(1, 2)
            img = img.to(args.device) if args else img.cuda()
            prediction = self.model(img, args=args)
            #do nms to the detection results, only human category is left
            dets = self.dynamic_write_results(prediction,
                                              self.confidence,
                                              self.num_classes,
                                              nms=True,
                                              nms_conf=self.nms_thres)
            if isinstance(dets, int) or dets.shape[0] == 0:
                return None
            dets = dets.cpu()

            img_dim_list = torch.index_select(img_dim_list, 0, dets[:,
                                                                    0].long())
            scaling_factor = torch.min(self.inp_dim / img_dim_list,
                                       1)[0].view(-1, 1)
            dets[:, [1, 3]] -= (self.inp_dim - scaling_factor *
                                img_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.inp_dim - scaling_factor *
                                img_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for i in range(dets.shape[0]):
                dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0,
                                              img_dim_list[i, 0])
                dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0,
                                              img_dim_list[i, 1])

                #write results
                det_dict = {}
                x = float(dets[i, 1])
                y = float(dets[i, 2])
                w = float(dets[i, 3] - dets[i, 1])
                h = float(dets[i, 4] - dets[i, 2])
                det_dict["category_id"] = 1
                det_dict["score"] = float(dets[i, 5])
                det_dict["bbox"] = [x, y, w, h]
                det_dict["image_id"] = int(
                    os.path.basename(img_name).split('.')[0])
                dets_results.append(det_dict)

            return dets_results
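The Windows branch of write_results in Example #14 is plain greedy NMS: keep the highest-confidence box, drop every remaining box whose IoU with it reaches nms_conf, repeat. A self-contained PyTorch sketch of that loop, with bbox_iou replaced by an inline IoU so it runs on its own:

import torch

def greedy_nms(boxes, scores, iou_thresh=0.4):
    """boxes: (n, 4) as (x1, y1, x2, y2); scores: (n,); returns kept row indices."""
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0]
        keep.append(i.item())
        if order.numel() == 1:
            break
        rest = order[1:]
        # IoU of the top box against the remaining candidates
        xy1 = torch.max(boxes[i, :2], boxes[rest, :2])
        xy2 = torch.min(boxes[i, 2:], boxes[rest, 2:])
        inter = (xy2 - xy1).clamp(min=0).prod(dim=1)
        area_i = (boxes[i, 2:] - boxes[i, :2]).prod()
        area_r = (boxes[rest, 2:] - boxes[rest, :2]).prod(dim=1)
        iou = inter / (area_i + area_r - inter)
        order = rest[iou < iou_thresh]
    return keep

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(greedy_nms(boxes, scores))  # [0, 2]: the second box overlaps the first too heavily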
Example #15
class WebcamDetectionLoader:
    def __init__(self, webcam=0, batchSize=1, queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stream = cv2.VideoCapture(int(webcam))
        assert self.stream.isOpened(), 'Cannot open webcam'
        self.stopped = False
        self.batchSize = batchSize

        # initialize the queue used to store frames read from
        # the video file
        self.Q = LifoQueue(maxsize=queueSize)

    def len(self):
        return self.Q.qsize()

    def start(self):
        # start a thread to read frames from the file video stream
        t = threading.Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        print(
            f'WebcamDetectionLoader_update_thread: {threading.current_thread().name}'
        )
        # keep looping
        while True:
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(self.batchSize):
                (grabbed, frame) = self.stream.read()
                # skip the frame before touching it if the read failed
                if not grabbed:
                    continue
                h, w, c = frame.shape
                # frame = cv2.resize(frame, (int(w / 4), int(h / 4)), interpolation=cv2.INTER_CUBIC)
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                inp_k = im_to_torch(orig_img_k)

                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                img = Variable(torch.cat(img)).cuda()
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list = im_dim_list.cuda()

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        if self.Q.full():
                            with self.Q.mutex:
                                self.Q.queue.clear()
                        self.Q.put((inp[k], orig_img[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                if self.Q.full():
                    with self.Q.mutex:
                        self.Q.queue.clear()
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k],
                            scores[dets[:, 0] == k]))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
Example #16
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()
        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        # initialize the queue used to store frames read from
        # the video file
        self.Q = LifoQueue(maxsize=queueSize)
        pose_dataset = Mscoco()
        if opt.fast_inference:
            self.pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
        else:
            self.pose_model = InferenNet(4 * 1 + 1, pose_dataset)
        self.pose_model.cuda()
        self.pose_model.eval()

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        while True:
            (img, orig_img, im_name, im_dim_list) = self.dataloder.getitem()

            with self.dataloder.Q.mutex:
                self.dataloder.Q.queue.clear()
            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # im_dim_list = im_dim_list.cuda()
                frame_id = int(im_name.split('.')[0])
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put(
                        (orig_img, frame_id, None, None, None, None, None))
                    continue

                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])

                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]
                if boxes is None:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put(
                        (orig_img, frame_id, None, None, None, None, None))
                    continue
                # Pose Estimation
                inp = im_to_torch(orig_img)
                inps = torch.zeros(boxes.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes.size(0), 2)
                pt2 = torch.zeros(boxes.size(0), 2)
                inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)
                inps = Variable(inps.cuda())
                hm = self.pose_model(inps)
                preds_hm, preds_img, preds_scores = getPrediction(
                    hm.cpu(), pt1, pt2, opt.inputResH, opt.inputResW,
                    opt.outputResH, opt.outputResW)
                bbox, b_score, kp, kp_score, roi = pose_nms(
                    orig_img, boxes, scores, preds_img, preds_scores)
                # result = {
                #     'imgname': im_name,
                #     'result': result,
                #     'orig_img': orig_img
                # }

                if self.Q.full():
                    time.sleep(2)
                #self.Q.put((orig_img[k], im_name[k], boxes_k, scores[dets[:,0]==k], inps, pt1, pt2))
                #self.Q.put((result, orig_img, im_name))
                self.Q.put(
                    (orig_img, frame_id, bbox, b_score, kp, kp_score, roi))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #17
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()
        self.dataloader = dataloder
        self.stopped = False
        self.batchSize = batchSize
        # frames arrive via the Q_load / Q_det queues passed to forward()

    def forward(self, Q_load, Q_det):
        # keep looping the whole dataset

        while True:
            #print(Q_load.qsize(), Q_det.qsize())
            img, orig_img, im_dim_list = Q_load.get()

            with torch.no_grad():
                # Human Detection
                img = img.cuda()

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:

                    for k in range(len(orig_img)):
                        if Q_det.full():
                            time.sleep(0.1)
                            #print("detectionloaderQ1 full ")
                        #Q_det.put((orig_img[k],  None, None, None, None, None))
                        Q_det.put((None, orig_img[k], None, None, None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)

                inp = im_to_torch(cv2.cvtColor(orig_img[k], cv2.COLOR_BGR2RGB))
                inps, pt1, pt2 = crop_from_dets(inp, boxes_k, inps, pt1, pt2)

                if Q_det.full():
                    time.sleep(0.1)
                    #print("detectionloaderQ3 full ")
                #Q_det.put((orig_img[k],  boxes_k, scores[dets[:,0]==k], inps, pt1, pt2))
                Q_det.put((inps, orig_img[k], boxes_k, scores[dets[:, 0] == k],
                           pt1, pt2))
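
The coordinate-transfer block above inverts the letterbox preprocessing: each frame is resized by min(inp_dim/w, inp_dim/h) and padded to a square network input, so a predicted box must be shifted by half the padding and divided by the scale. A self-contained sketch of the same mapping on one made-up box (all numbers are illustrative):

import torch

det_inp_dim = 608                        # square network input, as in this loader
im_dim = torch.tensor([[1280., 720.]])   # one original frame: (w, h)

# one scale factor per image, same formula as above (here 608/1280 = 0.475)
scaling_factor = torch.min(det_inp_dim / im_dim, 1)[0].view(-1, 1)

# a box predicted in 608x608 letterbox space: (x1, y1, x2, y2)
box = torch.tensor([[100., 200., 300., 400.]])
box[:, [0, 2]] -= (det_inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
box[:, [1, 3]] -= (det_inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
box /= scaling_factor
box[:, [0, 2]] = box[:, [0, 2]].clamp(0.0, float(im_dim[0, 0]))
box[:, [1, 3]] = box[:, [1, 3]].clamp(0.0, float(im_dim[0, 1]))
print(box)  # the box in original 1280x720 pixel coordinates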
Example #18
0
class PeopleDetector:
    def __init__(self,
                 confidence=0.5,
                 nms_thresh=0.4,
                 resolution=416,
                 weights_path='yolo/weights/yolov3.weights',
                 cfg_path='yolo/cfg/yolov3.cfg',
                 num_classes=80,
                 names_path='yolo/data/coco.names'):
        self.confidence = confidence
        self.nms_thresh = nms_thresh
        self.weightsfile = weights_path
        self.cfgfile = cfg_path
        self.CUDA = torch.cuda.is_available()
        self.num_classes = num_classes
        self.classes = load_classes(names_path)
        self.model = Darknet(self.cfgfile)
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = resolution
        self.inp_dim = int(self.model.net_info["height"])
        # Check that the resolution is a multiple of 32
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        # If there's a GPU available, put the model on GPU
        if self.CUDA:
            self.model.cuda()
        # Set model in evaluation mode
        self.model.eval()

    def prep_image(self, img):
        """
        Prepare the image (resize) for input to the neural network.
        """
        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = cv2.resize(orig_im, (self.inp_dim, self.inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        return img_, orig_im, dim

    def writeSingleLabel(self, x, img, paintBoundingBoxes, color=(0, 0, 255)):
        """
        Put a label on the image if it's not inside a painting bounding box.
        Default label color: red.
        """
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())

        # check if the person's box is inside a painting's bounding box
        isInside = False
        for box in paintBoundingBoxes:
            isInside = (box[0] <= c1[0] < c2[0] <= (box[0] + box[2])
                        and box[1] <= c1[1] < c2[1] <= (box[1] + box[3]))
            if isInside:
                break
        if isInside:
            return
        cls = int(x[-1])
        label = "{0}".format(self.classes[cls])
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    def writeLabels(self, origin_im, netOutput, paintBoundingBoxes):
        """
        Put all labels on the image if they are not inside a painting bounding box.
        """
        for x in netOutput:
            self.writeSingleLabel(x, origin_im, paintBoundingBoxes)
        return origin_im

    def detectPeopleFromFrame(self, frame):
        """
        Detect people inside a frame and return bounding boxes
        """
        # Prepare the image as a PyTorch tensor
        img, orig_im, dim = self.prep_image(frame)

        #Load img on GPU if available
        if self.CUDA:
            img = img.cuda()

        # Run inference
        with torch.no_grad():
            output = self.model(Variable(img), self.CUDA)

        # Collect the 3-scale predictions into a single tensor
        output = write_results(output,
                               self.confidence,
                               self.num_classes,
                               nms=True,
                               nms_conf=self.nms_thresh)

        # If no detections...
        if isinstance(output, int):
            return None

        # If we have detections, keep only people (class id == 0)
        output = output[output[:, -1] < 1]

        # Rescale boxes to the input frame dimensions
        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(
            self.inp_dim)) / self.inp_dim
        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]

        if output.shape[1] != 8:
            return None

        return output
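
A minimal usage sketch for this class, assuming the default cfg/weights/names paths exist on disk; the input filename is a placeholder:

import cv2

detector = PeopleDetector()              # uses the default paths above
frame = cv2.imread('frame.jpg')          # hypothetical input frame
people = detector.detectPeopleFromFrame(frame)
if people is not None:
    # no painting boxes in this sketch, so no label is suppressed
    annotated = detector.writeLabels(frame, people, [])
    cv2.imwrite('frame_annotated.jpg', annotated)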
Example #19
0
class DetectionLoader:
    def __init__(self, dataloder):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder

    def detect_image(self, im_path):
        im, ori_im, im_name, im_dim_list = self.dataloder.getitem_yolo(im_path)

        with torch.no_grad():
            im = im.cuda()
            prediction = self.det_model(im, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction,
                                         opt.confidence,
                                         opt.num_classes,
                                         nms=True,
                                         nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            return (ori_im[0], im_name[0], None, None, None, None, None)

        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0] \
            .view(-1, 1)
        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim -
                            scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim -
                            scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

        dets[:, 1:5] /= scaling_factor
        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j,
                                                                            0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j,
                                                                            1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]

        if boxes.shape[0] > 1:
            # keep only the highest-scoring detection
            best = scores.argmax()
            boxes = boxes[best].unsqueeze(0)
            scores = scores[best].unsqueeze(0)
            dets = dets[best].unsqueeze(0)
        # len(ori_im) == 1
        for k in range(len(ori_im)):

            boxes_k = boxes[dets[:, 0] == k]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                return (ori_im[k], im_name[k], None, None, None, None, None)
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                               opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)
            return (ori_im[k], im_name[k], boxes_k, scores[dets[:, 0] == k],
                    inps, pt1, pt2)
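
A sketch of driving this single-image variant; my_image_loader stands in for whatever object provides the getitem_yolo used above, and the image path is a placeholder:

loader = DetectionLoader(my_image_loader)   # hypothetical loader with getitem_yolo
ori_im, im_name, boxes, scores, inps, pt1, pt2 = loader.detect_image('person.jpg')
if boxes is None:
    print('no detection in', im_name)
else:
    # only the highest-scoring box survives the argmax filter above
    print(im_name, boxes[0].tolist(), 'score', float(scores[0]))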
Example #20
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        # inp_dim must be greater than 32 and divisible by 32
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()

            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                # index_select: arg 1 is the tensor to index, arg 2 the dimension (0 = rows, 1 = cols), arg 3 the indices
                # keep the (w, h, w, h) rows of im_dim_list for images that produced detections
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                # each element of scaling_factor is the factor used when resizing that image to the network input
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    # clamp each box coordinate to [0, w] / [0, h] of its image and write back the result
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
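
A sketch of the intended producer/consumer flow, assuming a dataloader exposing getitem() and length() as used above:

det_loader = DetectionLoader(my_dataloader, batchSize=1).start()   # hypothetical source
for _ in range(det_loader.datalen):
    orig_img, im_name, boxes, scores, inps, pt1, pt2 = det_loader.read()
    if boxes is None:
        continue
    # hand (inps, boxes, scores, pt1, pt2) to the pose network from here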
Example #21
0
class Alphapose_skeleton:
    def __init__(self, cuda_id=0, fast_yolo=False):

        self.time_det = 0.0
        self.time_run = 0.0

        self.cuda_id = cuda_id
        self.target_kps = [5, 6, 7, 8, 9, 10]

        # Load yolo detection model
        print('Loading YOLO model..')
        if fast_yolo:
            self.det_model = Darknet('./AlphaPose/yolo/cfg/yolov3-tiny.cfg', self.cuda_id)
            self.det_model.load_weights('./AlphaPose/models/yolo/yolov3-tiny.weights')
        else:
            self.det_model = Darknet('./AlphaPose/yolo/cfg/yolov3.cfg', self.cuda_id)
            self.det_model.load_weights('./AlphaPose/models/yolo/yolov3.weights')
            
        self.det_model.cuda(self.cuda_id)
        self.det_model.eval()

        # Load pose model
        print('Loading Alphapose pose model..')
        pose_dataset = Mscoco()
        if args.fast_inference:
            self.pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
        else:
            self.pose_model = InferenNet(4 * 1 + 1, pose_dataset)
        self.pose_model.cuda(self.cuda_id)
        self.pose_model.eval()


    def run(self, folder_or_imglist, sample_rate):
        time_run_start = time.time()

        if isinstance(folder_or_imglist, str):
            inputpath = folder_or_imglist
            print(inputpath)
            args.inputpath = inputpath

            # Load input images
            im_names = [img for img in sorted(os.listdir(inputpath)) if img.endswith('jpg')]
            N = len(im_names)
            dataset = Image_loader(im_names, format='yolo')
        else:
            N = len(folder_or_imglist)
            imglist = [img for i, img in enumerate(folder_or_imglist) if i % sample_rate == 0]
            dataset = Image_loader_from_images(imglist, format='yolo')

        # Load detection loader
        test_loader = DetectionLoader(dataset, self.det_model, self.cuda_id).start()
        skeleton_result_list = []
        for i in range(dataset.__len__()):
            with torch.no_grad():
                (inp, orig_img, im_name, boxes, scores) = test_loader.read()

                if boxes is None or boxes.nelement() == 0:
                    skeleton_result = None
                else:
                    # Pose Estimation
                    time_det_start = time.time()
                    inps, pt1, pt2 = crop_from_dets(inp, boxes)
                    inps = Variable(inps.cuda(self.cuda_id))

                    hm = self.pose_model(inps)
                    hm_data = hm.cpu().data

                    preds_hm, preds_img, preds_scores = getPrediction(
                            hm_data, pt1, pt2, args.inputResH, args.inputResW, args.outputResH, args.outputResW)

                    skeleton_result = pose_nms(boxes, scores, preds_img, preds_scores)
                    self.time_det += (time.time() - time_det_start)

                skeleton_result_list.append(skeleton_result)

        skeleton_list = []
        j = 0
        for i in range(N):
            im_name = 'image_{:05d}.jpg'.format(i+1)

            if (i == sample_rate * (1+j)):
                j += 1
            skeleton_result = skeleton_result_list[j]

            skeleton_list.append([im_name.split('/')[-1]])
            if skeleton_result is not None:
                for human in skeleton_result:
                    kp_preds = human['keypoints']
                    kp_scores = human['kp_score']

                    # ## remove small hand 
                    # if float(kp_scores[9]) < 0.2 and float(kp_scores[10]) < 0.2:
                    #     continue

                    for n in range(kp_scores.shape[0]):
                        skeleton_list[-1].append(int(kp_preds[n, 0]))
                        skeleton_list[-1].append(int(kp_preds[n, 1]))
                        skeleton_list[-1].append(round(float(kp_scores[n]), 2))

        self.time_run += (time.time() - time_run_start)
        return skeleton_list

    def runtime(self):
        return self.time_det, self.time_run

    def save_skeleton(self, skeleton_list, outputpath):

        if not os.path.exists(outputpath):
            os.mkdir(outputpath)

        out_file = open(os.path.join(outputpath, 'skeleton.txt'), 'w')
        for skeleton in skeleton_list:
            out_file.write(' '.join(str(x) for x in skeleton))
            out_file.write('\n')
        out_file.close()
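
A sketch of running this wrapper on an in-memory frame list; the AlphaPose cfg/weight paths in __init__ must exist, and the frame files are placeholders:

import cv2

skel = Alphapose_skeleton(cuda_id=0)
frames = [cv2.imread(p) for p in ('f0001.jpg', 'f0002.jpg')]   # hypothetical frames
skeleton_list = skel.run(frames, sample_rate=1)
skel.save_skeleton(skeleton_list, './skeleton_out')
print('detection time / total time:', skel.runtime())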
Example #22
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet(
            "joints_detectors/Alphapose/yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights(
            'joints_detectors/Alphapose/models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, name="DetectionLoader", args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=(), daemon=True)
            # p = mp.Process(target=self.update, args=())
            # p.daemon = True
            p.start()
        return self

    def update(self):
        while True:
            sys.stdout.flush()
            print("detection loader len : " + str(self.Q.qsize()))

            # keep looping over the whole dataset
            #for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:

                    # if self.Q.full():
                    #     time.sleep(2)
                    self.Q.put((orig_img[0], im_name[0], None, None, None,
                                None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            #for k in range(len(orig_img)):
            boxes_k = boxes[dets[:, 0] == 0]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                # if self.Q.full():
                #     time.sleep(2)
                self.Q.put(
                    (orig_img[0], im_name[0], None, None, None, None, None))
                continue
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                               opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)
            # if self.Q.full():
            #     time.sleep(2)

            self.Q.put((orig_img[0], im_name[0], boxes_k,
                        scores[dets[:, 0] == 0], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #23
0
class DetectionNetwork(object):
    def __init__(self):
        self.confidence = 0.7
        self.nms_thesh = 0.4
        self.resolution = 640
        self.scales = "1,2,3"

        self.CUDA = torch.cuda.is_available()
        self.num_classes = 80
        self.classes = load_classes('yolo/data/coco.names')
        print("Loading network.....")
        self.model_detect = Darknet('cfg/yolov3.cfg')
        self.model_detect.load_weights('yolo/yolov3.weights')
        print("Network successfully loaded")
        self.model_detect.net_info["height"] = self.resolution
        self.inp_dim = int(self.model_detect.net_info["height"])
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        if self.CUDA:
            self.model_detect.cuda()

        self.model_detect.eval()
        self.colors = pkl.load(open("yolo/pallete", "rb"))

    def write(self, x, org_img):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = org_img
        cls = int(x[-1])
        label = "{0}".format(self.classes[cls])
        color = random.choice(self.colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    def detect(self, image, im_dim_list):

        # Detection inference
        prediction = self.model_detect(image, self.CUDA)
        prediction = write_results(prediction,
                                   self.confidence,
                                   self.num_classes,
                                   nms=True,
                                   nms_conf=self.nms_thesh)
        output = prediction

        # Check if anything was found
        if isinstance(output, int):
            return None

        objs = [self.classes[int(x[-1])] for x in output]
        print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
        print("----------------------------------------------------------")

        # Scaling, considering the original input resolution
        im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())

        scaling_factor = torch.min(self.inp_dim / im_dim_list,
                                   1)[0].view(-1, 1)

        output[:, [1, 3]] -= (self.inp_dim -
                              scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self.inp_dim -
                              scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                            im_dim_list[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                            im_dim_list[i, 1])

        return output

    def visualize_outputs(self, detect_output, draw_image):
        # Draw every bounding box iteratively
        for n_f in range(detect_output.size(0)):
            draw_image = self.write(detect_output[n_f, ...], draw_image)
        return draw_image
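
A sketch of feeding one frame through this network; prep_image is assumed to be the usual letterbox helper from this codebase (returning the network tensor, the original image, and its (w, h)), and the filenames are placeholders:

import cv2
import torch

net = DetectionNetwork()
frame = cv2.imread('street.jpg')                      # hypothetical frame
img, orig_im, dim = prep_image(frame, net.inp_dim)    # assumed helper, as in other examples
im_dim = torch.FloatTensor(dim).repeat(1, 2)
if net.CUDA:
    img, im_dim = img.cuda(), im_dim.cuda()
with torch.no_grad():
    output = net.detect(img, im_dim)
if output is not None:
    cv2.imwrite('street_annotated.jpg', net.visualize_outputs(output, orig_im))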
Example #24
0
class VideoDetectionLoader:
    def __init__(self, path, batchSize=4, queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stream = cv2.VideoCapture(path)
        assert self.stream.isOpened(), 'Cannot capture source'
        self.stopped = False
        self.batchSize = batchSize
        self.datalen = int(self.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def length(self):
        return self.datalen

    def len(self):
        return self.Q.qsize()

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping over the whole video
        for i in range(self.num_batches):
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                (grabbed, frame) = self.stream.read()
                # if the `grabbed` boolean is `False`, then we have
                # reached the end of the video file
                if not grabbed:
                    self.stop()
                    return
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                inp_k = im_to_torch(orig_img_k)

                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                img = Variable(torch.cat(img)).cuda()
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list = im_dim_list.cuda()

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        while self.Q.full():
                            time.sleep(0.2)
                        self.Q.put((inp[k], orig_img[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k],
                            scores[dets[:, 0] == k]))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
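
A sketch of draining this loader; the clip path is a placeholder, and reading exactly length() frames assumes the capture does not end early:

video_loader = VideoDetectionLoader('clip.mp4', batchSize=4).start()
fourcc, fps, frame_size = video_loader.videoinfo()
print('fps:', fps, 'size:', frame_size)
for _ in range(video_loader.length()):
    inp, orig_img, boxes, scores = video_loader.read()
    if boxes is None:
        continue
    # boxes holds (x1, y1, x2, y2) rows in original frame coordinates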
Example #25
0
class DetectionLoader:
    def __init__(self,
                 dataset,
                 det_model=None,
                 cuda_id=None,
                 batchSize=4,
                 queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        if det_model is None:
            self.det_model = Darknet('yolo/cfg/yolov3.cfg')
            self.det_model.load_weights('models/yolo/yolov3.weights')
            self.det_model.net_info['height'] = opt.inp_dim
            self.det_inp_dim = int(self.det_model.net_info['height'])
            assert self.det_inp_dim % 32 == 0
            assert self.det_inp_dim > 32
            self.det_model.cuda()
            self.det_model.eval()
        else:
            self.det_model = det_model
            self.det_model.net_info['height'] = opt.inp_dim
            self.det_inp_dim = int(self.det_model.net_info['height'])
            assert self.det_inp_dim % 32 == 0
            assert self.det_inp_dim > 32

        self.cuda_id = cuda_id

        self.stopped = False
        self.dataset = dataset
        self.batchSize = batchSize
        self.datalen = self.dataset.__len__()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        for i in range(self.num_batches):
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                img_k, inp_k, orig_img_k, im_name_k, im_dim_list_k = self.dataset.__getitem__(
                    k)
                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                if self.cuda_id is None:
                    img = Variable(torch.cat(img)).cuda()
                else:
                    img = Variable(torch.cat(img)).cuda(self.cuda_id)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                if self.cuda_id is None:
                    im_dim_list = im_dim_list.cuda()
                else:
                    im_dim_list = im_dim_list.cuda(self.cuda_id)

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh,
                                             cuda_id=self.cuda_id)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        while self.Q.full():
                            time.sleep(0.2)
                        self.Q.put(
                            (inp[k], orig_img[k], im_name[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], im_name[k],
                            boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #26
0
class MSRApose_skeleton():
    def __init__(self, cuda_id=0, fast_yolo=False):

        self.time_det = 0.0
        self.time_run = 0.0

        self.num_joints = 17
        self.target_kps = [5, 6, 7, 8, 9, 10]

        # Load yolo detection model
        print('Loading YOLO model..')
        if fast_yolo:
            self.det_model = Darknet('./AlphaPose/yolo/cfg/yolov3-tiny.cfg')
            self.det_model.load_weights(
                './AlphaPose/models/yolo/yolov3-tiny.weights')
        else:
            self.det_model = Darknet("./AlphaPose/yolo/cfg/yolov3.cfg")
            self.det_model.load_weights(
                './AlphaPose/models/yolo/yolov3.weights')

        self.det_model.cuda()
        self.det_model.eval()

        cfg_file = 'MSRAPose/experiments/coco/resnet50/256x192_d256x3_adam_lr1e-3.yaml'
        model_file = 'MSRAPose/models/pytorch/pose_coco/pose_resnet_50_256x192.pth.tar'

        # update config
        update_config(cfg_file)
        config.TEST.MODEL_FILE = model_file

        # cudnn related setting
        cudnn.benchmark = config.CUDNN.BENCHMARK
        torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
        torch.backends.cudnn.enabled = config.CUDNN.ENABLED

        # load pre-trained model
        self.model = eval('models_msra.' + config.MODEL.NAME +
                          '.get_pose_net')(config, is_train=False)
        print('Loading MSRA pose model..')
        print('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        self.model.load_state_dict(torch.load(config.TEST.MODEL_FILE))

        gpus = [int(i) for i in config.GPUS.split(',')]
        self.model = torch.nn.DataParallel(self.model, device_ids=gpus).cuda()
        self.model.eval()

        # image transform
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    def _box2cs(self, box, image_width, image_height):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h, image_width, image_height)

    def _xywh2cs(self, x, y, w, h, image_width, image_height):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        aspect_ratio = image_width * 1.0 / image_height
        pixel_std = 200

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array([w * 1.0 / pixel_std, h * 1.0 / pixel_std],
                         dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def detect_skeleton_on_single_human(self, image, box):
        '''
        input: image read by OpenCV (cv2)
        '''

        data_numpy = image.copy()

        # object detection box
        if box is None:
            box = [0, 0, data_numpy.shape[0], data_numpy.shape[1]]
        c, s = self._box2cs(box, data_numpy.shape[0], data_numpy.shape[1])
        r = 0

        trans = get_affine_transform(c, s, r, config.MODEL.IMAGE_SIZE)
        input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(config.MODEL.IMAGE_SIZE[0]), int(config.MODEL.IMAGE_SIZE[1])),
            flags=cv2.INTER_LINEAR)

        input = self.transform(input).unsqueeze(0)

        with torch.no_grad():
            # compute output heatmap
            output = self.model(input)
            output = output.clone().cpu().numpy()

            # heatmap = output
            # heatmap_hand = heatmap[0][self.target_kps[0]]
            # print(heatmap.shape)
            # for kk in self.target_kps[1:]:
            #     heatmap_hand += heatmap[0][kk]
            # cv2.imshow('skeletons', heatmap_hand)
            # cv2.waitKey()

            # compute coordinate
            preds, maxvals = get_final_preds(config, output, np.asarray([c]),
                                             np.asarray([s]))

            return preds[0]

    def run(self, folder_or_imglist, sample_rate):
        time_run_start = time.time()

        if isinstance(folder_or_imglist, str):
            inputpath = folder_or_imglist
            print(inputpath)
            args.inputpath = inputpath

            # Load input images
            im_names = [
                img for img in sorted(os.listdir(inputpath))
                if img.endswith('jpg')
            ]
            dataset = Image_loader(im_names, format='yolo')
        else:
            imglist = folder_or_imglist
            dataset = Image_loader_from_images(imglist, format='yolo')

        # Load detection loader
        test_loader = DetectionLoader(dataset, self.det_model).start()

        skeleton_list = []
        # final_result = []
        for i in range(dataset.__len__()):
            with torch.no_grad():
                (inp, orig_img, im_name, boxes, scores) = test_loader.read()

                skeleton_result = []
                if boxes is None or boxes.nelement() == 0:
                    skeleton_result = None
                else:
                    # Pose Estimation
                    time_det_start = time.time()
                    for box in boxes.tolist():
                        x1, y1, x2, y2 = int(box[0]), int(box[1]), int(
                            box[2]), int(box[3])
                        box = [x1, y1, x2 - x1, y2 - y1]
                        skeleton_result.append(
                            self.detect_skeleton_on_single_human(
                                orig_img, box))
                    self.time_det += (time.time() - time_det_start)

                skeleton_list.append([im_name.split('/')[-1]])
                if skeleton_result is not None:
                    for human in skeleton_result:
                        for mat in human:
                            skeleton_list[-1].append(int(mat[0]))
                            skeleton_list[-1].append(int(mat[1]))
                            skeleton_list[-1].append(0.8)

        self.time_run += (time.time() - time_run_start)
        return skeleton_list

    def runtime(self):
        return self.time_det, self.time_run

    def generate_target_points(self, joints, image_size, sigma):
        '''
        :param joints: [num_joints, 3]
        :return: target heatmaps, one Gaussian per joint
        '''
        # target_weight = np.ones((self.num_joints, 1), dtype=np.float32)
        # target_weight[:, 0] = joints_vis[:, 0]

        target = np.zeros((self.num_joints, image_size[1], image_size[0]),
                          dtype=np.float32)

        tmp_size = sigma * 3

        for joint_id in range(self.num_joints):
            feat_stride = [1, 1]  # image_size / heatmap_size, equal here
            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
            # Check that any part of the gaussian is in-bounds
            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            if ul[0] >= image_size[0] or ul[1] >= image_size[1] \
                    or br[0] < 0 or br[1] < 0:
                # If not, just return the image as is
                # target_weight[joint_id] = 0
                continue

            # # Generate gaussian
            size = 2 * tmp_size + 1
            x = np.arange(0, size, 1, np.float32)
            y = x[:, np.newaxis]
            x0 = y0 = size // 2
            # The gaussian is not normalized, we want the center value to equal 1
            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

            # Usable gaussian range
            g_x = max(0, -ul[0]), min(br[0], image_size[0]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], image_size[1]) - ul[1]
            # Image range
            img_x = max(0, ul[0]), min(br[0], image_size[0])
            img_y = max(0, ul[1]), min(br[1], image_size[1])

            v = 1  #target_weight[joint_id]
            if v > 0.5:
                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                    g[g_y[0]:g_y[1], g_x[0]:g_x[1]]

        return target  #, target_weight

    def generate_target_lines(self, joints, image_size, target_kps):

        l_pair = [
            (0, 1),
            (0, 2),
            (1, 3),
            (2, 4),  # Head
            (5, 6),
            (5, 7),
            (7, 9),
            (6, 8),
            (8, 10),  # Hand
            (17, 11),
            (17, 12),  # Body
            (11, 13),
            (12, 14),
            (13, 15),
            (14, 16)
        ]  # Leg

        line_color = [(0, 215, 255), (0, 255, 204), (0, 134, 255),
                      (0, 255, 50), (77, 255, 222), (77, 196, 255),
                      (77, 135, 255), (191, 255, 77), (77, 255, 77),
                      (77, 222, 255), (255, 156, 127), (0, 127, 255),
                      (255, 127, 77), (0, 77, 255), (255, 77, 36)]

        # Nose, LEye, REye, LEar, REar
        # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist
        # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck
        p_color = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255),
                   (0, 255, 0), (77, 255, 255), (77, 255, 204), (77, 204, 255),
                   (191, 255, 77), (77, 191, 255), (191, 255, 77),
                   (204, 77, 255), (77, 255, 204), (191, 77, 255),
                   (77, 255, 191), (127, 77, 255), (77, 255, 127),
                   (0, 255, 255)]

        img = np.zeros(shape=image_size, dtype='uint8')
        part_line = {}
        for n in range(self.num_joints):
            # if float(kp_scores_h[n]) <= 0.05:
            #     continue

            cor_x, cor_y = int(joints[n][0]), int(joints[n][1])
            part_line[n] = (cor_x, cor_y)
            # cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1)

        # Draw limbs
        for i, (start_p, end_p) in enumerate(l_pair):
            if i not in target_kps:
                continue

            if start_p in part_line and end_p in part_line:
                start_xy = part_line[start_p]
                end_xy = part_line[end_p]
                cv2.line(img, start_xy, end_xy, line_color[i], 5)
        return img
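
The _xywh2cs conversion above follows the MSRA simple-baselines convention: the box is widened (or heightened) to the model aspect ratio, divided by a fixed pixel_std of 200, then inflated by 1.25. A standalone restatement for a quick numeric check (it drops the center[0] != -1 guard, which is always true here):

import numpy as np

def xywh2cs(x, y, w, h, image_width, image_height, pixel_std=200):
    # same arithmetic as MSRApose_skeleton._xywh2cs, minus the -1 guard
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    aspect_ratio = image_width / image_height
    if w > aspect_ratio * h:
        h = w / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    return center, np.array([w, h], dtype=np.float32) / pixel_std * 1.25

print(xywh2cs(50, 40, 100, 300, image_width=192, image_height=256))
# -> center (100., 190.), scale (1.40625, 1.875): w grew to 225 to match the 0.75 ratio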
Example #27
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        # self.det_inp_dim = int(self.det_model.net_info['height'])
        self.det_inp_dim = int(opt.inp_dim)
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        from mtcnn.mtcnn import MTCNN
        detector = MTCNN()
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                if self.dataloder.format == 'yolo':
                    # Human Detection
                    img = img.cuda()
                    prediction = self.det_model(img, CUDA=True)
                    # NMS process
                    dets = dynamic_write_results(prediction,
                                                 opt.confidence,
                                                 opt.num_classes,
                                                 nms=True,
                                                 nms_conf=opt.nms_thesh)

                elif self.dataloder.format == 'mtcnn':
                    # Face detection
                    imgs_np = img.float().mul(255.0).cpu().numpy()
                    imgs_np = np.squeeze(imgs_np, axis=0)
                    imgs_np = np.transpose(imgs_np, (1, 2, 0))
                    dets = detector.detect_faces(imgs_np)
                    fac_det = []
                    for det in dets:
                        fac_det.append([
                            0, det["box"][0], det["box"][1],
                            det["box"][0] + det["box"][2],
                            det["box"][1] + det["box"][3], det["confidence"],
                            0.99, 0
                        ])
                    dets = torch.tensor(fac_det)

                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
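
The 'mtcnn' branch above repacks every face dict into the same 8-column row the YOLO path emits (batch index, x1, y1, x2, y2, box confidence, class score, class id). A standalone check with one fabricated face dict:

import torch

det = {"box": [30, 40, 50, 60], "confidence": 0.97}   # fabricated MTCNN-style output
row = [0, det["box"][0], det["box"][1],
       det["box"][0] + det["box"][2],    # x2 = x + w
       det["box"][1] + det["box"][3],    # y2 = y + h
       det["confidence"], 0.99, 0]
print(torch.tensor([row]))   # [[0., 30., 40., 80., 100., 0.97, 0.99, 0.]]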
Example #28
0
    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(args.cfgfile, height=args.reso)
    model.load_state_dict(torch.load(args.weightsfile))

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    cap = cv2.VideoCapture(args.video)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()
Example #29
0
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping over the whole dataset
        for i in range(self.num_batches):

            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()

            # img = (batch, frames)
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return
            start_time = getTime()
            with torch.no_grad():
                # Human Detection

                img = img.cuda()  # image ( B, 3, 608,608 )
                prediction = self.det_model(img, CUDA=True)

                # ( B, 22743, 85 ) = ( batchsize, proposal boxes, xywh+cls)
                # predictions for each B image.

                # NMS process
                carperson = dynamic_write_results(prediction, opt.confidence, opt.num_classes, nms=True,
                                                  nms_conf=opt.nms_thesh)
                if isinstance(carperson, int) or carperson.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(0.5)
                        self.Q.put((orig_img[k], im_name[k], None, None, None, None, None, None))  # 8 elements
                    continue

                ckpt_time, det_time = getTime(start_time)

                carperson = carperson.cpu()  # (1) k-th image , (7) x,y,w,h,c, cls_score, cls_index
                im_dim_list = torch.index_select(im_dim_list, 0, carperson[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # coordinate transfer
                carperson[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                carperson[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                carperson[:, 1:5] /= scaling_factor
                for j in range(carperson.shape[0]):
                    carperson[j, [1, 3]] = torch.clamp(carperson[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    carperson[j, [2, 4]] = torch.clamp(carperson[j, [2, 4]], 0.0, im_dim_list[j, 1])

                cls_car_mask = carperson * (carperson[:, -1] == 2).float().unsqueeze(1)  # car
                cls_car_mask_ind = torch.nonzero(cls_car_mask[:, -2]).squeeze()
                car_dets = carperson[cls_car_mask_ind].view(-1, 8)

                cls_person_mask = carperson * (carperson[:, -1] == 0).float().unsqueeze(1)  # person
                cls_person_mask_ind = torch.nonzero(cls_person_mask[:, -2]).squeeze()
                hm_dets = carperson[cls_person_mask_ind].view(-1, 8)
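                # (class indices follow COCO ordering: 0 = person, 2 = car)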

                ckpt_time, masking_time = getTime(ckpt_time)

            hm_boxes, hm_scores = None, None

            if hm_dets.size(0) > 0:
                hm_boxes = hm_dets[:, 1:5]
                hm_scores = hm_dets[:, 5:6]

            car_box_conf = None
            if car_dets.size(0) > 0:
                car_box_conf = car_dets

            for k in range(len(orig_img)):  # for k-th image detection.

                if car_box_conf is None:
                    car_k = None
                else:
                    car_k = car_box_conf[car_box_conf[:, 0] == k].numpy()
                    car_k = car_k[np.where(car_k[:, 5] > 0.2)]  # TODO check here, cls or bg/fg confidence?
                    # car_k = non_max_suppression_fast(car_k, overlapThresh=0.3)  # TODO check here, NMS

                if hm_boxes is not None:
                    hm_boxes_k = hm_boxes[hm_dets[:, 0] == k]
                    hm_scores_k = hm_scores[hm_dets[:, 0] == k]
                    inps = torch.zeros(hm_boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                    pt1 = torch.zeros(hm_boxes_k.size(0), 2)
                    pt2 = torch.zeros(hm_boxes_k.size(0), 2)
                    item = (orig_img[k], im_name[k], hm_boxes_k, hm_scores_k, inps, pt1, pt2, car_k)
                    # print('video processor ', 'image', im_name[k], 'hm box ', hm_boxes_k.size())
                else:
                    item = (orig_img[k], im_name[k], None, None, None, None, None, car_k)  # 8 elements

                if self.Q.full():
                    time.sleep(0.5)
                self.Q.put(item)

            ckpt_time, distribute_time = getTime(ckpt_time)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
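
A consumer of the loader above simply drains the queue until it meets the all-None sentinel. A minimal sketch of that pattern, assuming `loader` is an instance of the class above and `handle_detections` is a hypothetical downstream step (neither name comes from the original example):

loader.start()
while True:
    # read() blocks until the producer has queued the next 8-element item
    orig_img, im_name, hm_boxes, hm_scores, inps, pt1, pt2, car_k = loader.read()
    if orig_img is None:  # all-None sentinel: the whole dataset has been processed
        break
    if hm_boxes is not None:
        handle_detections(orig_img, hm_boxes, hm_scores, car_k)  # hypothetical helper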
Example #30
0
def main():
    global args
    args = parser.parse_args()

    # Yolo
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(args.config_file)
    model.load_weights(args.weights_file)

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    # Connect
    client = paho.Client()
    host, port = args.broker_url.split(':')
    client.connect(host, int(port))

    # subscribe a system messages
    client.message_callback_add("$SYS/#", system_message)
    client.subscribe("$SYS/#")
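    # "$SYS/#" is the broker's reserved topic tree for its own status
    # messages; subscribing to it here is purely for diagnostics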

    # Open rtsp stream
    cap = cv2.VideoCapture(args.input_url)

    assert cap.isOpened(), 'Cannot capture source {}'.format(args.input_url)

    # Inspect input stream
    input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    input_fps = cap.get(cv2.CAP_PROP_FPS)
    print("[input stream] width: {}, height: {}, fps: {}".format(
        input_width, input_height, input_fps))

    # Open output stream
    output_command = stream_factory(args.output_url, input_width, input_height,
                                    input_fps)
    print(output_command)
    output_stream = sp.Popen(output_command, stdin=sp.PIPE, stderr=sp.PIPE)

    # Load class names and box colours once, before the frame loop
    classes = load_classes('yolo/data/coco.names')
    colors = pkl.load(open("yolo/pallete", "rb"))

    frames = 0
    start = time.time()

    while cap.isOpened():
        ret, frame = cap.read()  # frame size: 640x360x3(=691200)
        if ret:
            # Our detect operations on the frame come here

            img, orig_im, im_dim = prep_image(frame, inp_dim)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thresh)

            if isinstance(output, int):
                frames += 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim

            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]
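            # Note: this maps boxes from the inp_dim square straight to
            # frame coordinates; if prep_image letterboxes the input, the
            # padding offset is ignored and boxes can drift for
            # non-square frames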

            # Overlay on screen
            list(map(lambda x: write(x, orig_im, classes, colors), output))
            # Send the bounding boxes (not implemented in this example)

            # Display the resulting frame
            cv2.imshow("frame", orig_im)
            frames += 1
            print("FPS of the video is {:5.2f}, size: {}".format(
                frames / (time.time() - start), orig_im.size))

            # Write rtmp stream (note: `frame` is the raw input; pass
            # orig_im instead to stream the frames with boxes drawn)
            output_stream.stdin.write(frame.tobytes())
        else:
            break
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Close
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
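
The `stream_factory` helper used above is not shown in this example. A plausible sketch, assuming it returns an ffmpeg argument list that reads raw BGR24 frames on stdin and publishes them to the RTMP output URL (the exact flags are an assumption, not the original helper):

def stream_factory(output_url, width, height, fps):
    # Hypothetical sketch: build an ffmpeg command that consumes raw
    # BGR24 frames on stdin and pushes an FLV/RTMP stream to output_url.
    return ['ffmpeg', '-y',
            '-f', 'rawvideo',
            '-vcodec', 'rawvideo',
            '-pix_fmt', 'bgr24',
            '-s', '{}x{}'.format(int(width), int(height)),
            '-r', str(fps),
            '-i', '-',  # frames arrive on stdin
            '-c:v', 'libx264',
            '-pix_fmt', 'yuv420p',
            '-preset', 'ultrafast',
            '-f', 'flv',
            output_url]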