class WebcamDetectionLoader:
    def __init__(self, webcam, batchSize=1, queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stream = cv2.VideoCapture(webcam)
        assert self.stream.isOpened(), 'Cannot open webcam'
        self.stopped = False
        self.batchSize = batchSize

        # initialize the queue used to store frames read from
        # the video file
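        # (a LIFO queue keeps the freshest webcam frame on top, so stale
        # frames can be dropped when the consumer falls behind)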
        self.Q = LifoQueue(maxsize=queueSize)

    def len(self):
        return self.Q.qsize()

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping
        while True:
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(self.batchSize):
                (grabbed, frame) = self.stream.read()
                if not grabbed:
                    continue
                # process and add the frame to the queue
                inp_dim = int(opt.inp_dim)
                img_k, orig_img_k, im_dim_list_k = prep_frame(frame, inp_dim)
                inp_k = im_to_torch(orig_img_k)

                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_dim_list.append(im_dim_list_k)

            if len(inp) == 0:
                # nothing was grabbed this round; skip this empty batch
                continue

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                img = Variable(torch.cat(img)).cuda()
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list = im_dim_list.cuda()

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        if self.Q.full():
                            # drop stale frames so the newest ones get through
                            with self.Q.mutex:
                                self.Q.queue.clear()
                        self.Q.put((inp[k], orig_img[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
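                # the detector ran on a letterboxed det_inp_dim x det_inp_dim
                # canvas; subtract the padding offset, then divide by the
                # scale to map boxes back into original image coordinates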
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                if self.Q.full():
                    with self.Q.mutex:
                        self.Q.queue.clear()
                self.Q.put((inp[k], orig_img[k], boxes[dets[:, 0] == k],
                            scores[dets[:, 0] == k]))

    def videoinfo(self):
        # indicate the video info
        fourcc = int(self.stream.get(cv2.CAP_PROP_FOURCC))
        fps = self.stream.get(cv2.CAP_PROP_FPS)
        frameSize = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                     int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return (fourcc, fps, frameSize)

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def more(self):
        # return True if there are still frames in the queue
        return self.Q.qsize() > 0

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
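
A minimal usage sketch for the loader above (hypothetical: assumes `opt` is already parsed and a CUDA device is available, since the constructor calls .cuda()):

loader = WebcamDetectionLoader(0).start()  # webcam index 0
while True:
    inp, orig_img, boxes, scores = loader.read()
    if boxes is None:
        continue  # no person detected in this frame
    print('found %d person box(es)' % boxes.size(0))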
Example #2
class DetectionLoader:
    def __init__(self, dataloder, batchSize=25, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet(
            "joints_detectors/Alphapose/yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights(
            'joints_detectors/Alphapose/models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        # CPU variant: the usual self.det_model.cuda() call is disabled here
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, name="DetectionLoader", args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=(), daemon=True)
            # p = mp.Process(target=self.update, args=())
            # p.daemon = True
            p.start()
        return self

    def update(self):
        while True:
            sys.stdout.flush()
            print("detection loader len : " + str(self.Q.qsize()))

            # keep looping the whole dataset
            #for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # Human Detection
                img = img  # CPU inference in this variant; .cuda() disabled
                prediction = self.det_model(img, CUDA=False)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:

                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[0], im_name[0], None, None, None,
                                None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            #for k in range(len(orig_img)):
            boxes_k = boxes[dets[:, 0] == 0]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                if self.Q.full():
                    time.sleep(2)
                self.Q.put(
                    (orig_img[0], im_name[0], None, None, None, None, None))
                continue
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                               opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)
            if self.Q.full():
                time.sleep(2)
            self.Q.put((orig_img[0], im_name[0], boxes_k,
                        scores[dets[:, 0] == 0], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #3
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        # initialize the queue used to store frames read from
        # the video file
        self.Q = LifoQueue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping the whole dataset
        while True:
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            with self.dataloder.Q.mutex:
                self.dataloder.Q.queue.clear()
            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #4
        "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
        default="416",
        type=str)
    return parser.parse_args()


cap = cv2.VideoCapture(
    '/home/gaurav/Desktop/sem6/VR/before_midsem/mini_project/harsh_without_mask.mp4'
)
args = arg_parse()
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
num_classes = 80
bbox_attrs = 5 + num_classes
yolo = Darknet(args.cfgfile)
yolo.load_weights(args.weightsfile)
yolo.net_info["height"] = args.reso
inp_dim = int(yolo.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32
yolo.cuda()

PATH = '/home/gaurav/Desktop/sem6/VR/before_midsem/mini_project/model/classi4.pkl'
net = models.alexnet(pretrained=True)
my_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(my_device)

loaded_model = pickle.load(open(PATH, 'rb'))
tot_cnt = 0
cnt = 0
out = cv2.VideoWriter('./output_harsh_without_mask.avi',
Example #5
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        cfg_path = "yolo/cfg/yolov3-single.cfg"
        weights_path = 'models/yolo/01.weights'
        self.det_model = Darknet(cfg_path, reso=int(opt.inp_dim))
        self.det_model.load_weights(weights_path)
        print("Loading cfg from", cfg_path)
        print("Loading weights from", weights_path)
        self.det_model.net_info['height'] = opt.inp_dim  # input dimension
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                img = img.cuda()
                # Critical, use yolo to do object detection here!
                prediction = self.det_model(img)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()

                # Modified by @penggao
                # Scale for SIXD dataset
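                # (assumption: frames here were resized without letterbox
                # padding, so boxes rescale by plain w/reso and h/reso
                # ratios instead of the offset-and-clamp transfer above)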

                reso = self.det_inp_dim
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                w, h = im_dim_list[:, 0], im_dim_list[:, 1]
                w_ratio = w / reso
                h_ratio = h / reso
                boxes = dets[:, 1:5]
                boxes[:, 0] = boxes[:, 0] * w_ratio
                boxes[:, 1] = boxes[:, 1] * h_ratio
                boxes[:, 2] = boxes[:, 2] * w_ratio
                boxes[:, 3] = boxes[:, 3] * h_ratio
                scores = dets[:, 5:6]

                # im_dim_list = torch.index_select(im_dim_list,0, dets[:, 0].long())
                # scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # # coordinate transfer
                # dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                # dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                # dets[:, 1:5] /= scaling_factor
                # for j in range(dets.shape[0]):
                #     dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                #     dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                # boxes = dets[:, 1:5]
                # scores = dets[:, 5:6]

            img = Image.open(im_name[0])
            draw = ImageDraw.Draw(img)
            for i in range(boxes.shape[0]):
                x1, y1, x2, y2 = boxes[i, 0], boxes[i, 1], boxes[i, 2], boxes[i, 3]
                objectness = 'conf: %.2f' % float(scores[i])  # label for this box
                draw.rectangle((x1, y1, x2, y2), outline='red')
                draw.text((x1, y1), objectness, fill='red')

            # img.save(im_name[0].replace('rgb', 'results'))

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #6
class DetectionLoader:
    def __init__(self,
                 dataset,
                 det_model=None,
                 cuda_id=None,
                 batchSize=4,
                 queueSize=256):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        if det_model is None:
            self.det_model = Darknet('yolo/cfg/yolov3.cfg')
            self.det_model.load_weights('models/yolo/yolov3.weights')
            self.det_model.net_info['height'] = opt.inp_dim
            self.det_inp_dim = int(self.det_model.net_info['height'])
            assert self.det_inp_dim % 32 == 0
            assert self.det_inp_dim > 32
            self.det_model.cuda()
            self.det_model.eval()
        else:
            self.det_model = det_model
            self.det_model.net_info['height'] = opt.inp_dim
            self.det_inp_dim = int(self.det_model.net_info['height'])
            assert self.det_inp_dim % 32 == 0
            assert self.det_inp_dim > 32

        self.cuda_id = cuda_id

        self.stopped = False
        self.dataset = dataset
        self.batchSize = batchSize
        self.datalen = len(self.dataset)
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        self.Q = Queue(maxsize=queueSize)

    def start(self):
        # start a thread to read frames from the file video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img = []
            inp = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                img_k, inp_k, orig_img_k, im_name_k, im_dim_list_k = \
                    self.dataset[k]
                img.append(img_k)
                inp.append(inp_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                ht = inp[0].size(1)
                wd = inp[0].size(2)
                # Human Detection
                if self.cuda_id is None:
                    img = Variable(torch.cat(img)).cuda()
                else:
                    img = Variable(torch.cat(img)).cuda(self.cuda_id)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                if self.cuda_id is None:
                    im_dim_list = im_dim_list.cuda()
                else:
                    im_dim_list = im_dim_list.cuda(self.cuda_id)

                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh,
                                             cuda_id=self.cuda_id)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(inp)):
                        while self.Q.full():
                            time.sleep(0.2)
                        self.Q.put(
                            (inp[k], orig_img[k], im_name[k], None, None))
                    continue

                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5].cpu()
                scores = dets[:, 5:6].cpu()

            for k in range(len(inp)):
                while self.Q.full():
                    time.sleep(0.2)
                self.Q.put((inp[k], orig_img[k], im_name[k],
                            boxes[dets[:, 0] == k], scores[dets[:, 0] == k]))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #7
class DetectionLoader:
    def __init__(self, dataloder):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder

    def detect_image(self, im_path):
        im, ori_im, im_name, im_dim_list = self.dataloder.getitem_yolo(im_path)

        with torch.no_grad():
            im = im.cuda()
            prediction = self.det_model(im, CUDA=True)
        # NMS process
        dets = dynamic_write_results(prediction, opt.confidence,
                                     opt.num_classes, nms=True,
                                     nms_conf=opt.nms_thesh)
        if isinstance(dets, int) or dets.shape[0] == 0:
            return (ori_im[0], im_name[0], None, None, None, None, None)

        dets = dets.cpu()
        im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
        scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0] \
            .view(-1, 1)
        # coordinate transfer
        dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

        dets[:, 1: 5] /= scaling_factor
        for j in range(dets.shape[0]):
            dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                          im_dim_list[j, 0])
            dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                          im_dim_list[j, 1])
        boxes = dets[:, 1:5]
        scores = dets[:, 5:6]

        if boxes.shape[0] > 1:
            best = scores.argmax()
            boxes = boxes[best].unsqueeze(0)
            scores = scores[best].unsqueeze(0)
            dets = dets[best].unsqueeze(0)
        # len(ori_im) == 1, so this loop runs at most once
        for k in range(len(ori_im)):
            boxes_k = boxes[dets[:, 0] == k]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                return (ori_im[k], im_name[k], None, None, None, None, None)
            inps = torch.zeros(boxes_k.size(0), 3,
                               opt.inputResH, opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)
            return (ori_im[k], im_name[k], boxes_k, scores[dets[:, 0] == k],
                    inps, pt1, pt2)
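
A hedged sketch of calling detect_image above (the image path and the dataloder's getitem_yolo interface are assumptions carried over from the code):

loader = DetectionLoader(dataloder)
ori_im, im_name, box, score, inps, pt1, pt2 = loader.detect_image('examples/demo.jpg')
if box is not None:
    print('best person box:', box[0].tolist(), 'score: %.2f' % float(score))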
Example #8
class YOLODetector(BaseDetector):
    def __init__(self, cfg, opt=None):
        super(YOLODetector, self).__init__()

        self.detector_cfg = cfg
        self.detector_opt = opt
        self.model_cfg = cfg.get('CONFIG', 'detector/yolo/cfg/yolov3-spp.cfg')
        self.model_weights = cfg.get('WEIGHTS',
                                     'detector/yolo/data/yolov3-spp.weights')
        self.inp_dim = cfg.get('INP_DIM', 608)
        self.nms_thres = cfg.get('NMS_THRES', 0.6)
        self.confidence = cfg.get('CONFIDENCE', 0.05)
        self.num_classes = cfg.get('NUM_CLASSES', 80)
        self.model = None
        self.load_model()

    def load_model(self):
        args = self.detector_opt

        print('Loading YOLO model..')
        self.model = Darknet(self.model_cfg)
        self.model.load_weights(self.model_weights)
        self.model.net_info['height'] = self.inp_dim
        print("Network successfully loaded")

        if args:
            if len(args.gpus) > 1:
                self.model = torch.nn.DataParallel(self.model,
                                                   device_ids=args.gpus).to(
                                                       args.device)
            else:
                self.model.to(args.device)
        else:
            self.model.cuda()
        self.model.eval()

    def image_preprocess(self, img_source):
        """
        Pre-process the img before fed to the object detection network
        Input: image name(str) or raw image data(ndarray or torch.Tensor,channel GBR)
        Output: pre-processed image data(torch.FloatTensor,(1,3,h,w))
        """
        if isinstance(img_source, str):
            img, orig_img, im_dim_list = prep_image(img_source, self.inp_dim)
        elif isinstance(img_source, torch.Tensor) or isinstance(
                img_source, np.ndarray):
            img, orig_img, im_dim_list = prep_frame(img_source, self.inp_dim)
        else:
            raise IOError('Unknown image source type: {}'.format(
                type(img_source)))

        return img

    def images_detection(self, imgs, orig_dim_list):
        """
        Feed the img data into object detection network and 
        collect bbox w.r.t original image size
        Input: imgs(torch.FloatTensor,(b,3,h,w)): pre-processed mini-batch image input
               orig_dim_list(torch.FloatTensor, (b,(w,h,w,h))): original mini-batch image size
        Output: dets(torch.cuda.FloatTensor,(n,(batch_idx,x1,y1,x2,y2,c,s,idx of cls))): object detection results
        """
        args = self.detector_opt
        _CUDA = True
        if args:
            if args.gpus[0] < 0:
                _CUDA = False
        if not self.model:
            self.load_model()
        with torch.no_grad():
            imgs = imgs.to(args.device) if args else imgs.cuda()
            prediction = self.model(imgs, args=args)
            #do nms to the detection results, only human category is left
            dets = self.dynamic_write_results(prediction,
                                              self.confidence,
                                              self.num_classes,
                                              nms=True,
                                              nms_conf=self.nms_thres)

            if isinstance(dets, int) or dets.shape[0] == 0:
                return 0
            dets = dets.cpu()

            orig_dim_list = torch.index_select(orig_dim_list, 0,
                                               dets[:, 0].long())
            scaling_factor = torch.min(self.inp_dim / orig_dim_list,
                                       1)[0].view(-1, 1)
            dets[:, [1, 3]] -= (self.inp_dim - scaling_factor *
                                orig_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.inp_dim - scaling_factor *
                                orig_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for i in range(dets.shape[0]):
                dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0,
                                              orig_dim_list[i, 0])
                dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0,
                                              orig_dim_list[i, 1])

            return dets
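
    # Hypothetical example: for a pre-processed batch `imgs` of shape
    # (b,3,608,608) and `orig_dim_list` of shape (b,4), images_detection
    # returns 0 when nothing is detected, otherwise an (n,8) tensor whose
    # column 0 holds the batch index of each detection.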

    def dynamic_write_results(self,
                              prediction,
                              confidence,
                              num_classes,
                              nms=True,
                              nms_conf=0.4):
        prediction_bak = prediction.clone()
        dets = self.write_results(prediction.clone(), confidence, num_classes,
                                  nms, nms_conf)
        if isinstance(dets, int):
            return dets

        if dets.shape[0] > 100:
            nms_conf -= 0.05
            dets = self.write_results(prediction_bak.clone(), confidence,
                                      num_classes, nms, nms_conf)

        return dets
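
    # Note: when more than 100 boxes survive, the IoU threshold is lowered
    # by 0.05 and NMS is re-run once, suppressing more overlapping boxes to
    # keep the detection count bounded.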

    def write_results(self,
                      prediction,
                      confidence,
                      num_classes,
                      nms=True,
                      nms_conf=0.4):
        args = self.detector_opt
        #prediction: (batchsize, num of objects, (xc,yc,w,h,box confidence, 80 class scores))
        conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
        prediction = prediction * conf_mask

        try:
            ind_nz = torch.nonzero(prediction[:, :,
                                              4]).transpose(0, 1).contiguous()
        except:
            return 0

        #the 3rd channel of prediction: (xc,yc,w,h)->(x1,y1,x2,y2)
        box_a = prediction.new(prediction.shape)
        box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
        box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
        box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
        box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
        prediction[:, :, :4] = box_a[:, :, :4]

        batch_size = prediction.size(0)

        output = prediction.new(1, prediction.size(2) + 1)
        write = False
        num = 0
        for ind in range(batch_size):
            #select the image from the batch
            image_pred = prediction[ind]

            #Get the class having maximum score, and the index of that class
            #Get rid of num_classes softmax scores
            #Add the class index and the class score of class having maximum score
            max_conf, max_conf_score = torch.max(
                image_pred[:, 5:5 + num_classes], 1)
            max_conf = max_conf.float().unsqueeze(1)
            max_conf_score = max_conf_score.float().unsqueeze(1)
            seq = (image_pred[:, :5], max_conf, max_conf_score)
            #image_pred:(n,(x1,y1,x2,y2,c,s,idx of cls))
            image_pred = torch.cat(seq, 1)

            #Get rid of the zero entries
            non_zero_ind = (torch.nonzero(image_pred[:, 4]))

            image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

            #Get the various classes detected in the image
            try:
                img_classes = unique(image_pred_[:, -1])
            except:
                continue

            #We will do NMS classwise
            #print(img_classes)
            for cls in img_classes:
                # keep only the person class (COCO class 0)
                if cls != 0:
                    continue
                #get the detections with one particular class
                cls_mask = image_pred_ * (image_pred_[:, -1]
                                          == cls).float().unsqueeze(1)
                class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()

                image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

                #sort the detections such that the entry with the maximum objectness
                #confidence is at the top
                conf_sort_index = torch.sort(image_pred_class[:, 4],
                                             descending=True)[1]
                image_pred_class = image_pred_class[conf_sort_index]
                idx = image_pred_class.size(0)

                #if nms has to be done
                if nms:
                    if platform.system() != 'Windows':
                        #We use faster rcnn implementation of nms (soft nms is optional)
                        nms_op = getattr(nms_wrapper, 'nms')
                        #nms_op input:(n,(x1,y1,x2,y2,c))
                        #nms_op output: input[inds,:], inds
                        _, inds = nms_op(image_pred_class[:, :5], nms_conf)

                        image_pred_class = image_pred_class[inds]
                    else:
                        # Perform non-maximum suppression
                        max_detections = []
                        while image_pred_class.size(0):
                            # Get detection with highest confidence and save as max detection
                            max_detections.append(
                                image_pred_class[0].unsqueeze(0))
                            # Stop if we're at the last detection
                            if len(image_pred_class) == 1:
                                break
                            # Get the IOUs for all boxes with lower confidence
                            ious = bbox_iou(max_detections[-1],
                                            image_pred_class[1:], args)
                            # Remove detections with IoU >= NMS threshold
                            image_pred_class = image_pred_class[1:][
                                ious < nms_conf]

                        image_pred_class = torch.cat(max_detections).data

                #Concatenate the batch_id of the image to the detection
                #this helps us identify which image the detection corresponds to
                #We use a linear structure to hold ALL the detections from the batch
                #the batch_dim is flattened
                #batch is identified by an extra batch column

                batch_ind = image_pred_class.new(image_pred_class.size(0),
                                                 1).fill_(ind)
                seq = batch_ind, image_pred_class
                if not write:
                    output = torch.cat(seq, 1)
                    write = True
                else:
                    out = torch.cat(seq, 1)
                    output = torch.cat((output, out))
                num += 1

        if not num:
            return 0
        #output:(n,(batch_ind,x1,y1,x2,y2,c,s,idx of cls))
        return output

    def detect_one_img(self, img_name):
        """
        Detect bboxs in one image
        Input: 'str', full path of image
        Output: '[{"category_id":1,"score":float,"bbox":[x,y,w,h],"image_id":str},...]',
        The output results are similar with coco results type, except that image_id uses full path str
        instead of coco %012d id for generalization. 
        """
        args = self.detector_opt
        _CUDA = True
        if args:
            if args.gpus[0] < 0:
                _CUDA = False
        if not self.model:
            self.load_model()
        if isinstance(self.model, torch.nn.DataParallel):
            self.model = self.model.module
        dets_results = []
        #pre-process(scale, normalize, ...) the image
        img, orig_img, img_dim_list = prep_image(img_name, self.inp_dim)
        with torch.no_grad():
            img_dim_list = torch.FloatTensor([img_dim_list]).repeat(1, 2)
            img = img.to(args.device) if args else img.cuda()
            prediction = self.model(img, args=args)
            #do nms to the detection results, only human category is left
            dets = self.dynamic_write_results(prediction,
                                              self.confidence,
                                              self.num_classes,
                                              nms=True,
                                              nms_conf=self.nms_thres)
            if isinstance(dets, int) or dets.shape[0] == 0:
                return None
            dets = dets.cpu()

            img_dim_list = torch.index_select(img_dim_list, 0,
                                              dets[:, 0].long())
            scaling_factor = torch.min(self.inp_dim / img_dim_list,
                                       1)[0].view(-1, 1)
            dets[:, [1, 3]] -= (self.inp_dim - scaling_factor *
                                img_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.inp_dim - scaling_factor *
                                img_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for i in range(dets.shape[0]):
                dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0,
                                              img_dim_list[i, 0])
                dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0,
                                              img_dim_list[i, 1])

                #write results
                det_dict = {}
                x = float(dets[i, 1])
                y = float(dets[i, 2])
                w = float(dets[i, 3] - dets[i, 1])
                h = float(dets[i, 4] - dets[i, 2])
                det_dict["category_id"] = 1
                det_dict["score"] = float(dets[i, 5])
                det_dict["bbox"] = [x, y, w, h]
                det_dict["image_id"] = int(
                    os.path.basename(img_name).split('.')[0])
                dets_results.append(det_dict)

            return dets_results
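
A minimal sketch of using YOLODetector (hypothetical cfg values; assumes a CUDA device, since load_model falls back to .cuda() when no opt is given):

cfg = {'CONFIG': 'detector/yolo/cfg/yolov3-spp.cfg',
       'WEIGHTS': 'detector/yolo/data/yolov3-spp.weights'}
detector = YOLODetector(cfg)
results = detector.detect_one_img('examples/000001.jpg')  # numeric basename expected for image_id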
Example #9
        os.mkdir(args.outputpath)
    
    if not len(videofile):
        raise IOError('Error: must contain --video')

    # Load input video
    fvs = VideoLoader(videofile).start()
    (fourcc,fps,frameSize) = fvs.videoinfo()
    # Data writer
    save_path = os.path.join(args.outputpath, 'AlphaPose_'+videofile.split('/')[-1].split('.')[0]+'.avi')
    writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()

    # Load YOLO model
    print('Loading YOLO model..')
    det_model = Darknet("yolo/cfg/yolov3.cfg")
    det_model.load_weights('models/yolo/yolov3.weights')
    det_model.net_info['height'] = args.inp_dim
    det_inp_dim = int(det_model.net_info['height'])
    assert det_inp_dim % 32 == 0
    assert det_inp_dim > 32
    det_model.cuda()
    det_model.eval()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()
Example #10
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        if opt.device == 'GPU':
            self.det_model.cuda()
        else:
            self.det_model.cpu()
        self.det_model.eval()

        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the list used to store frames read from
        # the video file
        self.Q = list()

    def start(self):
        # start detecting people (runs synchronously, filling self.Q)
        self.update()

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.Q[i]
            with torch.no_grad():
                # Human Detection
                if opt.device == 'GPU':
                    img = img.cuda()
                else:
                    img = img.cpu()

                prediction = self.det_model(
                    img, CUDA=True if opt.device == 'GPU' else False)
                # NMS process
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        self.Q.append((orig_img[k], im_name[k], None, None,
                                       None, None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    self.Q.append((orig_img[k], im_name[k], None, None, None,
                                   None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                # multiply the score by the bounding box height, then keep
                # only the highest-scoring person
                processed_scores = self.cal_scores(scores, boxes_k)
                best = np.argmax(processed_scores)
                self.Q.append((orig_img[k], im_name[k],
                               boxes_k[best:best + 1],
                               scores[best],
                               inps[best:best + 1],
                               pt1[best:best + 1],
                               pt2[best:best + 1]))

    def cal_scores(self, scores, boxes):
        processed_scores = scores.clone()
        for i in range(boxes.shape[0]):
            processed_scores[i][0] *= abs(boxes[i][1] - boxes[i][3])
        return processed_scores

    def len(self):
        # return list len
        return len(self.Q)
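
For intuition, cal_scores above weights each confidence by its box height so that the tallest (usually closest) person is selected. A hypothetical two-box case:

scores = torch.tensor([[0.90], [0.90]])
boxes = torch.tensor([[10., 50., 60., 170.],    # height 120
                      [80., 20., 200., 320.]])  # height 300
# weighted scores become [[108.], [270.]], so the second box wins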
Example #11
class PeopleDetection():

    def __init__(self):
        super().__init__()
        self.cfgfile = "yolo/cfg/yolov3.cfg"
        self.weightsfile = "yolo/yolov3.weights"
        self.num_classes = 80
        self.confidence = 0.25
        self.nms_thesh = 0.4
        self.reso = 160

        self.start = 0
        self.CUDA = torch.cuda.is_available()

        self.bbox_attrs = 5 + self.num_classes

        self.model = Darknet(self.cfgfile)
        self.model.load_weights(self.weightsfile)

        self.model.net_info["height"] = self.reso
        self.inp_dim = int(self.model.net_info["height"])

        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        if self.CUDA:
            self.model.cuda()

        # Switch to “evaluate” mode before predictions
        self.model.eval()

        self.classes = load_classes('yolo/data/coco.names')
        self.colors = pkl.load(open("yolo/pallete", "rb"))

    def prep_image(self, img, inp_dim):
        """
        Prepare an image for input to the neural network.

        Returns a Variable.
        """

        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = cv2.resize(orig_im, (inp_dim, inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        return img_, orig_im, dim

    def write(self, x, img, people_bounding_boxes, only_person=False):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        cls = int(x[-1])
        if (not only_person) or (only_person and self.classes[cls] == 'person'):
            label = "{0}".format(self.classes[cls])
            color = random.choice(self.colors)
            cv2.rectangle(img, c1, c2, color, 2)

            x = int(c1[0])
            y = int(c1[1])

            w = int(c2[0] - x)
            h = int(c2[1] - y)

            people_bounding_boxes.append([x, y, w, h])

            font_scale = 1.5
            line_thickness = 2
            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, font_scale, line_thickness)[0]
            c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
            cv2.rectangle(img, c1, c2, color, -1)
            cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, font_scale, [225, 255, 255], line_thickness)
        return img

    def run(self, frame):
        img, orig_im, dim = self.prep_image(frame, self.inp_dim)

        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if self.CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        output = self.model(Variable(img), self.CUDA)
        output = write_results(output, self.confidence, self.num_classes,
                               nms=True, nms_conf=self.nms_thesh)

        if isinstance(output, int):
            return orig_im, False, []

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim

        #            im_dim = im_dim.repeat(output.size(0), 1)
        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]

        people_bounding_boxes = []

        list(map(lambda x: self.write(x, orig_im, people_bounding_boxes, only_person=True), output))

        return orig_im, True, people_bounding_boxes
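
A hedged usage sketch for PeopleDetection (assumes an OpenCV frame source and the yolo/ asset paths listed in __init__):

detector = PeopleDetection()
cap = cv2.VideoCapture(0)
ret, frame = cap.read()
if ret:
    annotated, found, boxes = detector.run(frame)
    print('people found:', len(boxes))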
Example #12
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024, use_boxGT=False, gt_json=''):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet(opt.yolo_model_cfg)
        self.det_model.load_weights(opt.yolo_model_path)
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if (self.datalen) % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

        self.use_boxGT = use_boxGT
        if use_boxGT:
            print('loading ground-truth boxes.')
            self.box_gt = box_gt_class(gt_json)


    def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # keep looping the whole dataset
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                # NMS process
                dets = dynamic_write_results(prediction, opt.confidence,
                                    opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list,0, dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

                
                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            # print(scores.shape,img.shape,boxes.shape,len(orig_img))
            for k in range(len(orig_img)):
                if not self.use_boxGT:
                    boxes_k = boxes[dets[:,0]==k]
                    scores_k = scores[dets[:,0]==k]
                else:
                    boxes_k = self.box_gt.get_box(im_name[k].split('/')[-1])
                    # print(boxes_k.shape)
                    scores_k = torch.ones((boxes_k.shape[0],1))
                # print(boxes_k.shape,img.shape,scores[dets[:,0]==k].shape,len(orig_img))

                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None, None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k, scores_k, inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
Example #13
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights("models/yolo/yolov3-spp.weights")
        self.det_model.net_info['height'] = opt.inp_dim
        self.det_inp_dim = int(self.det_model.net_info['height'])
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        # round up so a final partial batch is still processed
        leftover = 0
        if self.datalen % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover

        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # run update() in a daemon thread (single-process mode) or a daemon process
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            # wrap inference in torch.no_grad() since no gradients are needed here
            with torch.no_grad():
                # Human Detection
                img = img.cuda()
                prediction = self.det_model(img, CUDA=True)
                dets = dynamic_write_results(prediction,
                                             opt.confidence,
                                             opt.num_classes,
                                             nms=True,
                                             nms_conf=opt.nms_thesh)
                # no detections in this batch: emit an empty result per frame
                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        return self.Q.get()

    def len(self):
        return self.Q.qsize()
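
The coordinate-transfer block above undoes the letterbox preprocessing: each frame is scaled by s = min(inp_dim/w, inp_dim/h) and padded symmetrically to inp_dim x inp_dim, so a padded coordinate p maps back to the original frame as (p - (inp_dim - s*d)/2) / s along each axis d in (w, h). A standalone sketch of that inverse mapping (illustrative only, not part of the loader):

import torch

def unletterbox(dets, inp_dim, w, h):
    # dets[:, 1:5] holds (x1, y1, x2, y2) in the padded inp_dim x inp_dim image
    s = min(inp_dim / w, inp_dim / h)                 # letterbox scale factor
    dets[:, [1, 3]] = (dets[:, [1, 3]] - (inp_dim - s * w) / 2) / s
    dets[:, [2, 4]] = (dets[:, [2, 4]] - (inp_dim - s * h) / 2) / s
    dets[:, [1, 3]] = dets[:, [1, 3]].clamp(0.0, w)   # clip to frame bounds
    dets[:, [2, 4]] = dets[:, [2, 4]].clamp(0.0, h)
    return dets
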
Example #14
class PeopleDetection:
    def __init__(self):
        super().__init__()
        self.cfgfile = "yolo/cfg/yolov3.cfg"
        self.weightsfile = "yolo/yolov3.weights"
        self.num_classes = 80
        self.confidence = 0.25
        self.nms_thesh = 0.4
        self.reso = 160

        self.start = 0
        self.CUDA = torch.cuda.is_available()

        self.bbox_attrs = 5 + self.num_classes

        self.model = Darknet(self.cfgfile)
        self.model.load_weights(self.weightsfile)

        self.model.net_info["height"] = self.reso
        self.inp_dim = int(self.model.net_info["height"])

        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        if self.CUDA:
            self.model.cuda()

        self.model.eval()

        self.classes = load_classes('yolo/data/coco.names')
        self.colors = pkl.load(open("yolo/pallete", "rb"))

    def prep_image(self, img, inp_dim):
        """
        Prepare image for inputting to the neural network. 

        Returns a Variable 
        """

        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = cv2.resize(orig_im, (inp_dim, inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        return img_, orig_im, dim

    def write(self, x, img, only_person=False, paintings_bounding_boxes=None):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        cls = int(x[-1])
        count_persons = 0
        if (not only_person) or (only_person
                                 and self.classes[cls] == 'person'):
            label = "{0}".format(self.classes[cls])
            color = random.choice(self.colors)

            # skip the detection if the person box lies inside a painting box
            if paintings_bounding_boxes is not None:
                # each pbb is (x_min, x_max, y_min, y_max)
                for pbb in paintings_bounding_boxes:
                    if (c1[0] > pbb[0] and c1[1] < pbb[1]
                            and c2[0] > pbb[2] and c2[1] < pbb[3]):
                        return 0

            if c1 == (0, 0) and c2 == (0, 0):
                return 0

            cv2.rectangle(img, c1, c2, color, 1)
            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
            cv2.rectangle(img, c1, c2, color, -1)
            cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
            if only_person and self.classes[cls] == 'person':
                count_persons += 1
        return count_persons

    def run(self, frame, paintings_bounding_boxes=None):
        img, orig_im, dim = self.prep_image(frame, self.inp_dim)

        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if self.CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        output = self.model(Variable(img), self.CUDA)
        output = write_results(output,
                               self.confidence,
                               self.num_classes,
                               nms=True,
                               nms_conf=self.nms_thesh)

        if type(output) == int:
            return orig_im, False, 0

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(
            self.inp_dim)) / self.inp_dim

        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]

        persons_list = list(
            map(
                lambda x: self.write(x,
                                     orig_im,
                                     only_person=True,
                                     paintings_bounding_boxes=
                                     paintings_bounding_boxes), output))

        persons = np.array(persons_list).sum()

        return orig_im, True, persons
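
For reference, a hedged driver for this class; the capture source and the painting boxes are placeholders, not values from the source:

import cv2

detector = PeopleDetection()
cap = cv2.VideoCapture(0)          # hypothetical webcam source
ok, frame = cap.read()
if ok:
    # returns the annotated frame, a found-anything flag, and a person count
    annotated, found, n_persons = detector.run(frame,
                                               paintings_bounding_boxes=None)
    print("persons:", n_persons)
cap.release()
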
Example #15
class DetectionLoader:
    def __init__(self, dataloder, batchSize=1, queueSize=1024):
        # initialize the file video stream along with the boolean
        # used to indicate if the thread should be stopped or not
        self.det_model = Darknet("yolo/cfg/yolov3-spp.cfg")
        self.det_model.load_weights('models/yolo/yolov3-spp.weights')
        self.det_model.net_info['height'] = opt.inp_dim
        # self.det_inp_dim = int(self.det_model.net_info['height'])
        self.det_inp_dim = int(opt.inp_dim)
        assert self.det_inp_dim % 32 == 0
        assert self.det_inp_dim > 32
        self.det_model.cuda()
        self.det_model.eval()

        self.stopped = False
        self.dataloder = dataloder
        self.batchSize = batchSize
        self.datalen = self.dataloder.length()
        leftover = 0
        if self.datalen % batchSize:
            leftover = 1
        self.num_batches = self.datalen // batchSize + leftover
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.Q = Queue(maxsize=queueSize)
        else:
            self.Q = mp.Queue(maxsize=queueSize)

    def start(self):
        # run update() in a daemon thread (single-process mode) or a daemon process
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self

    def update(self):
        # lazy import: MTCNN is only needed in face-detection mode
        from mtcnn.mtcnn import MTCNN
        detector = MTCNN()
        for i in range(self.num_batches):
            img, orig_img, im_name, im_dim_list = self.dataloder.getitem()
            if img is None:
                self.Q.put((None, None, None, None, None, None, None))
                return

            with torch.no_grad():
                if self.dataloder.format == 'yolo':
                    # Human Detection
                    img = img.cuda()
                    prediction = self.det_model(img, CUDA=True)
                    # NMS process
                    dets = dynamic_write_results(prediction,
                                                 opt.confidence,
                                                 opt.num_classes,
                                                 nms=True,
                                                 nms_conf=opt.nms_thesh)

                elif self.dataloder.format == 'mtcnn':
                    # Face detection
                    imgs_np = img.float().mul(255.0).cpu().numpy()
                    imgs_np = np.squeeze(imgs_np, axis=0)
                    imgs_np = np.transpose(imgs_np, (1, 2, 0))
                    dets = detector.detect_faces(imgs_np)
                    # repack MTCNN dicts into the YOLO det-row layout:
                    # [batch_idx, x1, y1, x2, y2, box_conf, cls_conf, cls_idx]
                    fac_det = []
                    for det in dets:
                        fac_det.append([
                            0, det["box"][0], det["box"][1],
                            det["box"][0] + det["box"][2],
                            det["box"][1] + det["box"][3], det["confidence"],
                            0.99, 0
                        ])
                    dets = torch.tensor(fac_det)

                if isinstance(dets, int) or dets.shape[0] == 0:
                    for k in range(len(orig_img)):
                        if self.Q.full():
                            time.sleep(2)
                        self.Q.put((orig_img[k], im_name[k], None, None, None,
                                    None, None))
                    continue
                dets = dets.cpu()
                im_dim_list = torch.index_select(im_dim_list, 0,
                                                 dets[:, 0].long())
                scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                           1)[0].view(-1, 1)

                # coordinate transfer
                dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 0].view(-1, 1)) / 2
                dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                    im_dim_list[:, 1].view(-1, 1)) / 2

                dets[:, 1:5] /= scaling_factor
                for j in range(dets.shape[0]):
                    dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                  im_dim_list[j, 0])
                    dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                  im_dim_list[j, 1])
                boxes = dets[:, 1:5]
                scores = dets[:, 5:6]

            for k in range(len(orig_img)):
                boxes_k = boxes[dets[:, 0] == k]
                if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                    if self.Q.full():
                        time.sleep(2)
                    self.Q.put((orig_img[k], im_name[k], None, None, None,
                                None, None))
                    continue
                inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH,
                                   opt.inputResW)
                pt1 = torch.zeros(boxes_k.size(0), 2)
                pt2 = torch.zeros(boxes_k.size(0), 2)
                if self.Q.full():
                    time.sleep(2)
                self.Q.put((orig_img[k], im_name[k], boxes_k,
                            scores[dets[:, 0] == k], inps, pt1, pt2))

    def read(self):
        # return next frame in the queue
        return self.Q.get()

    def len(self):
        # return queue len
        return self.Q.qsize()
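
One caveat worth noting: when opt.sp is False this loader runs update(), and therefore CUDA inference, in a child process. CUDA state does not survive fork(), so drivers commonly force the 'spawn' start method first. A sketch of that guard (an assumption about the surrounding driver, not something shown in the source):

import torch.multiprocessing as mp

if __name__ == '__main__':
    # 'spawn' re-imports the module in the child instead of forking,
    # which is required for CUDA use inside a subprocess.
    mp.set_start_method('spawn', force=True)
    det_loader = DetectionLoader(dataloder, batchSize=1).start()  # dataloder assumed
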
Example #16
class Yolo:
    def __init__(self):
        # set args
        self.args = self.arg_parse()
        self.confidence = float(self.args.confidence)
        self.nms_thesh = float(self.args.nms_thresh)

        # load file
        self.cfgfile = "yolo/cfg/yolov3.cfg"
        self.weightsfile = "yolo/yolov3.weights"
        self.classes = load_classes('yolo/data/coco.names')
        self.colors = pkl.load(open("yolo/pallete", "rb"))

        # set model
        self.num_classes = 80
        self.bbox_attrs = 5 + self.num_classes

        self.model = Darknet(self.cfgfile)
        self.model.load_weights(self.weightsfile)

        self.model.net_info["height"] = self.args.reso
        self.inp_dim = int(self.model.net_info["height"])

        self.model.eval()

    def arg_parse(self):

        parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo')
        parser.add_argument("--confidence",
                            dest="confidence",
                            help="Object Confidence to filter predictions",
                            default=0.25)
        parser.add_argument("--nms_thresh",
                            dest="nms_thresh",
                            help="NMS Threshhold",
                            default=0.4)
        parser.add_argument(
            "--reso",
            dest='reso',
            help="Input resolution of the network; increase for accuracy, "
                 "decrease for speed",
            default="160",
            type=str)
        return parser.parse_args()

    def prep_image(self, img, inp_dim):
        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = cv2.resize(orig_im, (inp_dim, inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        return img_, orig_im, dim

    def write(self, x, img):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        cls = int(x[-1])
        label = "{0}".format(self.classes[cls])
        color = random.choice(self.colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        # print("[*] label:{} c1x:{} c1y:{} c2x:{} c2y:{}".format(label, c1[0], c1[1], c2[0], c2[1]))

        return [
            label, c1[0].numpy(), c1[1].numpy(), c2[0].numpy(), c2[1].numpy()
        ]

    def detect_bbox(self, frame, imshow=True):

        img, orig_im, dim = self.prep_image(frame, self.inp_dim)

        CUDA = False  # CPU-only here; set to torch.cuda.is_available() to use the GPU
        output = self.model(Variable(img), CUDA)
        output = write_results(output,
                               self.confidence,
                               self.num_classes,
                               nms=True,
                               nms_conf=self.nms_thesh)

        # write_results returns an int when nothing passes the confidence filter
        if isinstance(output, int):
            return []

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                     float(self.inp_dim)) / self.inp_dim

        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]

        bbox = list(map(lambda x: self.write(x, orig_im), output))

        if imshow:
            cv2.imshow("frame", orig_im)
            cv2.waitKey(3 * 1000)  # display the frame for 3 s (waitKey takes ms)

        return bbox
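
And a minimal driver for this class (the input image path is a placeholder; argparse falls back to its defaults when no flags are passed):

import cv2

yolo = Yolo()
frame = cv2.imread("sample.jpg")   # hypothetical input image
if frame is not None:
    for label, x1, y1, x2, y2 in yolo.detect_bbox(frame, imshow=False):
        print(label, x1, y1, x2, y2)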