Example #1
def detect_attributes(image, yolo_dim, yolov3, encoder):
    ''' Run YOLOv3 on the image and describe each sufficiently large detection
    with clothing attributes.
    '''
    text_results = []
    image, orig_img, im_dim = prep_image(image, yolo_dim)
    im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

    image_tensor = image.to(device)
    im_dim = im_dim.to(device)

    # Generate a caption from the image
    # run yolo-v3 in prediction mode
    detections = yolov3(image_tensor, device, True)
    detections = write_results(
        detections,
        args.confidence,
        device,
        num_classes=80,
        nms=True,
        nms_conf=args.nms_thresh,
    )
    # original image dimension --> im_dim
    # view_image(detections)

    os.system("clear")
    if not isinstance(detections, int):
        if detections.shape[0]:
            bboxs = detections[:, 1:5].clone()
            im_dim = im_dim.repeat(detections.shape[0], 1)
            scaling_factor = torch.min(yolo_dim / im_dim, 1)[0].view(-1, 1)

            detections[:, [1, 3]] -= (
                yolo_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            detections[:, [2, 4]] -= (
                yolo_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            detections[:, 1:5] /= scaling_factor

            small_object_ratio = torch.FloatTensor(detections.shape[0])

            for i in range(detections.shape[0]):
                detections[i, [1, 3]] = torch.clamp(detections[i, [1, 3]], 0.0,
                                                    im_dim[i, 0])
                detections[i, [2, 4]] = torch.clamp(detections[i, [2, 4]], 0.0,
                                                    im_dim[i, 1])

                object_area = (detections[i, 3] - detections[i, 1]) * (
                    detections[i, 4] - detections[i, 2])
                orig_img_area = im_dim[i, 0] * im_dim[i, 1]
                small_object_ratio[i] = object_area / orig_img_area

            detections = detections[small_object_ratio > 0.02]
            im_dim = im_dim[small_object_ratio > 0.02]

            if detections.size(0) > 0:
                feature = yolov3.get_feature()
                feature = feature.repeat(detections.size(0), 1, 1, 1)

                scaling_val = 16

                bboxs /= scaling_val
                bboxs = bboxs.round()
                bboxs_index = torch.arange(bboxs.size(0), dtype=torch.int)
                bboxs_index = bboxs_index.to(device)
                bboxs = bboxs.to(device)

                roi_align = RoIAlign(args.roi_size,
                                     args.roi_size,
                                     transform_fpcoor=True).to(device)
                roi_features = roi_align(feature, bboxs, bboxs_index)

                outputs = encoder(roi_features)

                for i in range(detections.shape[0]):

                    sampled_caption = []

                    for j in range(len(outputs) - 1):
                        max_index = torch.max(outputs[j][i].data, 0)[1]
                        word = attribute_pool[j][max_index]
                        sampled_caption.append(word)
                    # swap the lower-length and lower-type attributes (indices 10 and 11)
                    c11 = sampled_caption[11]
                    sampled_caption[11] = sampled_caption[10]
                    sampled_caption[10] = c11

                    sentence = " ".join(sampled_caption)
                    text_results.append(sentence)

                    print(str(i + 1) + ": " + sentence)
                    write(
                        detections[i],
                        orig_img,
                        sentence,
                        i + 1,
                        coco_classes,
                        colors,
                    )
                return text_results, orig_img
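The coordinate un-letterboxing above (compute scaling_factor, subtract the padding, rescale, then clamp) recurs in most of the examples below. A minimal standalone sketch of that mapping, assuming detections[:, 1:5] holds x1, y1, x2, y2 in the inp_dim letterbox space and im_dim holds (width, height, width, height) per row:

import torch

def undo_letterbox(detections, im_dim, inp_dim):
    # detections[:, 1:5]: boxes in the inp_dim x inp_dim letterboxed input space
    # im_dim: original (width, height, width, height) for each detection row
    scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
    detections[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
    detections[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
    detections[:, 1:5] /= scaling_factor
    for i in range(detections.shape[0]):
        detections[i, [1, 3]] = torch.clamp(detections[i, [1, 3]], 0.0, im_dim[i, 0])
        detections[i, [2, 4]] = torch.clamp(detections[i, [2, 4]], 0.0, im_dim[i, 1])
    return detections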
Example #2
def OpenPro1():
    def get_test_input(input_dim, CUDA):
        img = cv2.imread("imgs/messi.jpg")
        img = cv2.resize(img, (input_dim, input_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1))
        img_ = img_[np.newaxis, :, :, :] / 255.0
        img_ = torch.from_numpy(img_).float()
        img_ = Variable(img_)

        if CUDA:
            img_ = img_.cuda()

        return img_

    def prep_image(img, inp_dim):
        """
        Prepare image for inputting to the neural network. 
        
        Returns a Variable 
        """

        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = cv2.resize(orig_im, (inp_dim, inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        return img_, orig_im, dim

    def write(x, img):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        print(label)
        write_db(label)
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    def write_db(label):
        conn = None
        try:
            conn = sqlite3.connect(
                'C:\\yolo\\pytorch-yolo-v3\\db\\final_bill.db')
            print('connected to db macha')
            sql = ''' INSERT INTO bill(item_name,price,weight,amount)
                  VALUES(?,?,?,?) '''
            sql1 = "SELECT * FROM rate WHERE item_name = ?"
            sql2 = "SELECT * FROM weights WHERE item_name = ?"
            cur = conn.cursor()
            q = [label]
            result = cur.execute(sql1, q)
            records = cur.fetchall()
            gk = 0
            for row in records:
                print("price = ", row[1])
                gk = row[1]
            print(result)
            result = cur.execute(sql2, q)
            weights = cur.fetchall()
            mk = 0
            for rows in weights:
                print("weight = ", rows[1])
                mk = rows[1]
            print(result)
            amt = gk * mk
            p = [label, gk, mk, amt]
            cur.execute(sql, p)
            conn.commit()
            conn.close()
            print('inserted')
        except sqlite3.Error as e:
            print(e)

    def arg_parse():
        """
        Parse arguements to the detect module
        
        """

        parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo')
        parser.add_argument("--confidence",
                            dest="confidence",
                            help="Object Confidence to filter predictions",
                            default=0.25)
        parser.add_argument("--nms_thresh",
                            dest="nms_thresh",
                            help="NMS Threshhold",
                            default=0.4)
        parser.add_argument(
            "--reso",
            dest='reso',
            help=
            "Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
            default="160",
            type=str)
        return parser.parse_args()

    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = 'video.avi'

    cap = cv2.VideoCapture(0)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)

            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)
            #print(output)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim

            #            im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            list(map(lambda x: write(x, orig_im), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames /
                                                       (time.time() - start)))

        else:
            break
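write_db() above assumes final_bill.db already contains rate, weights, and bill tables. A hedged sketch of the schema those queries imply (column names beyond item_name are guesses, since only column index 1 is read):

import sqlite3

conn = sqlite3.connect('C:\\yolo\\pytorch-yolo-v3\\db\\final_bill.db')
cur = conn.cursor()
# rate and weights are looked up by item_name and read at column index 1
cur.execute("CREATE TABLE IF NOT EXISTS rate (item_name TEXT, price REAL)")
cur.execute("CREATE TABLE IF NOT EXISTS weights (item_name TEXT, weight REAL)")
# bill receives one row per detected item
cur.execute("CREATE TABLE IF NOT EXISTS bill (item_name TEXT, price REAL, weight REAL, amount REAL)")
conn.commit()
conn.close()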
Example #3
def video_yolo_ready2():
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 5
    confidence = 0.6
    nms_thesh = 0.5
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 5
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    model.net_info["height"] = 416  # a value lower than 160 runs faster (must be a multiple of 32)
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = 'yolo/static/videos/cctv3_video.mp4'

    cap = cv2.VideoCapture(videofile)
    assert cap.isOpened(), 'Cannot capture source'

    global frames
    global picture
    frames = 0

    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)

            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)
            global label_list
            global flag
            if type(output) == int:
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))

                picture = orig_im
                label_list = []  # no detections in this frame (output is an int here)
                print("label_list : ", label_list)
                collision(label_list)

                flag = 0
                # code that saves the image

                ret2, jpeg2 = cv2.imencode('.jpg', orig_im)
                detect_image_byte = jpeg2.tobytes()
                yield (b'--frame\r\n'
                       b'Content-Type: image/jpeg\r\n\r\n' +
                       detect_image_byte + b'\r\n\r\n')
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            picture = orig_im
            label_list = list(map(lambda x: write(x, orig_im), output))
            print(label_list)
            collision(label_list)
            flag = 0

            ret2, jpeg2 = cv2.imencode('.jpg', orig_im)
            detect_image_byte = jpeg2.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + detect_image_byte +
                   b'\r\n\r\n')
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames /
                                                       (time.time() - start)))
        else:
            break
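video_yolo_ready2() is a generator that yields multipart JPEG chunks with a '--frame' boundary. A minimal Flask route sketch that could serve it as an MJPEG stream (Flask and the route name are assumptions, not part of the original code):

from flask import Flask, Response

app = Flask(__name__)

@app.route('/video_feed')
def video_feed():
    # each yielded chunk already carries the boundary and Content-Type headers
    return Response(video_yolo_ready2(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')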
Example #4
        videofile = args.video
        cap = cv2.VideoCapture(videofile)
    else:
        # On macOS, 0 is the built-in camera and 1 is a USB webcam
        # On Linux, 0 is video0, 1 is video1, and so on
        cap = cv2.VideoCapture(args.source)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:

            img, orig_im, orig_dim = prep_image(frame, model_dim)
            orig_dim = torch.FloatTensor(orig_dim).repeat(1, 2)

            with torch.no_grad():
                output = model(img)

            # output is, after write_results, [batch index, x1, y1, x2, y2, objectness score, class index, class prob]
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   model_dim,
                                   orig_dim,
                                   nms=True,
                                   nms_conf=nms_thesh)

            # If no preds, just show image and go to next pred
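Following the column layout noted in the comment above ([batch index, x1, y1, x2, y2, objectness score, class index, class prob]), a hedged sketch of keeping only 'person' boxes, assuming write_results returns an int when nothing is detected and an N x 8 tensor otherwise:

def keep_persons(output):
    # output: int when there are no detections, otherwise an N x 8 tensor
    if isinstance(output, int):
        return None
    return output[output[:, 6] == 0]  # column 6 is the class index; 0 is 'person' in COCO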
Example #5
def main_main():
    global classes, colors
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = 'video.avi'

    cap = cv2.VideoCapture(0)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                #print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim
            #print(float(inp_dim),inp_dim)

            #            im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            list(map(lambda x: write(x, orig_im), output))
            cv2.imshow("frame", orig_im)  #show the frame / output
            key = cv2.waitKey(1)
            #time.sleep(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            #print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
        else:
            break
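Examples #2 and #5 map boxes back to frame coordinates by normalizing against the network size instead of undoing the letterbox padding. A standalone sketch of that variant, assuming output[:, 1:5] holds x1, y1, x2, y2 in the inp_dim space:

import torch

def rescale_normalized(output, inp_dim, frame_w, frame_h):
    # clamp to the square network input, normalize to [0, 1], then scale by the raw frame size
    output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
    output[:, [1, 3]] *= frame_w
    output[:, [2, 4]] *= frame_h
    return output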
Example #6
def object_detection():
    """
        Will load the pre-trained weight file and the cfg file which has knowledge of 80 different objects 
        Using the arg_parse function it will compare the confidence and threshold value of every object in a given frame

    """

    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    num_classes = 80
    width,height = 640, 480
    q = queue.Queue()

    CUDA = torch.cuda.is_available()

    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    
    if CUDA:
        model.cuda()

    #### Test the performance of the model on a Static Image
    # model(get_test_input(inp_dim, CUDA), CUDA)
    # model.eval()
    ####

    #### Test the performance of the model on any video file
    videofile = 'video3.avi'
    ####

    #### If you access a third-party camera over an IP address, use this part of the code
    # address = ConnectionServer.connect()
    # address = 'http://' + address[0] + ':8000/stream.mjpg'
    # print("Fetching Video from", address)
    ####
    

    # assert cap.isOpened(), 'Cannot capture source'   #### If camera is not found assert this message
    count = 0
    frames = 0
    start = time.time()
    cap = cv2.VideoCapture(0)
    # while cap.isOpened():
        # ret, frame = cap.read()
    while True:
        ret, frame = cap.read()
        # if ret:
        img, orig_im, dim = prep_image(frame, inp_dim)  #### Pre-processing part of every frame that came from the source
        im_dim = torch.FloatTensor(dim).repeat(1,2)

        if CUDA:                            #### If you have a gpu properly installed then it will run on the gpu
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():               #### Disable gradient tracking for inference
            output = model(Variable(img), CUDA)
        output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)  #### Localize the objects in a frame

        if type(output) == int:
            frames += 1
            print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
            cv2.imshow("Object Detection Window", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            continue

        #im_dim = im_dim.repeat(output.size(0), 1)
        #scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1)

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
        im_dim = im_dim.repeat(output.size(0), 1)
        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]

        #output[:,1:5] /= scaling_factor

        # for i in range(output.shape[0]):
        #     output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0])
        #     output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1])

        classes = load_classes('data/coco.names')
        colors = pkl.load(open("pallete", "rb"))

        list(map(lambda x: write(x, orig_im, classes, colors), output))

        cv2.imshow("Object Detection Window", orig_im) #### Generating the window
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        frames += 1

        # print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
        l = print_labels()[0]
        print(l)
        hog = cv2.HOGDescriptor()
        hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
        # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        found,w = hog.detectMultiScale(frame, winStride=(8,8), padding=(32,32), scale=1.05)
        # time.sleep(2)
        # print(found)
        # print(len(found))
        # draw_detections(frame, found)
        get_number_of_object, get_distance= draw_detections(frame,found)
        if get_number_of_object >=1 and get_distance!=0:
            feedback = ("{}".format(get_number_of_object)+ " " +l+" at {}".format(round(get_distance))+"Inches")
            speak.Speak(feedback)
            print(feedback)
        else:
            feedback = ("{}".format("1")+ " " +l)
            speak.Speak(feedback)
            print(feedback)
    # Stop the capture
    cap.release()
    # Destroy the window
    cv2.destroyAllWindows()
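The speak.Speak() calls above point at a Windows SAPI voice object; a hedged sketch of how `speak` might be created (an assumption, since its initialization is not shown in this snippet):

import win32com.client

# assumed setup for the `speak` object used in object_detection()
speak = win32com.client.Dispatch("SAPI.SpVoice")
speak.Speak("object detection started")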
Example #7
    def detect_objects(self, image_path):
        image_prep = prep_image(image_path, self.inp_dim)
        im_batches = [image_prep[0]]
        orig_ims = [image_prep[1]]
        im_dim_list = [image_prep[2]]
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

        img_path = image_path

        if self.CUDA:
            im_dim_list = im_dim_list.cuda()

        write = False
        self.model(get_test_input(self.inp_dim, self.CUDA), self.CUDA)

        objs = {}
        i = 0
        for batch in im_batches:
            if self.CUDA:
                batch = batch.cuda()

            with torch.no_grad():
                prediction = self.model(Variable(batch), self.CUDA)

            prediction = prediction[:, self.scales_indices]

            prediction = write_results(prediction,
                                       self.confidence,
                                       self.num_classes,
                                       nms=True,
                                       nms_conf=self.nms_thresh)
            prediction[:, 0] += i * self.batch_size

            if not write:
                output = prediction
                write = 1
            else:
                output = torch.cat((output, prediction))

            for im_num, image in enumerate(img_path[i * self.batch_size:min(
                (i + 1) * self.batch_size, len(img_path))]):
                im_id = i * self.batch_size + im_num
                objs = [
                    self.classes[int(x[-1])] for x in output
                    if int(x[0]) == im_id
                ]
                print("{0:20s} {1:s}".format("Objects Detected:",
                                             " ".join(objs)))
                print(
                    "----------------------------------------------------------"
                )
            i += 1

            if self.CUDA:
                torch.cuda.synchronize()

        try:
            output
        except NameError:
            print("No detections were made")
            exit()

        im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())

        scaling_factor = torch.min(self.inp_dim / im_dim_list,
                                   1)[0].view(-1, 1)

        output[:,
               [1, 3]] -= (self.inp_dim -
                           scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        output[:,
               [2, 4]] -= (self.inp_dim -
                           scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                            im_dim_list[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                            im_dim_list[i, 1])

        def write(x, batches, results):
            c1 = tuple(x[1:3].int())
            c2 = tuple(x[3:5].int())
            img = results[int(x[0])]
            cls = int(x[-1])
            label = "{0}".format(self.classes[cls])
            color = random.choice(self.colors)
            cv2.rectangle(img, c1, c2, color, 1)
            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
            cv2.rectangle(img, c1, c2, color, -1)
            cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
            return img

        list(map(lambda x: write(x, im_batches, orig_ims), output))

        det_names = pd.Series(img_path).apply(
            lambda x: "{}/det_{}".format(self.save_directory,
                                         x.split("/")[-1]))

        cv2.imwrite(det_names[0], orig_ims[0])
        torch.cuda.empty_cache()
        ret_path = det_names[0]

        return ret_path, objs, orig_ims[0]
Example #8
def main(args):
    # Image preprocessing
    transform = transforms.Compose([transforms.ToTensor()])

    # Load vocabulary wrapper

    # Build the models
    #CUDA = torch.cuda.is_available()

    num_classes = 80
    yolov3 = Darknet(args.cfg_file)
    yolov3.load_weights(args.weights_file)
    yolov3.net_info["height"] = args.reso
    inp_dim = int(yolov3.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    print("yolo-v3 network successfully loaded")

    attribute_size = [15, 7, 3, 5, 8, 4, 15, 7, 3, 5, 3, 3, 4]

    encoder = EncoderClothing(args.embed_size, device, args.roi_size,
                              attribute_size)

    # Prepare an image
    images = "test"

    try:
        list_dir = os.listdir(images)
        #   list_dir.sort(key=lambda x: int(x[:-4]))
        imlist = [
            osp.join(osp.realpath('.'), images, img) for img in list_dir
            if os.path.splitext(img)[1] == '.jpg' or os.path.splitext(img)[1]
            == '.JPG' or os.path.splitext(img)[1] == '.png'
        ]
    except NotADirectoryError:
        imlist = []
        imlist.append(osp.join(osp.realpath('.'), images))
        print('Not a directory error')
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        exit()

    yolov3.to(device)
    encoder.to(device)

    yolov3.eval()
    encoder.eval()

    encoder.load_state_dict(torch.load(args.encoder_path))

    for inx, image in enumerate(imlist):

        #print(image)
        image, orig_img, im_dim = prep_image(image, inp_dim)
        im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

        image_tensor = image.to(device)
        im_dim = im_dim.to(device)

        # Generate a caption from the image
        detections = yolov3(image_tensor, device,
                            True)  # prediction mode for yolo-v3
        detections = write_results(detections,
                                   args.confidence,
                                   num_classes,
                                   device,
                                   nms=True,
                                   nms_conf=args.nms_thresh)
        # original image dimension --> im_dim
        #view_image(detections)

        os.system('clear')
        if type(detections) != int:
            if detections.shape[0]:
                bboxs = detections[:, 1:5].clone()
                im_dim = im_dim.repeat(detections.shape[0], 1)
                scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

                detections[:, [1, 3]] -= (
                    inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                detections[:, [2, 4]] -= (
                    inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

                detections[:, 1:5] /= scaling_factor

                small_object_ratio = torch.FloatTensor(detections.shape[0])

                for i in range(detections.shape[0]):
                    detections[i,
                               [1, 3]] = torch.clamp(detections[i, [1, 3]],
                                                     0.0, im_dim[i, 0])
                    detections[i,
                               [2, 4]] = torch.clamp(detections[i, [2, 4]],
                                                     0.0, im_dim[i, 1])

                    object_area = (detections[i, 3] - detections[i, 1]) * (
                        detections[i, 4] - detections[i, 2])
                    orig_img_area = im_dim[i, 0] * im_dim[i, 1]
                    small_object_ratio[i] = object_area / orig_img_area

                detections = detections[small_object_ratio > 0.02]
                im_dim = im_dim[small_object_ratio > 0.02]

                if detections.size(0) > 0:
                    feature = yolov3.get_feature()
                    feature = feature.repeat(detections.size(0), 1, 1, 1)

                    #orig_img_dim = im_dim[:, 1:]
                    #orig_img_dim = orig_img_dim.repeat(1, 2)

                    scaling_val = 16

                    bboxs /= scaling_val
                    bboxs = bboxs.round()
                    bboxs_index = torch.arange(bboxs.size(0), dtype=torch.int)
                    bboxs_index = bboxs_index.to(device)
                    bboxs = bboxs.to(device)

                    roi_align = RoIAlign(args.roi_size,
                                         args.roi_size,
                                         transform_fpcoor=True).to(device)
                    roi_features = roi_align(feature, bboxs, bboxs_index)
                    #    print(roi_features)
                    #    print(roi_features.size())

                    #roi_features = roi_features.reshape(roi_features.size(0), -1)

                    #roi_align_feature = encoder(roi_features)

                    outputs = encoder(roi_features)
                    #attribute_size = [15, 7, 3, 5, 7, 4, 15, 7, 3, 5, 4, 3, 4]
                    #losses = [criteria[i](outputs[i], targets[i]) for i in range(len(attribute_size))]

                    for i in range(detections.shape[0]):

                        sampled_caption = []
                        #attr_fc = outputs[]
                        for j in range(len(outputs)):
                            #temp = outputs[j][i].data
                            max_index = torch.max(outputs[j][i].data, 0)[1]
                            word = attribute_pool[j][max_index]
                            sampled_caption.append(word)

                        c11 = sampled_caption[11]
                        sampled_caption[11] = sampled_caption[10]
                        sampled_caption[10] = c11

                        sentence = ' '.join(sampled_caption)

                        # again sampling for testing
                        #print ('---------------------------')
                        print(str(i + 1) + ': ' + sentence)
                        write(detections[i], orig_img, sentence, i + 1,
                              coco_classes, colors)
                        #list(map(lambda x: write(x, orig_img, captions), detections[i].unsqueeze(0)))

        cv2.imshow("frame", orig_img)
        key = cv2.waitKey(0)
        os.system('clear')
        if key & 0xFF == ord('q'):
            break
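attribute_pool is not defined in this snippet; from attribute_size and the indexing attribute_pool[j][max_index], it is presumably a list of 13 word lists, one per clothing-attribute head. A hypothetical illustration of that shape (the real vocabulary comes from the clothing dataset, not from this placeholder):

attribute_size = [15, 7, 3, 5, 8, 4, 15, 7, 3, 5, 3, 3, 4]
# placeholder words only -- each head j offers attribute_size[j] choices
attribute_pool = [["attr{}_{}".format(j, k) for k in range(n)]
                  for j, n in enumerate(attribute_size)]
assert all(len(pool) == n for pool, n in zip(attribute_pool, attribute_size))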
Example #9
def yolo_human_det(img, model=None, reso=416, confidence=0.70):
    args = arg_parse()
    # args.reso = reso
    inp_dim = reso
    num_classes = 80

    CUDA = torch.cuda.is_available()
    if model is None:
        model = load_model(args, CUDA, inp_dim)

    if type(img) == str:
        assert os.path.isfile(img), 'The image path does not exist'
        img = cv2.imread(img)

    img, ori_img, img_dim = preprocess.prep_image(img, inp_dim)
    img_dim = torch.FloatTensor(img_dim).repeat(1, 2)

    with torch.no_grad():
        if CUDA:
            img_dim = img_dim.cuda()
            img = img.cuda()
        output = model(img, CUDA)
        output = write_results(output,
                               confidence,
                               num_classes,
                               nms=True,
                               nms_conf=args.nms_thresh,
                               det_hm=True)

        if len(output) == 0:
            return None, None

        img_dim = img_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim / img_dim, 1)[0].view(-1, 1)

        output[:, [1, 3]] -= (inp_dim -
                              scaling_factor * img_dim[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (inp_dim -
                              scaling_factor * img_dim[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, img_dim[i,
                                                                            0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, img_dim[i,
                                                                            1])

    bboxs = []
    scores = []
    for i in range(len(output)):
        item = output[i]
        bbox = item[1:5].cpu().numpy()
        # convert float32 values to floats rounded to 2 decimals
        bbox = [round(i, 2) for i in list(bbox)]
        score = item[5].cpu().numpy()
        bboxs.append(bbox)
        scores.append(score)
    scores = np.expand_dims(np.array(scores), 1)
    bboxs = np.array(bboxs)

    return bboxs, scores
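A short usage sketch for yolo_human_det() above (the image path is illustrative; arg_parse and load_model come from the surrounding project):

import cv2

frame = cv2.imread("imgs/messi.jpg")
bboxs, scores = yolo_human_det(frame, model=None, reso=416, confidence=0.70)
if bboxs is None:
    print("no person detected")
else:
    for box, score in zip(bboxs, scores):
        print(box, float(score))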
Example #10
def SlowFast(model, CUDA, videofile, video_name):
    # Video file on which you want to run the model
    cap = cv2.VideoCapture(videofile)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    last = np.array([])
    last_time = time.time()

    start = time.time()

    #######for sp detec##########
    buffer = deque(maxlen=64)
    resize_width = 400
    resize_height = 300

    count = 0
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            #######for sp detec##########
            f = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # resize the converted frame if it is not already the final size
            f = cv2.resize(f, (resize_width, resize_height))
            f = normalize(f)
            buffer.append(f)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            print(frame_height, frame_width)
            scale = [resize_width / frame_width, resize_height / frame_height]

            img, orig_im, dim = prep_image(frame, inp_dim)

            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(
                output, confidence, num_classes, nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                # ZZ+= 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                # print(ZZ)
                #cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim/im_dim, 1)[0].view(-1, 1)

            output[:, [1, 3]] -= (inp_dim - scaling_factor *
                                  im_dim[:, 0].view(-1, 1))/2
            output[:, [2, 4]] -= (inp_dim - scaling_factor *
                                  im_dim[:, 1].view(-1, 1))/2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(
                    output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(
                    output[i, [2, 4]], 0.0, im_dim[i, 1])

            output = output.cpu().data.numpy()
            # print(output)
            bbox_xywh = output[:, 1:5]
            bbox_xywh[:, 2] = bbox_xywh[:, 2] - bbox_xywh[:, 0]
            bbox_xywh[:, 3] = bbox_xywh[:, 3] - bbox_xywh[:, 1]

            bbox_xywh[:, 0] = bbox_xywh[:, 0] + (bbox_xywh[:, 2])/2
            bbox_xywh[:, 1] = bbox_xywh[:, 1] + (bbox_xywh[:, 3])/2
            cls_conf = output[:, 5]
            cls_ids = output[:, 7]

            if bbox_xywh is not None:
                mask = cls_ids == 0.0
                bbox_xywh = bbox_xywh[mask]
                cls_conf = cls_conf[mask]
                outputs = deepsort.update(bbox_xywh, cls_conf, orig_im)

                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]

            if len(buffer) == 64:
                if count % 3 == 0:
                    b = buffer
                    a = time.time()
                    b = np.array(b, dtype=np.float32)
                    print("time:", time.time() - a)
                    b = to_tensor(b)

                    image_batch = torch.tensor(
                        b, dtype=torch.float).unsqueeze(0).cuda()
                    bbox_xyxy = np.array(bbox_xyxy, dtype=float)
                    bbox_xyxy[:, [0, 2]] *= scale[0]
                    bbox_xyxy[:, [1, 3]] *= scale[1]
                    detector_bboxes = torch.tensor(
                        bbox_xyxy, dtype=torch.float).unsqueeze(0).cuda()

                    with torch.no_grad():
                        detection_bboxes, detection_classes, detection_probs = \
                            model_sf.eval().forward(image_batch, detector_bboxes_batch=detector_bboxes)

                    detection_bboxes = np.array(detection_bboxes.cpu())
                    detection_classes = np.array(detection_classes)
                    detection_probs = np.array(detection_probs)
                    # Get the corresponding classification label
                    detection_bboxes[:, [0, 2]] /= scale[0]
                    detection_bboxes[:, [1, 3]] /= scale[1]
                imshow(video_name, frame, detection_bboxes,
                       detection_classes, detection_probs, identities, count)
                count += 1

            #cv2.imshow("frame", orig_im)
            key = cv2.waitKey(0)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            # ZZ += 1
            print("FPS of the video is {:5.2f}".format(
                frames / (time.time() - start)))
            # print(ZZ)

        else:
            break
    convert_to_video = f"""ffmpeg -framerate 30 -pattern_type glob -i "demo/outputs/{video_name}/frames/*.jpg" -c:v libx264 -pix_fmt yuv420p demo/outputs/{video_name}/videos/video.mp4"""
    os.system(convert_to_video)
Example #11
if __name__ == '__main__':
    args = arg_parse()

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights("yolov3.weights")
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    model.eval()

    img = cv2.imread(args.image)
    img, orig_im, dim = prep_image(args.image, inp_dim)
    im_dim = torch.FloatTensor(dim).repeat(1, 2)

    with torch.no_grad():
        output = model(torch.autograd.Variable(img), False)
    classes = load_classes(args.classes)
    output = write_results(output,
                           confidence=0.5,
                           num_classes=len(classes),
                           nms=True,
                           nms_conf=0.4)

    class_counter = Counter([classes[int(obj[-1])] for obj in output])
    print("Class counts: " + str(class_counter))

    tot_objects = output.size(0)
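A possible continuation (an assumption, not part of the original snippet) that reports the per-class counts and the total:

    for name, count in class_counter.most_common():
        print("{0:20s} {1:d}".format(name, count))
    print("Total objects: {}".format(tot_objects))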
Example #12
def main():
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    #print("loc: ", loc)

    CUDA = torch.cuda.is_available()

    num_classes = 80

    CUDA = torch.cuda.is_available()
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()
    status = False
    option = args.option

    #read video based on option
    if option == "webcam":
        # if loc == "front":
        cap_front = cv2.VideoCapture(0)
        #else:
        cap_back = cv2.VideoCapture(1)

    elif option == "video":
        videofile1 = args.file1
        videofile2 = args.file2
        cap_front = cv2.VideoCapture(videofile1)
        cap_back = cv2.VideoCapture(videofile2)
    else:
        imagefile1 = args.file1
        imagefile2 = args.file2
        cap_front = cv2.VideoCapture(imagefile1)
        cap_back = cv2.VideoCapture(imagefile2)
        status = True
    assert cap_back.isOpened(), 'Cannot capture source'
    assert cap_front.isOpened(), 'Cannot capture source'

    max_val_f = 0
    max_val_b = 0
    tmp = 0
    classes = load_classes('data/coco.names')
    colors = pkl.load(open("pallete", "rb"))

    while cap_back.isOpened() or cap_front.isOpened():
        print("-----------------------------------")
        start = time.time()
        #read video
        ret_front, frame_front = cap_front.read()
        ret_back, frame_back = cap_back.read()

        if ret_front and ret_back:
            #preprocessing image
            img_f, orig_im_f, dim_f = prep_image(frame_front, inp_dim)
            img_b, orig_im_b, dim_b = prep_image(frame_back, inp_dim)
            im_dim_f = torch.FloatTensor(dim_f).repeat(1, 2)
            im_dim_b = torch.FloatTensor(dim_b).repeat(1, 2)

            if CUDA:
                im_dim_f = im_dim_f.cuda()
                img_f = img_f.cuda()
                im_dim_b = im_dim_b.cuda()
                img_b = img_b.cuda()

            with torch.no_grad():
                output_f = model(Variable(img_f), CUDA)
                output_b = model(Variable(img_b), CUDA)
            output_f = write_results(output_f,
                                     confidence,
                                     num_classes,
                                     nms=True,
                                     nms_conf=nms_thesh)
            output_b = write_results(output_b,
                                     confidence,
                                     num_classes,
                                     nms=True,
                                     nms_conf=nms_thesh)

            im_dim_f = im_dim_f.repeat(output_f.size(0), 1)
            scaling_factor_f = torch.min(inp_dim / im_dim_f, 1)[0].view(-1, 1)
            im_dim_b = im_dim_b.repeat(output_b.size(0), 1)
            scaling_factor_b = torch.min(inp_dim / im_dim_b, 1)[0].view(-1, 1)

            #front
            output_f[:, [1, 3]] -= (
                inp_dim - scaling_factor_f * im_dim_f[:, 0].view(-1, 1)) / 2
            output_f[:, [2, 4]] -= (
                inp_dim - scaling_factor_f * im_dim_f[:, 1].view(-1, 1)) / 2

            output_f[:, 1:5] /= scaling_factor_f

            for i in range(output_f.shape[0]):
                output_f[i, [1, 3]] = torch.clamp(output_f[i, [1, 3]], 0.0,
                                                  im_dim_f[i, 0])
                output_f[i, [2, 4]] = torch.clamp(output_f[i, [2, 4]], 0.0,
                                                  im_dim_f[i, 1])

            #back
            output_b[:, [1, 3]] -= (
                inp_dim - scaling_factor_b * im_dim_b[:, 0].view(-1, 1)) / 2
            output_b[:, [2, 4]] -= (
                inp_dim - scaling_factor_b * im_dim_b[:, 1].view(-1, 1)) / 2

            output_b[:, 1:5] /= scaling_factor_b

            for i in range(output_b.shape[0]):
                output_b[i, [1, 3]] = torch.clamp(output_b[i, [1, 3]], 0.0,
                                                  im_dim_b[i, 0])
                output_b[i, [2, 4]] = torch.clamp(output_b[i, [2, 4]], 0.0,
                                                  im_dim_b[i, 1])

            #result
            cnt_f = list(
                map(lambda x: write(x, orig_im_f, classes, colors)[1],
                    output_f)).count("person")
            cnt_b = list(
                map(lambda x: write(x, orig_im_b, classes, colors)[1],
                    output_b)).count("person")

            if max_val_f < cnt_f:
                max_val_f = cnt_f
            if max_val_b < cnt_b:
                max_val_b = cnt_b
            print("front person : " + str(cnt_f))
            print("back person : " + str(cnt_b))
            print("max_val_f : " + str(max_val_f))
            print("max_val_b : " + str(max_val_b))

            # decide the case for each camera
            case_f = check_person(max_val_f, "front")
            case_b = check_person(max_val_b, "back")
            after_img_f = represent_case(orig_im_f, case_f)
            after_img_b = represent_case(orig_im_b, case_b)

            #visualization
            f_h, f_w, f_d = after_img_f.shape
            b_h, b_w, b_d = after_img_b.shape

            h = max(f_h, b_h)

            after_img = np.zeros((h, f_w + b_w, f_d), np.uint8)
            after_img[0:f_h, 0:f_w] = after_img_f[:, :]
            after_img[0:b_h, f_w:f_w + b_w] = after_img_b[:, :]

            cv2.imshow("frame", after_img)

            if status:
                cv2.waitKey(-1)
            cv2.imwrite('output/frame%04d.jpg' % (tmp), after_img)
            tmp += 1

            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            print("\ndetecting time : " + str(time.time() - start))
            if case_f == "red" and case_b == "green":
                print("Go back!")
        else:
            break
Example #13
def getFrames():
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = 'video.avi'

    cap = cv2.VideoCapture(0)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            list(map(lambda x: write(x, orig_im, classes, colors), output))

            ret, jpg = cv2.imencode(".jpg", orig_im)
            yield (b'--boundary\r\nContent-Type: image/jpeg\r\n\r\n' +
                   jpg.tobytes() + b'\r\n\r\n')

            frames += 1
            print("FPS of the video is {:5.2f}".format(frames /
                                                       (time.time() - start)))

        else:
            break
Example #14
def video_demo(frame, inp_dim, quadrangle, onnx2trt, deepsort, classes, colors, h_inv):

    img, orig_im, dim = prep_image(frame, inp_dim)
    im_dim = torch.FloatTensor(dim).repeat(1, 2)
    im_dim = im_dim.cuda()


    start = time.time()
    output = onnx2trt.detect_thread(frame, img)

    end = time.time() - start

    if type(output) == int:
        return orig_im, []

    # rescale bbox from 416x416 back to 1920x1080
    im_dim = im_dim.repeat(output.size(0), 1)
    scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)  #
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor
    # target_num = output.shape[0]

    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

    # Targeted improvement (BaiYu): write_select replaces write, showing only detections inside the quadrangle
    result_list = list(map(lambda x: write_select(x, orig_im, classes, colors, quadrangle)[1], output))


    # track targets of every class
    bbox_Tracking = []      # bounding boxes
    cls_ids_Tracking = []   # class indices
    cls_conf = []           # confidence scores

    for bi in range(len(result_list) - 1, -1, -1):
        # if result_list[bi][0] == 2 or result_list[bi][0] == 3:
        if result_list[bi][-1] <= 0:  # drop targets outside the ROI based on confidence
            continue
        bbox_Tracking.append(result_list[bi][3])
        cls_ids_Tracking.append(result_list[bi][4])
        cls_conf.append(result_list[bi][5])

    outputs_tracking = []
    # # if bbox_Tracking is not None:
    bbox_xcycwh = []

    # convert to (centerX, centerY, width, height) bbox format
    for i in range(len(bbox_Tracking)):
        (cx, cy) = ((bbox_Tracking[i][0] + bbox_Tracking[i][2]) / 2.0, (bbox_Tracking[i][1] + bbox_Tracking[i][3]) / 2.0)
        (w, h) = (bbox_Tracking[i][2] - bbox_Tracking[i][0], bbox_Tracking[i][3] - bbox_Tracking[i][1])
        bbox_xcycwh.append([cx, cy, w, h])

    bbox_xcycwh = np.asarray(bbox_xcycwh)
    cls_conf = np.asarray(cls_conf)
    # global deepsort
    if bbox_xcycwh is not None and len(bbox_xcycwh) > 0:
        outputs_tracking = deepsort.update(bbox_xcycwh, cls_conf, cls_ids_Tracking, frame)

    end = time.time()
    print('runtime: {0:.2f} ms'.format((end - start)*1000))
    
    if outputs_tracking is not None and len(outputs_tracking) > 0:
        # if len(boxes) > 0:
        bbox_xyxy = outputs_tracking[:, :4]   #x1, y1, x2, y2
        identities = outputs_tracking[:, 5]  #track_id
        clsTracking = outputs_tracking[:, 4]  #classLabel index
        trace = outputs_tracking[:, -1]   # trace of object
        # draw the tracked bboxes and ids
        ori_im = draw_bboxes(frame, bbox_xyxy, identities, clsTracking, trace, h_inv)

    return orig_im, outputs_tracking
Example #15
def detect_sign(frame, confidence, inp_dim, CUDA, model, num_classes, nms_thesh, classes_gtsrb):
    try:
        b,g,r = cv2.split(frame)       # get b,g,r
        frame_rgb = cv2.merge([r,g,b])     # switch it to rgb
    except:
        return None,"fsdaf"
    img, orig_im, dim = prep_image(frame, inp_dim)
    sign = True
    im_dim = torch.FloatTensor(dim).repeat(1,2)                        

    if CUDA:
        im_dim = im_dim.cuda()
        img = img.cuda()
    with torch.no_grad():
        output = model(Variable(img), CUDA)
    output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)
    
    if type(output) == int:
        print('no prediction observed')
    else: 
        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1)

        output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim[:,0].view(-1,1))/2
        output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim[:,1].view(-1,1))/2

        output[:,1:5] /= scaling_factor
        gtsrb_labels = np.zeros(output.shape[0])
        _signs_ = []
        _outputs_ = []
        for i in range(output.shape[0]):
            output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0])
            output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1])

            if output[i][-1].round() == 0:
                y1,y2,x1,x2 =output[i][1].int(),output[i][3].int(),output[i][2].int(),output[i][4].int()
                img = frame_rgb[x1:x2,y1:y2]
                if img.shape[0] == 0 and img.shape[1] == 0:
                    return None, frame_rgb
                out_vector = output[i][1:].cpu().numpy()
                try:
                    processed_img = np.array(preprocess_img(img))
                    processed_img_uint = np.transpose((processed_img*255).astype(np.uint8),(1,2,0))
                    processed_img_batch = np.expand_dims(processed_img,axis=0)
                    gtsrb_labels[i] = 0 #classification_model.predict_classes(processed_img_batch)
                    output[i][-1] = gtsrb_labels[i]
                    out_vector[-1] = gtsrb_labels[i]
                    frame_rgb, c1, c2, cls = write(output[i], frame_rgb, gtsrb_labels[i],classes_gtsrb)
                    out_vector[0] = c1[0]
                    out_vector[1] = c1[1]
                    out_vector[2] = c2[0]
                    out_vector[3] = c2[1]
                    if (int(c1[0]) == 0 and int(c1[1]) == 0) or (int(c2[0]) == 0 and int(c2[1]) == 0):
                        return None, frame_rgb
                except Exception as e:
                    print(e)
                _outputs_.append(out_vector)
                print(_outputs_)
            else:
                output[i][-1] = output[i][-1] + 21
        if _outputs_ == []:
            return None, frame_rgb
        return np.array(_outputs_), frame_rgb
Example #16
def drone():
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80

    CUDA = torch.cuda.is_available()
    
    bbox_attrs = 5 + num_classes
    
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0 
    assert inp_dim > 32

    if CUDA:
        model.cuda()
        
    model(get_test_input(inp_dim, CUDA), CUDA)

    model.eval()
    
    videofile = args.video
    
    cap = cv2.VideoCapture(0)
    
    assert cap.isOpened(), 'Cannot capture source'
    
    frames = 0
    start = time.time()    
    while cap.isOpened():
        
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            
            im_dim = torch.FloatTensor(dim).repeat(1,2)                        
            
            
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()
            
            with torch.no_grad():   
                output = model(Variable(img), CUDA)
            
            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)
            
           

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
            
            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1)
            
            output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim[:,0].view(-1,1))/2
            output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim[:,1].view(-1,1))/2
            
            output[:,1:5] /= scaling_factor
    
            for i in range(output.shape[0]):
                output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0])
                output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1])
            
             
             
            if output[0][0] == 0.0:
                print("Drone Detected")
                break
            
            #break
            #colors = pkl.load(open("pallete", "rb"))
            
            list(map(lambda x: write(x, orig_im), output))
            
            
            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))

            
        else:
            break
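
Note: the snippets in these examples call helpers such as prep_image, write_results, write and load_classes without defining them; they come from the surrounding PyTorch YOLOv3 detector utilities. As a reference point only, here is a minimal sketch of the letterbox resize that prep_image is assumed to perform (the name prep_image_sketch and the exact details are illustrative, not the original implementation):

import cv2
import numpy as np
import torch

def prep_image_sketch(img, inp_dim):
    # Letterbox-resize a BGR frame to (inp_dim, inp_dim) with grey padding and
    # return (CHW float tensor with batch dim, original frame, (width, height)).
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    w, h = dim
    scale = min(inp_dim / w, inp_dim / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(orig_im, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    canvas = np.full((inp_dim, inp_dim, 3), 128, dtype=np.uint8)
    top, left = (inp_dim - new_h) // 2, (inp_dim - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized
    # BGR HWC uint8 to RGB CHW float in [0, 1], plus a batch dimension
    tensor = torch.from_numpy(canvas[:, :, ::-1].transpose((2, 0, 1)).copy())
    tensor = tensor.float().div(255.0).unsqueeze(0)
    return tensor, orig_im, dim

The returned dim is (width, height), which is why the examples build im_dim with .repeat(1, 2) and later undo the resize with scaling_factor = min(inp_dim / im_dim).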
Example #17
def demo():

    params = {
        "video": "video.avi",  # Video to run detection upon
        "dataset": "pasacal",  # Dataset on which the network has been trained
        "confidence": 0.5,  # Object Confidence to filter predictions
        "nms_thresh": 0.4,  # NMS Threshold
        "cfgfile": "cfg/yolov3.cfg",  # Config file
        "weightsfile": "yolov3.weights",  # Weightsfile
        "repo":
        416  # Input resolution of the network.  Increase to increase accuracy.  Decrease to increase speed
    }

    confidence = float(params["confidence"])
    nms_thesh = float(params["nms_thresh"])
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80

    bbox_attrs = 5 + num_classes

    bboxes = []
    xywh = []

    print("Loading network.....")
    model = Darknet(params["cfgfile"])
    model.load_weights(params["weightsfile"])
    print("Network successfully loaded")

    model.net_info["height"] = params["repo"]
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = params["video"]
    # activate our centroid tracker
    (H, W) = (None, None)
    ct = CentroidTracker(maxDisappeared=40, maxDistance=50)
    trackers = []
    trackableObjects = {}
    totalFrames = 0
    totalDown = 0
    totalUp = 0

    # set 0 for debug
    cap = cv2.VideoCapture(0)
    fps = FPS().start()
    rects = []
    status = "Waiting.."

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            # frame dimensions used for the counting line and the overlay text
            if W is None or H is None:
                (H, W) = frame.shape[:2]
            # start each frame with fresh tracker and rect lists so stale boxes
            # from previous frames do not accumulate
            trackers = []
            rects = []

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("No detections in this frame")
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            for i in output:
                # plain Python ints: dlib.rectangle (and the prints below) expect ints,
                # not 0-dim tensors
                x0 = int(i[1])
                y0 = int(i[2])
                x1 = int(i[3])
                y1 = int(i[4])
                bbox = (x0, y0, x1, y1)
                bboxes.append(bbox)
                print(bbox)
                w = x1 - x0
                h = y1 - y0
                xywh.append((x0, y0, w, h))
                print(x0, y0, w, h)

                tracker = dlib.correlation_tracker()
                rect = dlib.rectangle(x0, y0, x1, y1)
                tracker.start_track(rgb, rect)

                trackers.append(tracker)
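                # one dlib correlation tracker is started per detection in this frame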

            for tracker in trackers:
                # set the status of the system to tracking
                status = "Tracking.."
                # update the tracker and grab the updated position
                tracker.update(rgb)
                pos = tracker.get_position()
                # Unpack the position
                x0 = int(pos.left())
                y0 = int(pos.top())
                x1 = int(pos.right())
                y1 = int(pos.bottom())
                # add the bounding box coordinates to the rects list
                rects.append((x0, y0, x1, y1))
                # moving 'up' or 'down'
            cv2.line(frame, (0, H // 2), (W, H // 2), (0, 255, 255), 2)
            objects = ct.update(rects)
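            # the centroid tracker associates the current rectangles with persistent
            # object IDs by centroid distance (a pyimagesearch-style CentroidTracker is assumed)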
            # Loop through the tracked objects
            for (objectID, centroid) in objects.items():
                to = trackableObjects.get(objectID, None)
                if to is None:
                    to = TrackableObject(objectID, centroid)
                else:
                    y = [c[1] for c in to.centroids]
                    direction = centroid[1] - np.mean(y)
                    to.centroids.append(centroid)
                    if not to.counted:
                        # if the direction is negative
                        # indicating the object is moving up
                        # and the centroid is above the center line
                        # count the object
                        if direction < 0 and centroid[1] < H // 2:
                            totalUp += 1
                            to.counted = True
                        # if the direction is positive
                        # indicating the object is moving down
                        # and centroid is below the center line
                        elif direction > 0 and centroid[1] > H // 2:
                            totalDown += 1
                            to.counted = True

                    # store the trackable object in the dictionary
                    trackableObjects[objectID] = to

                #draw both the ID of the object and the centroid of the object
                # on the output frame
                text = "ID {}".format(objectID)
                cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0),
                           -1)
                info = [("Up", totalUp), ("Down", totalDown),
                        ("Status", status)]
                for (i, (k, v)) in enumerate(info):
                    text = "{}: {}".format(k, v)
                    cv2.putText(frame, text, (10, H - ((i * 20) + 20)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 225), 2)

            #return bboxes

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            # write bbox
            list(map(lambda x: write(x, orig_im, classes, colors), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            fps.update()
            fps.stop()
            print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
            print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

            #return xywh

        else:
            break
Example #18
    stanfordBookstore = cv2.VideoCapture(stanfordBookstorePath)
    assert capCam1.isOpened(), 'Cannot capture source'
    assert stanfordBookstore.isOpened(
    ), 'cannot capture Stanford bookstore video'

    frames = 0
    start = time.time()
    while capCam1.isOpened() and stanfordBookstore.isOpened():
        capCam1 = cv2.VideoCapture(cam1.url)

        ret, frame = capCam1.read()
        retSB, frameSB = stanfordBookstore.read()

        if ret and retSB:

            img, orig_im, dim = prep_image(frame, inp_dim)

            imgSB, orig_imSB, dimSB = prep_image(frameSB, inp_dim)
            im_dimSB = torch.FloatTensor(dimSB).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

                im_dimSB = im_dimSB.cuda()
                imgSB = imgSB.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
Example #19
def process(videofile, model, args):

    print(videofile)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    num_classes = 80

    CUDA = torch.cuda.is_available()

    bbox_attrs = 5 + num_classes

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    cap = cv2.VideoCapture(videofile)
    FRAME_WIDTH = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    FRAME_HEIGHT = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    FRAME_FPS = cap.get(cv2.CAP_PROP_FPS)
    FRAME_FOURCC = cap.get(cv2.CAP_PROP_FOURCC)
    # print (FRAME_WIDTH, FRAME_HEIGHT, FRAME_FPS, FRAME_FOURCC)
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    output_file = args.output + 'result_' + videofile.replace(args.videos, '')
    print(output_file)
    out = cv2.VideoWriter(output_file, int(FRAME_FOURCC), FRAME_FPS,
                          (int(FRAME_WIDTH), int(FRAME_HEIGHT)))
    print(FRAME_WIDTH, FRAME_HEIGHT)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    # start = time.time()
    start_time = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)

            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                # print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
                # print('============================================================')
                if not args.noshow:
                    cv2.imshow("frame", orig_im)
                if args.output is not None:
                    out.write(orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            list(map(lambda x: write(x, orig_im), output))

            if not args.noshow:
                cv2.imshow("frame", orig_im)
            if args.output is not None:
                out.write(orig_im)

            # cv2.imshow("frame", orig_im)
            # out.write(orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            # print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))

        else:
            break
    # fourcc = cv2.writer (*'XVID')
    cap.release()
    out.release()
    end_time = time.time()
    print("time: {}".format(str(end_time - start_time)))
Example #20
def image_get(q, window_name):
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    timeF = 20
    k = 0
    n = 0  # frame counter
    frames = 0
    i = 0
    start = 0
    start = time.time()

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()

    num_classes = 2
    bbox_attrs = 5 + num_classes

    model = Darknet(args.cfgfile)
    if args.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(args.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(args.weights_path))

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32
    if CUDA:
        model.cuda()
    model.eval()
    cv2.namedWindow(window_name, flags=cv2.WINDOW_FREERATIO)
    while True:
        frame = q.get()
        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        output = model(Variable(img), CUDA)
        output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)
        if type(output) == int:  # no detections: skip this frame
            frames += 1
            continue

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
        #            im_dim = im_dim.repeat(output.size(0), 1)
        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]
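        # boxes are clamped/normalised against the square network input and then stretched
        # to the frame size: a simpler mapping that ignores the letterbox padding, unlike
        # the scaling_factor approach used in the other examples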

        classes = load_classes('data/classes.names')
        colors = pkl.load(open("pallete", "rb"))

        list(map(lambda x: write(classes, colors, x, orig_im), output))
        list1 = list(map(lambda x: write1(x, orig_im), output))
        cv2.imshow(window_name, orig_im)  # display the video frame
        cv2.waitKey(1)
        frames += 1
        print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))

        n = n + 1
        i += 1

        if (n % timeF == 0):  # save a snapshot every timeF frames
            for j in range(0, len(list1)):
                if list1[j] == 1:
                    k = k + 1
                if list1[j] == 0:
                    k = 0
            if k != 0:
                cv2.imwrite('camera/{}.jpg'.format(i), orig_im)  # save the frame when someone without a helmet is detected
Example #21
 ttl_num = len(imlist)
 
 
 inter = 0
 detect_flag = False
 center_pos = lambda x: (x[0] + x[1])/2
 
 # deal with initial identities
 
 offset = 0
 print('Computing initial identities...')
 
 
 
 while True:
     frame, ogl, dim = prep_image(imlist[offset], inp_dim)
     position_pre = measure(frame, dim)[:, 1:5].numpy()
     position_pre[:, 0] = [(x[0]+x[2])/2 for x in position_pre]
     position_pre[:, 1] = [(x[1]+x[3])/2 for x in position_pre]
     
     frame, ogl, dim = prep_image(imlist[offset + interval], inp_dim)
     position_post = measure(frame, dim)[:, 1:5].numpy()
     position_post[:, 0] = [(x[0]+x[2])/2 for x in position_post]
     position_post[:, 1] = [(x[1]+x[3])/2 for x in position_post]
     
     identity.max_dim = dim
     
     
     pos_map, paired = pair_position(position_pre, position_post)
     
     if paired:
Example #22
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')

    queue = Queue()
    recorderthread = RecorderThread(queue, './output', fourcc, (width, height))
    recorderthread.start()

    frames = 0
    start = time.time()    
    while cap.isOpened():
        
        ret, frame = cap.read()
        if ret:
            

            img, orig_im, dim = prep_image(frame, inp_dim, args.rotation)
            
            im_dim = torch.FloatTensor(dim).repeat(1,2)                        
            
            
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()
            
            with torch.no_grad():   
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
Example #23
def main(args, model):

    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)

    CUDA = torch.cuda.is_available()

    num_classes = 80
    classes = load_classes('data/coco.names')

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU availible, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()

    read_dir = time.time()
    # Detection phase
    try:
        imlist = [
            osp.join(osp.realpath('.'), images, img)
            for img in os.listdir(images)
            if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1]
            == '.jpeg' or os.path.splitext(img)[1] == '.jpg'
        ]
    except NotADirectoryError:
        imlist = [osp.join(osp.realpath('.'), images)]
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        exit()

    if not os.path.exists(args.det):
        os.makedirs(args.det)

    load_batch = time.time()

    batches = [prep_image(img, inp_dim) for img in imlist]
    im_batches = [x[0]
                  for x in batches]  # each shape (1, 3, H, W) resized H, W
    orig_ims = [x[1] for x in batches]  # original cv2 images (H0, W0, 3), not resized
    im_dim_list = torch.FloatTensor([x[2] for x in batches
                                     ]).repeat(1, 2)  # (nr_img, 4)

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    if batch_size != 1:
        leftover = 1 if len(im_dim_list) % batch_size else 0
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [
            torch.cat(
                (im_batches[i * batch_size:min((i + 1) *
                                               batch_size, len(im_batches))]))
            for i in range(num_batches)
        ]
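        # individual image tensors are concatenated along dim 0 into batches of
        # batch_size (the last batch may be smaller)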

    i = 0

    write = False

    start_det_loop = time.time()

    for batch in im_batches:
        # load the image
        if CUDA:
            batch = batch.cuda()

        with torch.no_grad():
            prediction = model(batch, CUDA)

        prediction = write_results(prediction,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

        if type(prediction) == int:
            i += 1
            continue

        prediction[:, 0] += i * batch_size
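        # column 0 is the image index within the batch; adding i * batch_size turns it
        # into a global index into imlist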

        if not write:
            output = prediction
            write = 1
        else:
            output = torch.cat((output, prediction))

        i += 1

        if CUDA:
            torch.cuda.synchronize()

    try:
        output
    except NameError:
        print("No detections were made")
        exit()

    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())

    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)

    output[:, [1, 3]] -= (inp_dim -
                          scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim -
                          scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

    output[:, 1:5] /= scaling_factor

    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i,
                                                                            0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i,
                                                                            1])

    output_recast = time.time()

    class_load = time.time()

    draw = time.time()

    def _pad_bbox_to_square(c1, c2, pad_ratio=0.1):
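        # expand the box to a square with a pad_ratio margin so that the later
        # 224x224 crop keeps its aspect ratio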
        x1, y1 = c1  # left up
        x2, y2 = c2  # right down
        w, h = x2 - x1, y2 - y1
        if w > h:
            a, x, y = w, x1, y1 - (w - h) / 2.0
        else:
            a, x, y = h, x1 - (h - w) / 2.0, y1
        # expand bbox
        x = int(x - a * pad_ratio / 2)
        y = int(y - a * pad_ratio / 2)
        a = int(a + a * pad_ratio)
        return a, x, y

    def _write(a, x, y, img, filename):
        crop = img[y:y + a, x:x + a]
        crop = cv2.resize(crop, (224, 224))
        cv2.imwrite(filename, crop)

    # crop, resize and save person detection
    img_idx2size = {}
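    # keep only the largest person crop per image; img_idx2size remembers the
    # biggest square side seen so far for each image index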
    for o in output:
        if int(o[-1]) == 0:  # person: 0
            img_idx = int(o[0])
            a, x, y = _pad_bbox_to_square(
                as_numpy(o[1:3].int()).tolist(),
                as_numpy(o[3:5].int()).tolist())
            img = orig_ims[img_idx]
            if 0 < y and y + a < img.shape[0] and 0 < x and x + a < img.shape[
                    1]:
                if img_idx in img_idx2size.keys(
                ) and a < img_idx2size[img_idx]:
                    continue
                save_filename = "{}/{}_cropped.png".format(args.det, img_idx)
                _write(a, x, y, img, save_filename)
                img_idx2size[img_idx] = a

    end = time.time()

    print()
    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch",
                                   start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format(
        "Detection (" + str(len(imlist)) + " images)",
        output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing",
                                   class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img",
                                   (end - load_batch) / len(imlist)))
    print("----------------------------------------------------------")

    torch.cuda.empty_cache()
Example #24
def main():

    confidence = 0.5
    nms_thesh = 0.4
    num_classes = 80
    classes = load_classes('data/coco.names')

    print('cuda device count: ', torch.cuda.device_count())
    print("Loading network.....")
    net = Darknet('cfg/yolov3.cfg')
    net.load_weights('yolov3.weights')
    print("Network successfully loaded")
    net = net.to('cuda:0')
    net = net.eval()
    print('print model')
    print('model: ', net)

    #------------------------input images------------------------------------------------
    input, origin, dim = prep_image('imgs/dog.jpg', 320)
    print('input:', input)
    input = input.to('cuda:0')
    print(input.shape)
    prediction = net(input, True)
    print('pre shape: ', prediction.shape)
    print('pre : ', prediction)
    prediction = write_results(prediction,
                               confidence,
                               num_classes,
                               nms=True,
                               nms_conf=nms_thesh)
    print('pre shape1: ', prediction.shape)
    print('pre1: ', prediction)

    scaling_factor = min(320 / dim[0], 320 / dim[1], 1)
    print(scaling_factor)
    prediction[:, [1, 3]] -= (320 - scaling_factor * dim[0]) / 2
    prediction[:, [2, 4]] -= (320 - scaling_factor * dim[1]) / 2
    print('pre2: ', prediction)
    prediction[:, 1:5] /= scaling_factor
    print('pre3: ', prediction)

    for i in range(prediction.shape[0]):
        prediction[i, [1, 3]] = torch.clamp(prediction[i, [1, 3]], 0.0, dim[0])
        prediction[i, [2, 4]] = torch.clamp(prediction[i, [2, 4]], 0.0, dim[1])
    print('pre4: ', prediction)

    def write(x, batches, res):
        c1 = tuple(int(v) for v in x[1:3])  # plain ints for the OpenCV drawing calls
        c2 = tuple(int(v) for v in x[3:5])
        img = res
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        cv2.rectangle(img, c1, c2, (255, 0, 0), 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, (255, 0, 0), -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    list(map(lambda x: write(x, input, origin), prediction))
    cv2.imwrite('infout.png', origin)

    #------------------------input ones------------------------------------------------
    #print('state dict: ', net.state_dict().keys())
    tmp = torch.ones(1, 3, 320, 320).to('cuda:0')
    print('input: ', tmp)
    out = net(tmp, True)  # this Darknet forward also expects the CUDA flag, as above

    print('output:', out)

    summary(net, (3, 320, 320))
    #return
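    # dump the weights as plain text: one line per tensor, '<name> <count>' followed by
    # big-endian float32 hex words (a .wts-style export as consumed by some TensorRT
    # conversion tools)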
    f = open("yolov3.wts", 'w')
    f.write("{}\n".format(len(net.state_dict().keys())))
    for k, v in net.state_dict().items():
        print('key: ', k)
        print('value: ', v.shape)
        vr = v.reshape(-1).cpu().numpy()
        f.write("{} {}".format(k, len(vr)))
        for vv in vr:
            f.write(" ")
            f.write(struct.pack(">f", float(vv)).hex())
        f.write("\n")
Example #25
    def run(self, frame, frames):
        if self.skip_flag == 0:
            inp_dim = int(self.model.net_info["height"])
            assert inp_dim % 32 == 0
            assert inp_dim > 32

            self.model.eval()
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if self.CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = self.model(Variable(img), self.CUDA)

            output = write_results(output,
                                   self.confidence,
                                   self.num_classes,
                                   nms=True,
                                   nms_conf=self.nms_thesh)
            if type(output) == int:
                return frame

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            # print('output: {}'.format(output.shape[0]))
            for i in range(output.shape[0]):
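                # collect one row per detection: [frame index, x1, x2, y1, y2, class name]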
                data_list = np.array([[
                    frames,
                    int(output[i, 1]),
                    int(output[i, 3]),
                    int(output[i, 2]),
                    int(output[i, 4]), classes[int(output[i, 7])]
                ]])
                self.data = np.vstack([self.data, data_list])
                # print(self.data)

            # print(self.data)
            list(
                map(lambda x: write(
                    x,
                    orig_im,
                    classes,
                    colors,
                    frames,
                ), output))
            return orig_im
Example #26
def run(self):

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)

    # fig = plt.figure()
    # ax1 = fig.add_subplot(1,1,1)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80

    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model(get_test_input(inp_dim, CUDA), CUDA)

    model.eval()

    videofile = args.video

    cap = cv2.VideoCapture(videofile)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()

    first_iteration_indicator = 1

    count = 0

    fgbg = cv2.createBackgroundSubtractorMOG2()
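    # MOG2 background subtractor; its foreground masks are thresholded and accumulated
    # below into a motion heatmap that is colour-mapped and overlaid on the first frame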

    while cap.isOpened():

        for x in range(11):
            cap.grab()

        if (first_iteration_indicator == 1):
            ret, frame = cap.read()
            first_frame = copy.deepcopy(frame)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            height, width = gray.shape[:2]
            accum_image = np.zeros((height, width), np.float64)
            first_iteration_indicator = 0
        else:
            ret, frame = cap.read()
            if ret:

                img, orig_im, dim = prep_image(frame, inp_dim)

                im_dim = torch.FloatTensor(dim).repeat(1, 2)

                if CUDA:
                    im_dim = im_dim.cuda()
                    img = img.cuda()

                with torch.no_grad():
                    output = model(Variable(img), CUDA)

                output = write_results(output,
                                       confidence,
                                       num_classes,
                                       nms=True,
                                       nms_conf=nms_thesh)

                if type(output) == int:
                    frames += 1
                    # print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
                    cv2.imshow("frame", orig_im)
                    key = cv2.waitKey(1)
                    if key & 0xFF == ord('q'):
                        break
                    continue

                im_dim = im_dim.repeat(output.size(0), 1)
                scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

                output[:, [1, 3]] -= (
                    inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                output[:, [2, 4]] -= (
                    inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

                output[:, 1:5] /= scaling_factor

                for i in range(output.shape[0]):
                    output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                    im_dim[i, 0])
                    output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                    im_dim[i, 1])

                classes = load_classes('data/coco.names')
                colors = pkl.load(open("pallete", "rb"))

                m = list(map(lambda x: write(x, orig_im), output))

                cv2.imshow("frame", orig_im)
                orig_im.fill(0)

                h = list(map(lambda x: write_heatmap(x, orig_im), output))

                s = len(m)

                interface.update_people_number(self, s)

                # overwrite on the first frame, append afterwards
                mode = "w+" if count == 0 else "a+"
                with open("count.txt", mode) as f:
                    f.write("%d,%d \r\n" % (count, s))
                count += 1

                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                frames += 1
                # print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                fgmask = fgbg.apply(gray)

                thresh = 150
                maxValue = 10
                ret, th1 = cv2.threshold(fgmask, thresh, maxValue,
                                         cv2.THRESH_BINARY)

                cv2.imwrite('diff-th1.jpg', th1)

                accum_image = cv2.add(accum_image, th1, dtype=cv2.CV_64F)
            else:
                break
    accum_image = np.uint8(accum_image)
    color_image = cv2.applyColorMap(accum_image, cv2.COLORMAP_JET)
    # overlay the color mapped image to the first frame
    result_overlay = cv2.addWeighted(first_frame, 0.4, color_image, 0.4, 0)

    # save the final overlay image
    cv2.imwrite('diff-overlay.jpg', result_overlay)

    graph_data = open('count.txt', 'r').read()
    lines = graph_data.split('\n')
    xs = []
    ys = []
    for line in lines:
        if len(line) > 1:
            x, y = line.split(',')
            xs.append(int(x))
            ys.append(int(y))
    plt.plot(xs, ys)
    plt.savefig('test.jpg')

    graph_data_2 = open('TownCentre-groundtruth.txt', 'r').read()
    lines = graph_data_2.split('\n')
    xs_g = []
    ys_g = []
    x_check = 0
    count_g = 0
    for line in lines:
        if len(line) > 1:
            id, frame, cq, cq2, x_1, y_1, x_2, y_2, x_3, y_3, x_4, _y_4 = line.split(
                ',')
            if (x_check == frame):
                count_g = count_g + 1
            else:
                xs_g.append(int(count_g))
                ys_g.append(int(x_check))
                x_check = frame
                count_g = 1
    plt.plot(xs_g, ys_g)
    plt.savefig('ground.jpg')

    # cleanup1
    cap.release()
    cv2.destroyAllWindows()
Example #27
def stream_yolo_ready():
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    confidence = 0.7
    nms_thesh = 0.5
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 5
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    model.net_info["height"] = 416
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = 'video.avi'

    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'
    cap.set(3, 416)
    cap.set(4, 416)

    global frames
    global picture
    frames = 0

    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)
            global label_list
            global flag
            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                picture = orig_im
                label_list = []  # write_results returned an int: no detections to draw
                print("label_list : ", label_list)
                collision(label_list)
                flag = 0

                # code for saving the image
                #cv2.imwrite('yolo/static/images/fire_accident.jpg',orig_im)

                ret2, jpeg2 = cv2.imencode('.jpg', orig_im)
                detect_image_byte = jpeg2.tobytes()

                yield (b'--frame\r\n'
                       b'Content-Type: image/jpeg\r\n\r\n' +
                       detect_image_byte + b'\r\n\r\n')
                #cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim

            im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]
            # assign to the module-level globals
            picture = orig_im
            label_list = list(map(lambda x: write(x, orig_im), output))
            print("label_list : ", label_list)
            collision(label_list)
            flag = 0
            #cv2.imshow("frame", orig_im)

            # code for saving the image

            ret2, jpeg2 = cv2.imencode('.jpg', orig_im)

            detect_image_byte = jpeg2.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + detect_image_byte +
                   b'\r\n\r\n')
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1

            print("FPS of the video is {:5.2f}".format(frames /
                                                       (time.time() - start)))
        else:
            break
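
Note: stream_yolo_ready() above is a generator that yields multipart JPEG chunks, so it is meant to be wrapped in a streaming HTTP response. A minimal, hypothetical way to serve it with Flask (the route name and app object are illustrative, not part of the example):

from flask import Flask, Response

app = Flask(__name__)

@app.route('/video_feed')
def video_feed():
    # each yielded chunk already carries its own '--frame' boundary and Content-Type header
    return Response(stream_yolo_ready(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)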
Example #28
def demo():

    params = {
        "video": "video.avi",  # Video to run detection upon
        "dataset": "pasacal",  # Dataset on which the network has been trained
        "confidence": 0.5,  # Object Confidence to filter predictions
        "nms_thresh": 0.4,  # NMS Threshold
        "cfgfile": "cfg/yolov3.cfg",  # Config file
        "weightsfile": "yolov3.weights",  # Weightsfile
        "repo":
        416  # Input resolution of the network.  Increase to increase accuracy.  Decrease to increase speed
    }

    confidence = float(params["confidence"])
    nms_thesh = float(params["nms_thresh"])
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80

    bbox_attrs = 5 + num_classes

    bboxes = []
    xywh = []

    print("Loading network.....")
    model = Darknet(params["cfgfile"])
    model.load_weights(params["weightsfile"])
    print("Network successfully loaded")

    model.net_info["height"] = params["repo"]
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = params["video"]

    # set 0 for debug
    cap = cv2.VideoCapture(0)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        print("ret: ", ret)
        if ret:
            print("frame: ", frame.shape)

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("No detections in this frame")
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            print("output: ", output)
            print("output: ", output.shape)

            for i in output:
                x0 = i[1].int()
                y0 = i[2].int()
                x1 = i[3].int()
                y1 = i[4].int()
                bbox = (x0, y0, x1, y1)
                bboxes.append(bbox)
                print(bbox)
                w = x1 - x0
                h = y1 - y0
                xywh.append((x0, y0, w, h))
                print(x0, y0, w, h)

            #return bboxes

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            # write bbox
            list(map(lambda x: write(x, orig_im, classes, colors), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}g7".format(
                frames / (time.time() - start)))
            #return xywh

        else:
            break
Example #29
    tl_camera.start_video_stream(display=False)
    tl_camera.set_fps("low")
    tl_camera.set_resolution("low")
    tl_camera.set_bitrate(6)

    frames = 0
    start = time.time()
    i = 0

    # cap = cv2.VideoCapture('udp://192.168.10.1:11111')
    # assert cap.isOpened(), 'Cannot capture source'

    while (True):
        frame = tl_camera.read_video_frame(strategy="newest")
        img, orig_im, dim = prep_image(frame, inp_dim)
        output = model(Variable(img), CUDA)
        output = write_results(output,
                               confidence,
                               num_classes,
                               nms=True,
                               nms_conf=nms_thesh)

        if type(output) == int:
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames /
                                                       (time.time() - start)))
            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
Example #30
    videofile = args.video
    print(videofile)
    cap = cv2.VideoCapture(videofile)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()  # read the next frame
        if ret:

            frame = laneDetection(frame)
            img, orig_im, dim = prep_image(frame, inp_dim)  # get the resized tensor, original image, and its dimensions
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)
            if type(output) == int:
                frames += 1