Python output_boxes示例，utils.output_boxes Python示例

示例#1

0

显示文件

文件： image.py 项目： Bakso14/YOLOv3_TF2

def main():
    """Run YOLOv3 detection on the image at ``img_path`` and display it.

    Builds the network from the module-level settings (``cfgfile``,
    ``model_size``, ``num_classes``), loads ``weightfile``, runs a single
    prediction, draws the detections and shows the annotated image in an
    OpenCV window until a key is pressed.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_path``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)

    class_names = load_class_names(class_name)

    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising,
    # so fail here with a clear message instead of deep inside TensorFlow.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))
    # Add a leading axis so the single image is passed as a batch.
    image = tf.expand_dims(np.array(image), 0)

    resized_frame = resize_image(image, (model_size[0], model_size[1]))
    pred = model.predict(resized_frame)

    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch axis again before drawing on the original image.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    win_name = 'Image detection'
    cv2.imshow(win_name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

示例#2

0

显示文件

def main(img_path, image_name):
    """Detect objects in ``<img_path>/<image_name>.jpg`` and save the result.

    The annotated image is written to the same directory as
    ``<image_name>_yolo.jpg``.

    Args:
        img_path: Directory containing the input image; also receives the
            output image.
        image_name: File name of the input image without the ``.jpg``
            extension.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read the input image.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    source_path = os.path.join(img_path, "{}.jpg".format(image_name))
    image = cv2.imread(source_path)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(source_path))
    # Add a leading axis so the single image is passed as a batch.
    image = tf.expand_dims(np.array(image), 0)

    resized_frame = resize_image(image, (model_size[0], model_size[1]))
    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch axis again before drawing on the original image.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    # Save the annotated image next to the input.
    cv2.imwrite(os.path.join(img_path, "{}_yolo.jpg".format(image_name)), img)

示例#3

0

显示文件

def main():
    """Run YOLOv3 detection on frames from camera 0 and display them live.

    Each frame is resized, passed through the model, annotated and shown in
    an OpenCV window. The estimated frames per second is printed for every
    frame. The loop ends when no frame can be read or when ``q`` is pressed.
    The window and the capture device are always released; the success
    message is printed only when the loop ends without an exception.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)

    class_names = load_class_names(class_name)

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    # Specify the video input: 0 means camera 0.
    # For a video file, replace the 0 with the path to the video.
    cap = cv2.VideoCapture(0)

    try:
        while True:
            start = time.time()
            ret, frame = cap.read()
            if not ret:
                break

            # Add a leading axis so the single frame is passed as a batch.
            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(resized_frame,
                                         (model_size[0], model_size[1]))

            pred = model.predict(resized_frame)

            boxes, scores, classes, nums = output_boxes(
                pred, model_size,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)

            img = draw_outputs(frame, boxes, scores, classes, nums,
                               class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start

            # Calculate frames per second; a coarse clock can report zero
            # elapsed time, which must not raise ZeroDivisionError.
            fps = 1 / seconds if seconds > 0 else float('inf')
            print("Estimated frames per second : {0}".format(fps))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Only reached when the loop finished without raising.
    print('Detections have been performed successfully.')

示例#4

0

显示文件

文件： image.py 项目： peternabil/yolov3-tf-api

def main(img, model):
    """Run detection on an in-memory image with an already loaded model.

    Args:
        img: The image to analyse; it is converted with ``np.array``.
        model: A loaded model object exposing ``predict``.

    Returns:
        tuple: ``(img, person_num, boxes, scores, classes, nums,
        class_names)`` where ``img`` and ``person_num`` are the two values
        returned by ``draw_outputs`` and the remaining entries are the
        ``output_boxes`` results plus the loaded class names.
    """
    class_names = load_class_names(class_name)

    # Add a leading axis so the single image is passed as a batch.
    image = tf.expand_dims(np.array(img), 0)

    resized_frame = resize_image(image, (model_size[0], model_size[1]))
    pred = model.predict(resized_frame)

    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch axis again before drawing on the original image.
    image = np.squeeze(image)
    annotated, person_num = draw_outputs(image, boxes, scores, classes, nums,
                                         class_names)
    return annotated, person_num, boxes, scores, classes, nums, class_names

示例#5

0

显示文件

def get_prediction(inputimage):
    """Detect objects in the image file ``inputimage`` and return the result.

    The annotated image is shown in an OpenCV window, written to
    ``outputimgage.jpg`` and returned once a key is pressed. The window is
    always closed; errors raised during detection propagate to the caller
    instead of being swallowed by a ``return`` inside ``finally``.

    Args:
        inputimage: Path of the image file to analyse.

    Returns:
        The annotated image produced by ``draw_outputs``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``inputimage``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)

    # Flag 1 asks OpenCV for a 3-channel colour image.
    frame = cv2.imread(inputimage, 1)
    # cv2.imread reports failure by returning None rather than raising.
    if frame is None:
        raise FileNotFoundError("Could not read image: {}".format(inputimage))

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    try:
        # Add a leading axis so the single image is passed as a batch.
        resized_frame = tf.expand_dims(frame, 0)
        resized_frame = resize_image(resized_frame,
                                     (model_size[0], model_size[1]))
        pred = model.predict(resized_frame)
        boxes, scores, classes, nums = output_boxes(
            pred, model_size,
            max_output_size=max_output_size,
            max_output_size_per_class=max_output_size_per_class,
            iou_threshold=iou_threshold,
            confidence_threshold=confidence_threshold)
        img = draw_outputs(frame, boxes, scores, classes, nums, class_names)
        cv2.imshow(win_name, img)
        cv2.imwrite('outputimgage.jpg', img)
        # Keep the window open until the user presses a key.
        cv2.waitKey()
    finally:
        cv2.destroyAllWindows()

    # Only reached when detection finished without raising.
    print('Detections have been performed successfully.')
    return img

示例#6

0

显示文件

文件： image.py 项目： Shrimadh/YOLOv3-Tensorflow

def main():
    """Run YOLOv3 detection on ``img_path`` and save it as ``result1.jpg``.

    Also prints the loaded class names before running the prediction.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_path``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)
    class_names = load_class_names(class_name)
    print("class_names", class_names)

    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))
    # Add a leading axis so the single image is passed as a batch.
    image = tf.expand_dims(np.array(image), 0)

    resized_frame = resize_image(image, (model_size[0], model_size[1]))
    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    # Drop the batch axis again before drawing on the original image.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)
    cv2.imwrite('result1.jpg', img)

示例#7

0

显示文件

def detect_image(img_path):
    """Run YOLOv3 detection on one image file and display the result.

    Model settings and thresholds come from ``cfg``; the output-size limits
    come from the module-level ``max_output_size`` and
    ``max_output_size_per_class``.

    Args:
        img_path: Path of the image file to analyse.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_path``.
    """
    model = YOLOv3Net(cfg.CFGFILE, cfg.MODEL_SIZE, cfg.NUM_CLASSES)
    model.load_weights(cfg.WEIGHTFILE)
    class_names = load_class_names(cfg.CLASS_NAME)

    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))
    # Add a leading axis so the single image is passed as a batch.
    image = tf.expand_dims(np.array(image), 0)

    resized_frame = resize_image(image,
                                 (cfg.MODEL_SIZE[0], cfg.MODEL_SIZE[1]))
    pred = model.predict(resized_frame)
    boxes, scores, classes, nums = output_boxes(
        pred, cfg.MODEL_SIZE,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=cfg.IOU_THRESHOLD,
        confidence_threshold=cfg.CONFIDENCE_THRESHOLD)

    # Drop the batch axis again before drawing on the original image.
    image = np.squeeze(image)
    img = draw_outputs(image, boxes, scores, classes, nums, class_names)

    win_name = 'Detection'
    cv2.imshow(win_name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

示例#8

0

显示文件

def detect_video(video_path):
    """Run YOLOv3 detection on a video source and display the frames live.

    The capture source is whatever ``returnCameraOrFile(video_path)``
    returns. Each frame is resized, passed through the model, annotated and
    shown; the frames per second is printed for every frame. The loop ends
    when no frame can be read or when ``q`` is pressed. The window and the
    capture are always released; the success message is printed only when
    the loop ends without an exception.

    Args:
        video_path: Value handed to ``returnCameraOrFile`` to select the
            capture source.
    """
    model = YOLOv3Net(cfg.CFGFILE, cfg.MODEL_SIZE, cfg.NUM_CLASSES)
    model.load_weights(cfg.WEIGHTFILE)
    class_names = load_class_names(cfg.CLASS_NAME)

    win_name = 'Detection'
    cv2.namedWindow(win_name)
    cap = cv2.VideoCapture(returnCameraOrFile(video_path))

    try:
        while True:
            start = time.time()
            ret, frame = cap.read()
            if not ret:
                break

            # Add a leading axis so the single frame is passed as a batch.
            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(
                resized_frame, (cfg.MODEL_SIZE[0], cfg.MODEL_SIZE[1]))
            pred = model.predict(resized_frame)
            boxes, scores, classes, nums = output_boxes(
                pred, cfg.MODEL_SIZE,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=cfg.IOU_THRESHOLD,
                confidence_threshold=cfg.CONFIDENCE_THRESHOLD)
            img = draw_outputs(frame, boxes, scores, classes, nums,
                               class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start
            # Calculate frames per second; a coarse clock can report zero
            # elapsed time, which must not raise ZeroDivisionError.
            fps = 1 / seconds if seconds > 0 else float('inf')
            print("Frames per second : {0}".format(fps))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Only reached when the loop finished without raising.
    print('Detections performed successfully.')

示例#9

0

显示文件

def main():
    """Run detection on ``img_path``, print the inference time and show it.

    The annotated image is scaled to half size before it is displayed in an
    OpenCV window until a key is pressed.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_path``.
    """
    model = yolov3_net(cfg_file, num_classes)
    model.load_weights(weights_file)

    class_names = load_class_names(class_names_file)

    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))
    # Add a leading axis so the single image is passed as a batch.
    image = tf.expand_dims(image, 0)
    resized_frame = resize_image(image, (model_size[0], model_size[1]))

    # Time only the forward pass.
    start_time = time.time()
    pred = model.predict(resized_frame, steps=1)
    print("Time inference: ", time.time() - start_time)

    boxes, scores, classes, nums = output_boxes(
        pred, model_size, max_output_size, max_output_size_per_class,
        iou_threshold, confidence_threshold)

    # Drop the batch axis again before drawing on the original image.
    image = np.squeeze(image)
    img = draw_output(image, boxes, scores, classes, nums, class_names)

    # Halve both dimensions for display.
    img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)
    win_name = "Image detection"
    cv2.imshow(win_name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

示例#10

0

显示文件

def write_predict(raw_image, graph, fps):
    """Run the graph on one image and write the annotated result to disk.

    Opens a ``tf.Session`` on ``graph``, encodes ``raw_image`` with
    ``encode_img``, evaluates the module-level tensor ``y`` with the encoded
    image fed into ``x``, post-processes the prediction with
    ``output_boxes`` and writes the annotated image to ``result.jpg``.

    Args:
        raw_image: Image handed to ``encode_img``.
        graph: Graph the session is created on.
        fps: Frames-per-second value to overlay on the image, or ``None``
            to skip the overlay.
    """
    with tf.Session(graph=graph) as sess:
        # Prepare the model input from the raw image.
        raw_img, test_input = encode_img(raw_image, MODEL_SIZE)
        print('test_input shape', test_input.shape)

        # Forward pass through the TF model.
        pred = sess.run(y, feed_dict={x: test_input})

        # Turn the raw prediction into boxes, scores, classes and counts.
        boxes, scores, classes, nums = output_boxes(
            pred,
            MODEL_SIZE,
            max_output_size=MAX_OUTPUT_SIZE,
            max_output_size_per_class=MAX_OUTPUT_SIZE_PER_CLASS,
            iou_threshold=IOU_THRESHOLD,
            confidence_threshold=CONFIDENCE_THRESHOLD,
        )
        img = draw_outputs(raw_img, boxes, scores, classes, nums, class_names)

        # Overlay the FPS value when one was supplied.
        if fps is not None:
            fps_color = (0, 0, 255)  # BGR
            fps_label = "FPS: {:.2f}".format(fps)
            cv2.putText(img, fps_label, (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, fps_color, 1)

        # Persist the final result.
        cv2.imwrite('result.jpg', img)
        print('scores', scores.eval())
示例#11

0

显示文件

文件： image.py 项目： robertchu1205/deep_learning

def main():
    """Run YOLOv3 detection on ``img_filename`` and print the raw results.

    Prints the boxes, the scores at or above ``confidence_threshold``, the
    non-zero class ids and the detection counts. Nothing is drawn or saved:
    the function returns right after printing, so the drawing and saving
    code that used to follow the ``return`` could never run and has been
    removed.

    Returns:
        int: Always ``0``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``img_filename``.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)

    class_names = load_class_names(class_name)

    image = cv2.imread(img_filename)
    # cv2.imread reports failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError(
            "Could not read image: {}".format(img_filename))
    # Add a leading axis so the single image is passed as a batch.
    image = tf.expand_dims(np.array(image), 0)

    resized_frame = resize_image(image, (model_size[0], model_size[1]))
    pred = model.predict(resized_frame)

    boxes, scores, classes, nums = output_boxes(
        pred, model_size,
        max_output_size=max_output_size,
        max_output_size_per_class=max_output_size_per_class,
        iou_threshold=iou_threshold,
        confidence_threshold=confidence_threshold)

    print('boxes', boxes)
    print('scores', scores[scores >= confidence_threshold])
    print('classes', classes[classes != 0])
    print('nums', nums)
    return 0

示例#12

0

显示文件

文件： model.py 项目： Ami13F/DarknetConverter

    def create_network(self):
        '''
        Build the YOLO Keras model from the parsed cfg blocks.

        Reads ``self.model_size``, ``self.num_classes``, ``self.blocks``,
        ``self.iou_threshold`` and ``self.confidence_threshold``; the method
        takes no arguments of its own.

        The first entry of ``self.blocks`` is skipped and every remaining
        block is converted to tensors according to its ``"type"``:
        convolutional, upsample, maxpool, route, shortcut or yolo. The
        predictions of all yolo blocks are concatenated along axis 1 and
        passed to ``output_boxes``.

        Return:
            model -> ``Model`` mapping the input image to the four values
            returned by ``output_boxes``.
        '''
        model_size = self.model_size
        # Output tensor of every processed block, keyed by block index.
        outputs = {}
        # Filter count recorded for every processed block, in block order.
        output_filters = []
        filters = []
        out_pred = []
        # Flag: 0 until the first yolo block has produced a prediction.
        scale = 0
        # create keras input for model
        inputs = input_image = Input(shape=model_size)
        # NOTE(review): num_classes is not referenced again in this method.
        num_classes = self.num_classes
        # Get all layers without net
        for i, block in enumerate(self.blocks[1:]):
            # If block is convolutional layer
            # print("Layer: {} type: {}".format(i, block['type']))
            if block["type"] == "convolutional":
                inputs, filters, strides = self.create_convolutional(
                    block, inputs, i)

            elif block["type"] == "upsample":
                stride = int(block["stride"])
                inputs = UpSampling2D(size=(stride, stride))(inputs)

            elif block["type"] == "maxpool":
                stride = int(block["stride"])
                size = int(block["size"])
                padd = 'same'
                inputs = MaxPool2D(size, strides=stride, padding=padd)(inputs)

            # If block is route layer
            elif block["type"] == "route":
                ind_backward = list(map(int, block["layers"].split(",")))
                # In case of relative indices
                # (negative values are offsets from the current index i)
                for ind, el in enumerate(ind_backward):
                    if el < 0:
                        ind_backward[ind] += i

                start = ind_backward[0]
                # Two indices: concatenate both referenced outputs on the
                # last axis. Only the first two indices are ever used.
                if len(ind_backward) > 1:
                    end = ind_backward[1]
                    filters = output_filters[start] + output_filters[end]
                    inputs = tf.concat([outputs[start], outputs[end]],
                                       axis=-1,
                                       name="route_{}".format(i))
                # One index for layer
                else:
                    filters = output_filters[start]
                    inputs = outputs[start]

            # Skip layers
            elif block["type"] == "shortcut":
                step = int(block["from"])
                # NOTE(review): activation is read but not used below.
                activation = block["activation"]
                if step < 0:
                    # relative step to the current layer
                    step += i

                last_output = outputs[i - 1]
                prev_output = outputs[step]
                # Smaller of the two last-axis sizes.
                out_channels = tf.reduce_min(
                    [last_output.shape[-1], outputs[step].shape[-1]])
                # create same dimensions for last channel
                # NOTE(review): out_channels is the minimum of both sizes,
                # so this "<" branch looks unreachable - confirm.
                if prev_output.shape[-1] < out_channels:
                    padd_val = (out_channels - prev_output.shape[-1])
                    padding = tf.constant([[0, 0], [0, 0], [0, 0],
                                           [0, padd_val]])
                    prev_output = tf.pad(prev_output, padding, "CONSTANT")
                elif prev_output.shape[-1] > out_channels:
                    prev_output = prev_output[:, :, :, :out_channels]
                elif last_output.shape[-1] > out_channels:
                    last_output = last_output[:, :, :, :out_channels]

                inputs = tf.math.add(last_output, prev_output)

            elif block["type"] == "yolo":
                inputs, initial_shape, anchors = self.create_yolo(
                    block, inputs)
                # NOTE(review): strides is only assigned by a convolutional
                # block or an earlier yolo block; this assumes one of them
                # precedes the first yolo block - confirm.
                strides, prediction = self.create_prediction(
                    inputs, block, input_image, anchors, initial_shape,
                    strides)
                # First yolo block starts the prediction; later ones are
                # appended along axis 1.
                if scale:
                    out_pred = tf.concat([out_pred, prediction], axis=1)
                else:
                    out_pred = prediction
                    scale = 1

            # Record this block's result so route/shortcut blocks can refer
            # back to it by index.
            outputs[i] = inputs
            output_filters.append(filters)

        # NOTE(review): the four results are forwarded to Model in the order
        # output_boxes returns them; confirm that the local names below match
        # that order.
        boxes, classes, scores, nums = output_boxes(
            out_pred,
            model_size,
            max_output_size=10,
            max_output_size_per_class=5,
            iou_threshold=self.iou_threshold,
            confidence_threshold=self.confidence_threshold)

        model = Model(input_image, outputs=[boxes, classes, scores, nums])

        return model

示例#13

0

显示文件

def main():
    """Run detection on paired left/right camera images and show distances.

    For every left-camera image, the resized left and right images are fed
    to the model together, detections are extracted with ``output_boxes``,
    ``objectDistance`` is evaluated on the original image pair, and the
    annotated left image is written to ``./out/Izlazna slika.png`` and
    displayed. Pressing ``q`` closes the windows and stops the loop.
    """
    # Create the model
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    # Load the trained coefficients into the model
    model.load_weights(weightfile)
    # Load the class names
    class_names = load_class_names(class_name)

    # Load the input images and pre-process them into the format the model
    # expects: left camera first, then right camera.
    images_left, resized_images_left, filenames_left = loadAndResize(
        img_path_left_cam)
    images_right, resized_images_right, filenames_right = loadAndResize(
        img_path_right_cam)

    # Inference over the input images.
    # Output predictions pred - a set of vectors (10647), each corresponding
    # to one object-location box.
    for idx in range(len(filenames_left)):
        left_image = images_left[idx]

        # Pair the resized left/right images, add a leading axis, then
        # squeeze out the size-1 axes again.
        pair = [resized_images_left[idx], resized_images_right[idx]]
        model_input = np.squeeze(tf.expand_dims(pair, 0))

        pred = model.predict(model_input)

        # Determine the boxes around detected objects (for the given
        # thresholds)
        boxes, scores, classes, nums = output_boxes(
            pred,
            model_size,
            max_output_size=max_output_size,
            max_output_size_per_class=max_output_size_per_class,
            iou_threshold=iou_threshold,
            confidence_threshold=confidence_threshold,
        )

        # Object distance and bounding box index
        distance_index_pair = objectDistance(
            left_image, images_right[idx], boxes, nums, classes)

        out_img = draw_outputs(left_image, boxes, scores, classes, nums,
                               class_names, cLeftCamId, distance_index_pair)

        # Save the result to a file
        out_file_name = './out/Izlazna slika.png'
        cv2.imwrite(out_file_name, out_img)

        # Show the result on screen
        cv2.imshow(out_file_name, out_img)

        if cv2.waitKey(20) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break

示例#14

0

显示文件

def main():
    """Run YOLOv3 detection on an IP-camera stream and display it live.

    Each iteration grabs a frame, reads the next one, runs the model on it,
    draws the detections and prints the estimated frames per second. The
    loop ends when no frame can be read or when ``q`` or Esc is pressed. The
    window and the capture are always released; the success message is
    printed only when the loop ends without an exception.
    """
    model = YOLOv3Net(cfgfile, model_size, num_classes)
    model.load_weights(weightfile)

    class_names = load_class_names(class_name)

    win_name = 'Yolov3 detection'
    cv2.namedWindow(win_name)

    # Specify the camera URL.
    # Change it to match your IP camera's RTSP or MPEG stream.
    cap = cv2.VideoCapture(
        "rtsp://*****:*****@172.168.50.208:554/cam/realmonitor?channel=1&subtype=1"
    )

    # Key code 27 is Esc.
    quit_keys = (ord('q'), 27)

    try:
        while True:
            start = time.time()
            cap.grab()  # Grab the most recent frame from the camera stream
            ret, frame = cap.read()  # Read it into a frame buffer
            if not ret:
                break

            # Add a leading axis so the single frame is passed as a batch.
            resized_frame = tf.expand_dims(frame, 0)
            resized_frame = resize_image(resized_frame,
                                         (model_size[0], model_size[1]))

            pred = model.predict(resized_frame)

            boxes, scores, classes, nums = output_boxes(
                pred, model_size,
                max_output_size=max_output_size,
                max_output_size_per_class=max_output_size_per_class,
                iou_threshold=iou_threshold,
                confidence_threshold=confidence_threshold)

            img = draw_outputs(frame, boxes, scores, classes, nums,
                               class_names)
            cv2.imshow(win_name, img)

            seconds = time.time() - start

            # Calculate frames per second; a coarse clock can report zero
            # elapsed time, which must not raise ZeroDivisionError.
            fps = 1 / seconds if seconds > 0 else float('inf')
            print("Estimated frames per second : {0}".format(fps))

            if cv2.waitKey(1) & 0xFF in quit_keys:
                break
    finally:
        cv2.destroyAllWindows()
        cap.release()

    # Only reached when the loop finished without raising.
    print('Detections have been performed successfully.')