예제 #1
0
파일: demo.py 프로젝트: Team1541/SWProject
def main(input_path, DEBUG):
    """Run the frozen detection graph over every image path in ``input_path``.

    Args:
        input_path: Iterable of image paths; each is opened with
            ``Image.open``.
        DEBUG: When truthy, ``draw_boxes`` is called on each image.

    Returns:
        Tuple ``(boxes_list, classes, FLAGS.size)``; ``boxes_list`` holds the
        ``non_max_suppression`` result for each image, in input order.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )
    classes = load_coco_names(FLAGS.class_names)
    frozenGraph = load_graph(FLAGS.frozen_model)
    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
    boxes_list = []
    # The ``with`` block closes the session on exit, so no explicit
    # sess.close() call is needed inside it.
    with tf.Session(graph=frozenGraph, config=config) as sess:
        for item in input_path:
            start = clock()
            # The flag is overwritten for every image (unchanged behavior);
            # after the loop it holds the last processed path.
            FLAGS.input_img = item
            img = Image.open(FLAGS.input_img)
            img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            boxes_list.append(filtered_boxes)
            if DEBUG:
                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)
            print(filtered_boxes)
            print("Execution Time : {} / #Symbols : {}  / Path : {}".format(
                clock() - start, len(filtered_boxes), item))
    tf.reset_default_graph()
    return boxes_list, classes, FLAGS.size
예제 #2
0
def detection(path):
    """Detect objects in the image at ``path`` and print a height estimate.

    Uses the module-level ``sess``, ``frozenGraph``, ``input_size``,
    ``conf_threshold``, ``iou_threshold`` and ``classes``. Prints the
    prediction time and, when the score returned by ``utils.draw_boxes``
    exceeds 0.90, the box height in pixels plus an estimated height in cm.
    Returns ``None``.
    """
    source_image = Image.open(path)
    net_input = utils.letter_box_image(source_image, input_size, input_size,
                                       128).astype(np.float32)
    boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)

    started = time.time()
    raw_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    kept_boxes = utils.non_max_suppression(
        raw_boxes,
        confidence_threshold=conf_threshold,
        iou_threshold=iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - started))

    if not kept_boxes:
        return

    _, _, score, box = utils.draw_boxes(
        kept_boxes, source_image, classes, (input_size, input_size), True)
    if score > 0.90:
        # Pixel height of the first box: element 3 minus element 1.
        pixel_height = box[0][3] - box[0][1]
        print(pixel_height)
        # Estimate the person's height from a reference object. Per the
        # original author's note the reference is a chair that is 96 cm tall
        # and spans 230 pixels at a fixed camera distance; both constants
        # must be re-tuned for each environment. The method has a large
        # error, so the printed value is for fun only.
        estimated_cm = (pixel_height * 96) / 230
        print("person_height: %.2fcm \n" % (estimated_cm))
def main(argv=None):
    """Send video frames to a TensorFlow Serving model and display detections.

    Frames are read from ``args.input_video``, letter-boxed to
    ``args.size`` x ``args.size``, JPEG-encoded and sent to the gRPC server at
    ``args.server`` (model ``yolov3_2``, signature ``predict_images``). The
    ``scores`` output is filtered with ``non_max_suppression``, drawn on the
    frame and shown in an OpenCV window together with the round-trip time.
    Pressing ``q`` stops the loop.

    Args:
        argv: Unused.
    """
    # Class names for the drawn labels.
    classes = load_coco_names(args.class_names)

    vid = cv2.VideoCapture(args.input_video)
    # Property 7: number of frames in the video file. The original author
    # notes that RTSP streams do not expose it.
    video_frame_cnt = int(vid.get(7))
    timeF = 10  # frame-sampling interval
    fpsnum = int(vid.get(1))  # property 1: 0-based index of the next frame

    try:
        # NOTE(review): this check runs once, before any frame is read, not
        # once per frame, so it does not sub-sample the stream. Kept as-is to
        # preserve behavior; confirm whether per-frame skipping was intended.
        if fpsnum % timeF == 0:
            # The channel and stub do not depend on the frame, so they are
            # created once instead of once per iteration.
            channel = grpc.insecure_channel(args.server)
            stub = prediction_service_pb2.PredictionServiceStub(channel)

            for _ in range(video_frame_cnt):
                ret, img_ori = vid.read()
                if not ret:
                    # No frame was returned; stop instead of passing None
                    # to cv2.cvtColor.
                    break

                # Letter-box the frame (cv2 BGR array -> RGB PIL image).
                img_ori = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
                img_ori = Image.fromarray(img_ori)
                img_resized = letter_box_image(img_ori, img_ori.size[1],
                                               img_ori.size[0], args.size,
                                               args.size, 128)
                img_resized = img_resized.astype(np.float32)

                # Encode the network input as JPEG bytes in memory.
                img_encode = cv2.imencode('.jpg', img_resized)[1]
                data_encode = np.array(img_encode)
                # tobytes() replaces the deprecated tostring() alias.
                jpeg_bytes = data_encode.tobytes()

                start_time = time.time()
                request = predict_pb2.PredictRequest()
                request.model_spec.name = 'yolov3_2'
                request.model_spec.signature_name = 'predict_images'
                request.inputs['images'].CopyFrom(
                    tf.contrib.util.make_tensor_proto(jpeg_bytes, shape=[1]))
                # Wait for the server reply (second argument: 10.0 timeout).
                response = stub.Predict(request, 10.0)

                # Convert every returned tensor proto into an ndarray.
                results = {}
                for key in response.outputs:
                    tensor_proto = response.outputs[key]
                    results[key] = tf.contrib.util.make_ndarray(tensor_proto)
                detected_boxes = results['scores']

                # Non-maximum suppression.
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=args.conf_threshold,
                    iou_threshold=args.iou_threshold)
                difference_time = time.time() - start_time  # request + NMS

                draw_boxes(filtered_boxes, img_ori, classes,
                           (args.size, args.size), True)

                # PIL image -> cv2 BGR array for display.
                cv2charimg = cv2.cvtColor(np.array(img_ori),
                                          cv2.COLOR_RGB2BGR)
                cv2.putText(cv2charimg,
                            '{:.2f}ms'.format((difference_time) * 1000),
                            (40, 40), 0,
                            fontScale=1, color=(0, 255, 0), thickness=2)
                cv2.imshow('image', cv2charimg)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # quit on 'q'
                    break
    finally:
        # Release the capture and close the window even if a frame fails.
        vid.release()
        cv2.destroyAllWindows()
예제 #4
0
def show_camera(sess, boxes, inputs):
    """Run detection on frames from the GStreamer camera and display them.

    Args:
        sess: Session used to evaluate ``boxes``.
        boxes: Fetch passed to ``sess.run``.
        inputs: Placeholder fed with the letter-boxed frame.

    The loop ends when the window is closed, when ESC (key code 27) is
    pressed, or when the camera stops delivering frames. Prints
    'Unable to open camera' if the capture cannot be opened.
    """
    # To flip the image, modify the flip_method parameter (0 and 2 are the
    # most common).
    classes = load_coco_names(FLAGS.class_names)
    print(gstreamer_pipeline(flip_method=0))
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0),
                           cv2.CAP_GSTREAMER)
    if not cap.isOpened():
        print('Unable to open camera')
        return

    try:
        cv2.namedWindow('CSI Camera', cv2.WINDOW_AUTOSIZE)
        while cv2.getWindowProperty('CSI Camera', 0) >= 0:
            ret_val, img = cap.read()
            if not ret_val:
                # No frame was delivered; stop instead of handing None to
                # cv2.cvtColor.
                break
            cv2_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im)
            img_resized = letter_box_image(pil_im, FLAGS.size, FLAGS.size, 128)
            img_resized = img_resized.astype(np.float32)
            detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
            filtered_boxes = non_max_suppression(
                detected_boxes,
                confidence_threshold=FLAGS.conf_threshold,
                iou_threshold=FLAGS.iou_threshold)
            draw_boxes(filtered_boxes, pil_im, classes,
                       (FLAGS.size, FLAGS.size), True)
            img = np.array(pil_im)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.imshow('CSI Camera', img)
            keyCode = cv2.waitKey(30) & 0xff
            if keyCode == 27:
                break
    finally:
        # Release the camera and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()
예제 #5
0
def main(argv=None):
    """Detect objects in ``FLAGS.input_img`` and save the annotated image.

    When ``FLAGS.frozen_model`` is set, the frozen graph is loaded and run;
    otherwise a model is built (tiny / spp / default, chosen by flags) and its
    weights are restored from ``FLAGS.ckpt_file``. The boxes that survive
    ``non_max_suppression`` are drawn on the image, which is written to
    ``FLAGS.output_img``.

    Args:
        argv: Unused.
    """
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction),
        log_device_placement=False,
    )

    img = Image.open(FLAGS.input_img)
    net_input = letter_box_image(img, FLAGS.size, FLAGS.size,
                                 128).astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)

    if not FLAGS.frozen_model:
        # Checkpoint path: build the network, then restore its variables.
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3_spp if FLAGS.spp else yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size,
                                             FLAGS.data_format)
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            restore_started = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(
                time.time() - restore_started))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    else:
        # Frozen-graph path.
        load_started = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - load_started))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph, config=config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})

    # t0 was taken just before sess.run, so the reported time covers
    # inference plus non-maximum suppression.
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
    img.save(FLAGS.output_img)
def main(argv=None):
    """Run the frozen graph on a video, show it and write ``output.mp4``.

    Frames are read from ``FLAGS.input_img`` (used here as the video source),
    flipped with ``cv2.flip(frame, 0)``, run through the detector, annotated
    and both displayed and appended to ``output.mp4`` (``mp4v`` fourcc, same
    FPS and frame size as the source). Pressing ``q`` or reaching the end of
    the stream stops the loop.

    Args:
        argv: Unused.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(FLAGS.class_names)

    t0 = time.time()
    frozenGraph = load_graph(FLAGS.frozen_model)
    print("Loaded graph in {:.2f}s".format(time.time() - t0))

    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

    with tf.Session(graph=frozenGraph, config=config) as sess:
        print(FLAGS.input_img)
        cap = cv2.VideoCapture(FLAGS.input_img)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        videoWriter = cv2.VideoWriter(
            "output.mp4", cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps,
            (int(width), int(height)))
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                frame = cv2.flip(frame, 0)
                img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size,
                                               128)
                img_resized = img_resized.astype(np.float32)

                # Restart the timer for every frame; a single timer started
                # before the loop would report cumulative time instead.
                t0 = time.time()
                detected_boxes = sess.run(boxes,
                                          feed_dict={inputs: [img_resized]})
                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                print("Predictions found in {:.2f}s".format(time.time() - t0))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)

                fimg = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                cv2.imshow("show", fimg)
                videoWriter.write(fimg)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Always finalize the output file and free the capture, even if
            # processing a frame raises.
            cap.release()
            videoWriter.release()
            cv2.destroyAllWindows()
예제 #7
0
    def get_classification(self, cv_image):
        """Determines the color of the traffic light in the image

        Args:
            cv_image (cv::Mat): image containing the traffic light

        Returns:
            int: ID of traffic light color (specified in styx_msgs/TrafficLight);
                TrafficLight.UNKNOWN when no class-9 box is detected or the
                predicted color is not YELLOW, RED or GREEN

        """
        image = Image.fromarray(cv_image)
        img_resized = letter_box_image(image, options['image_size'],
                                       options['image_size'], 128)
        img_resized = img_resized.astype(np.float32)

        boxes, inputs = get_boxes_and_inputs_pb(self.frozenGraph)

        t0 = time.time()
        detected_boxes = self.sess.run(boxes,
                                       feed_dict={inputs: [img_resized]})
        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=options['thresh'],
            iou_threshold=options['iou'])
        print("Predictions found in {:.2f}s".format(time.time() - t0))

        # Only class id 9 is inspected (presumably the "traffic light" entry
        # of the class list -- TODO confirm against the names file).
        light_boxes = filtered_boxes.get(9)
        if light_boxes is None or len(light_boxes) == 0:
            return TrafficLight.UNKNOWN

        for box, score in light_boxes:
            # The return value was never used by the original code either.
            # NOTE(review): this presumably relies on
            # convert_to_original_size rescaling `box` in place -- confirm.
            convert_to_original_size(
                box,
                (options['image_size'], options['image_size']),
                np.array(image.size), True)

        a = analyze_color({9: light_boxes}, cv_image)
        light_color = state_predict(a)
        print("the light color is {}".format(light_color))

        if light_color == 'YELLOW':
            return TrafficLight.YELLOW
        if light_color == 'RED':
            return TrafficLight.RED
        if light_color == 'GREEN':
            return TrafficLight.GREEN
        return TrafficLight.UNKNOWN
예제 #8
0
def main(argv=None):
    """Detect objects in ``out/images/19.png`` and save ``out_check.png``.

    Configuration comes from module-level names (``size``, ``frozen_model``,
    ``tiny``, ``data_format``, ``ckpt_file``, ``conf_threshold``,
    ``iou_threshold``). A frozen graph is used when ``frozen_model`` is
    truthy; otherwise a model is built and restored from ``ckpt_file``.

    Args:
        argv: Unused.
    """
    img = Image.open('out/images/19.png')
    net_input = letter_box_image(img, size, size, 128).astype(np.float32)
    classes = load_coco_names('coco.names')

    if not frozen_model:
        # Checkpoint path: build the network, then restore its variables.
        model = yolo_v3_tiny.yolo_v3_tiny if tiny else yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), size,
                                             data_format)
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session() as sess:
            restore_started = time.time()
            saver.restore(sess, ckpt_file)
            print('Model restored in {:.2f}s'.format(
                time.time() - restore_started))

            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    else:
        # Frozen-graph path.
        load_started = time.time()
        frozenGraph = load_graph(frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - load_started))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph) as sess:
            t0 = time.time()
            detected_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})

    # The reported time spans inference plus non-maximum suppression.
    filtered_boxes = non_max_suppression(detected_boxes,
                                         confidence_threshold=conf_threshold,
                                         iou_threshold=iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (size, size), True)
    img.save('out_check.png')
예제 #9
0
    def detect(self, img):
        """Run the detector on ``img`` and return the NMS-filtered boxes.

        The image is letter-boxed to ``self.size`` and evaluated through
        ``self.sess`` with full tracing enabled; the result of
        ``non_max_suppression`` is returned.
        """
        net_input = letter_box_image(img, self.size[0], self.size[1],
                                     128).astype(np.float32)

        # Full tracing is requested on every call; the collected metadata is
        # not read in this method.
        trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        trace_metadata = tf.RunMetadata()

        raw_boxes = self.sess.run(self.boxes,
                                  feed_dict={self.inputs: [net_input]},
                                  options=trace_options,
                                  run_metadata=trace_metadata)
        return non_max_suppression(
            raw_boxes,
            confidence_threshold=self.conf_threshold,
            iou_threshold=self.iou_threshold)
예제 #10
0
	def infer(self, input_image):
	    """Detect objects on a copy of ``input_image``.

	    The copy is letter-boxed to 416x416, evaluated through ``self.sess``
	    and filtered with ``non_max_suppression`` (confidence 0.8, IoU 0.5).
	    Boxes are drawn on the copy.

	    Returns:
	        Tuple ``(annotated_copy, filtered_boxes)``.
	    """
	    annotated = input_image.copy()
	    net_input = letter_box_image(annotated, 416, 416, 128).astype(np.float32)

	    started = time.time()
	    raw_boxes = self.sess.run(self.boxes,
	                              feed_dict={self.inputs: [net_input]})
	    kept_boxes = non_max_suppression(raw_boxes,
	                                     confidence_threshold=0.8,
	                                     iou_threshold=0.5)
	    # The reported time spans inference plus non-maximum suppression.
	    print("Predictions found in {:.3f}s".format(time.time() - started))

	    draw_boxes(kept_boxes, annotated, self.classes, (416, 416), True)
	    return annotated, kept_boxes
예제 #11
0
def main(argv=None):
    """Detect objects in ``FLAGS.input_img`` with the frozen graph and save it.

    The image is letter-boxed when ``FLAGS.keep_aspect_ratio`` is set and
    stretched with bilinear resampling otherwise. The first element of the
    ``non_max_suppression`` result is drawn on the image, which is written to
    ``FLAGS.output_img``.

    Args:
        argv: Unused.
    """
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction),
        log_device_placement=False,
    )

    img = Image.open(FLAGS.input_img)
    if not FLAGS.keep_aspect_ratio:
        stretched = img.resize((FLAGS.size, FLAGS.size), Image.BILINEAR)
        net_input = np.asarray(stretched, dtype=np.float32)
    else:
        net_input = utils.letter_box_image(img, FLAGS.size, FLAGS.size,
                                           128).astype(np.float32)

    classes = utils.load_names(FLAGS.class_names)
    frozenGraph = utils.load_graph(FLAGS.frozen_model)
    boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)

    with tf.Session(graph=frozenGraph, config=config) as sess:
        started = time.time()
        raw_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})

    # Printed after the session has closed, before NMS runs.
    print("Predictions found in {:.2f}s".format(time.time() - started))

    nms_result = utils.non_max_suppression(
        raw_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)
    first_image_boxes = nms_result[0]

    utils.draw_boxes(first_image_boxes, img, classes,
                     (FLAGS.size, FLAGS.size), FLAGS.keep_aspect_ratio)

    img.save(FLAGS.output_img)
예제 #12
0
def get_score_from_image(img_fp, gpu_options, config, model):
    """Run detection on one image and return person scores plus timing.

    Args:
        img_fp: Image source passed to ``Image.open``.
        gpu_options: Not used inside this function.
        config: Session configuration passed to ``tf.Session``.
        model: Graph used when ``FLAGS.frozen_model`` is set; ignored
            otherwise, where a network is built from flags and restored from
            ``FLAGS.ckpt_file``.

    Returns:
        Tuple ``(get_person_scores(filtered_boxes, classes), elapsed_ms)``
        where ``elapsed_ms`` is the graph setup plus inference time in
        milliseconds, rounded to three decimals.
    """
    image = Image.open(img_fp)
    net_input = letter_box_image(image, FLAGS.size, FLAGS.size,
                                 128).astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)

    started = time.time()
    if not FLAGS.frozen_model:
        # Checkpoint path: choose the architecture, build it, restore it.
        if FLAGS.tiny:
            network = yolo_v3_tiny.yolo_v3_tiny
        else:
            network = yolo_v3.yolo_v3_spp if FLAGS.spp else yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(network, len(classes),
                                             FLAGS.size, FLAGS.data_format)
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            saver.restore(sess, FLAGS.ckpt_file)
            raw_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    else:
        # Frozen-graph path: `model` is the already loaded graph.
        boxes, inputs = get_boxes_and_inputs_pb(model)

        with tf.Session(graph=model, config=config) as sess:
            raw_boxes = sess.run(boxes, feed_dict={inputs: [net_input]})
    elapsed_seconds = time.time() - started

    kept_boxes = non_max_suppression(
        raw_boxes,
        confidence_threshold=FLAGS.conf_threshold,
        iou_threshold=FLAGS.iou_threshold)

    person_scores = get_person_scores(kept_boxes, classes)
    return person_scores, round(elapsed_seconds * 1000, 3)
예제 #13
0
    def detect(self, frame):
        """Run the interpreter on ``frame`` and return the annotated frame.

        The frame is letter-boxed to the configured input size, converted to
        RGB, quantized with ``self.scale`` / ``self.zero_point`` and fed to
        ``self.interpreter``. The two outputs are de-quantized, filtered with
        ``non_max_suppression`` and drawn on the letter-boxed frame together
        with an FPS overlay.
        """
        started = time()
        input_size = (self.params["input_w"], self.params["input_h"])

        frame = letter_box_image(frame, input_size, 128)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Add a leading batch axis, then quantize to uint8.
        batch = np.expand_dims(rgb, axis=0).astype(np.float32)
        quantized = np.uint8(batch / self.scale + self.zero_point)

        self.interpreter.set_tensor(self.input_details[0]['index'], quantized)
        self.interpreter.invoke()

        # get_tensor() returns a copy of the tensor data; tensor() would
        # return a pointer instead.
        raw_first = self.interpreter.get_tensor(
            self.output_details[0]['index'])
        raw_second = self.interpreter.get_tensor(
            self.output_details[1]['index'])
        # De-quantize each output with its own scale and zero point.
        out_first = self.scale1 * (np.float32(raw_first) - self.zero_point1)
        out_second = self.scale2 * (np.float32(raw_second) - self.zero_point2)

        kept_boxes = non_max_suppression(
            [out_first, out_second],
            self.params,
            confidence_threshold=self.prob_threshold,
            iou_threshold=self.iou_threshold)
        draw_boxes(kept_boxes, frame, self.classes, input_size, self.colors,
                   True)

        elapsed = time() - started
        fps = 1. / elapsed

        cv2.putText(frame, "FPS: {:.1f}".format(fps), (10, 20),
                    cv2.FONT_HERSHEY_DUPLEX, 0.40, (0, 0, 255), 1, cv2.LINE_AA)

        return frame
def main(argv=None):
    """Run the frozen graph on a video and stream annotated frames to stdout.

    Frames are read from ``FLAGS.input_video``, run through the detector,
    annotated with boxes and the inference time, and written as raw bytes to
    ``sys.stdout.buffer`` (consumed by an FFMPEG server according to the
    original comment). The loop ends at end of stream or when ESC is pressed.

    Args:
        argv: Unused.
    """
    import sys  # local import so the stderr diagnostic below is self-contained

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    # Loaded once; the class names do not change between frames.
    classes = load_coco_names(FLAGS.class_names)

    t0 = time.time()
    frozenGraph = load_graph(FLAGS.frozen_model)
    # stdout carries the binary frame stream, so the diagnostic goes to
    # stderr to avoid mixing text into the frame data.
    print("Loaded graph in {:.2f}s".format(time.time() - t0), file=sys.stderr)

    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

    ### Start inference on Video
    # The constructor already opens the source; no second open() is needed.
    cap = cv2.VideoCapture(FLAGS.input_video)

    try:
        with tf.Session(graph=frozenGraph, config=config) as sess:
            while cap.isOpened():
                flag, img = cap.read()
                if not flag:
                    break
                key_pressed = cv2.waitKey(27)

                # cv2 BGR array -> RGB PIL image
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = Image.fromarray(img)
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size,
                                               128)
                img_resized = img_resized.astype(np.float32)

                t0 = time.time()
                detected_boxes = sess.run(boxes,
                                          feed_dict={inputs: [img_resized]})
                infer_time = time.time() - t0

                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)

                img = np.asarray(img)
                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

                cv2.putText(
                    img,
                    "infer time= " + str('{:.1f}'.format(infer_time * 1000)) +
                    " ms", (80, 40), 0, 0.5, (250, 0, 0), 1)

                ### Send the frame to the FFMPEG server ###
                sys.stdout.buffer.write(img)
                sys.stdout.flush()

                # Break if escape key pressed
                if key_pressed == 27:
                    break
    finally:
        # Release the capture and destroy any OpenCV windows, even if a
        # frame fails.
        cap.release()
        cv2.destroyAllWindows()
예제 #15
0
def main(argv=None):
    """Send one image to a TensorFlow Serving model and plot the detections.

    ``args.input_img`` is read, letter-boxed to ``args.size`` x ``args.size``,
    saved to ``args.temp_img`` and base64-encoded, then sent to the gRPC
    server at ``args.server`` (model ``yolov3_2``, signature
    ``predict_images``). The ``scores`` output is filtered with
    ``non_max_suppression``, drawn on the image and shown with matplotlib.

    Args:
        argv: Unused.

    Raises:
        IOError: If ``args.input_img`` cannot be read.
    """
    # Class names for the drawn labels.
    classes = load_coco_names(args.class_names)

    img_ori = cv2.imread(args.input_img)
    if img_ori is None:
        # cv2.imread reports failure by returning None; fail here with a
        # clear message instead of an obscure cvtColor assertion.
        raise IOError("Unable to read image: {}".format(args.input_img))
    img_ori = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img_ori)  # cv2 array -> PIL image

    # Letter-box (pad) the image to the network input size.
    img_resized = letter_box_image(img, img.size[1], img.size[0], args.size,
                                   args.size, 128)
    img_resized = img_resized.astype(np.float32)
    scipy.misc.imsave(args.temp_img, img_resized)
    _, jpeg_bytes = base64_encode_img(args.temp_img)

    # Server connection setup.
    channel = grpc.insecure_channel(args.server)
    stub = prediction_service_pb2.PredictionServiceStub(channel)
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'yolov3_2'
    request.model_spec.signature_name = 'predict_images'

    # Send the request and wait for the reply (second argument: 10.0 timeout).
    t0 = time.time()
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(jpeg_bytes, shape=[1]))
    response = stub.Predict(request, 10.0)

    # Convert every returned tensor proto into an ndarray.
    results = {}
    for key in response.outputs:
        tensor_proto = response.outputs[key]
        results[key] = tf.contrib.util.make_ndarray(tensor_proto)
    detected_boxes = results['scores']

    # Non-maximum suppression.
    filtered_boxes = non_max_suppression(
        detected_boxes,
        confidence_threshold=args.conf_threshold,
        iou_threshold=args.iou_threshold)

    # Draw the boxes on the original-size image.
    draw_boxes(filtered_boxes, img, classes, (args.size, args.size), True)

    # Show the result; the title reports the elapsed time since the request.
    plt.figure('判断结果')
    font = {
        'family': 'simhei',
        'weight': 'normal',
        'size': 18,
    }
    plt.title('判断耗时:{:.2f}ms'.format((time.time() - t0) * 1000), font)
    plt.imshow(img)
    plt.show()
    print('done!')
예제 #16
0
def main(argv=None):

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(cfg.CLASS_NAME)

    if cfg.FROZEN_MODEL:
        pass
    #
    #     t0 = time.time()
    #     frozenGraph = load_graph(cfg.FROZEN_MODEL)
    #     print("Loaded graph in {:.2f}s".format(time.time()-t0))
    #
    #     boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
    #
    #     with tf.Session(graph=frozenGraph, config=config) as sess:
    #         t0 = time.time()
    #         detected_boxes = sess.run(
    #             boxes, feed_dict={inputs: [img_resized]})

    else:
        if cfg.TINY:
            model = yolo_v3_tiny.yolo_v3_tiny
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes),
                                             cfg.IMAGE_SIZE, cfg.DATA_FORMAT)
        # boxes : coordinates of top left and bottom right points.
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        #
        # for specific object recognition
        #
        vgg16_image_size = vgg_16.default_image_size

        s_class_names = cfg.S_CLASS_PATH
        s_classes = [l.split(" ") for l in open(s_class_names, "r")]
        if len(s_classes[0]):  # classフォーマットが "id classname"の場合
            s_labels = {int(l[0]): l[1].replace("\n", "") for l in s_classes}
        else:  # classフォーマットが "classname"のみの場合
            s_labels = {
                i: l.replace("\n", "")
                for i, l in enumerate(s_classes)
            }

        num_classes_s = len(s_labels.keys())

        num_classes_extractor = cfg.S_EXTRACTOR_NUM_OF_CLASSES
        s_model = cfg.S_CKPT_FILE

        extractor_name = cfg.S_EXTRACTOR_NAME

        # specific_pred, [cropped_images_placeholder, original_images_placeholder, keep_prob, is_training] = specific_object_recognition(vgg16_image_size, num_classes_s, num_classes_extractor, extractor_name)
        specific_pred, [cropped_images_placeholder, keep_prob,
                        is_training] = specific_object_recognition(
                            vgg16_image_size, num_classes_s)

        variables_to_restore = slim.get_variables_to_restore(
            include=["vgg_16"])
        restorer = tf.train.Saver(variables_to_restore)
        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, cfg.CKPT_FILE)
            print('YOLO v3 Model restored in {:.2f}s'.format(time.time() - t0),
                  "from:", cfg.CKPT_FILE)

            t0 = time.time()
            restorer.restore(sess, s_model)
            print(
                'Specific object recognition Model restored in {:.2f}s'.format(
                    time.time() - t0), "from:", s_model)

            # prepare test set
            with open(cfg.TEST_FILE_PATH, 'r') as f:
                f_ = [line.rstrip().split() for line in f]

            data = [
                [l, get_annotation(l[0], txtname=cfg.GT_INFO_FILE_NAME)]
                for l in f_
            ]  # data: [[(path_str, label), [frame, center_x, center_y, size_x, size_y]],...]
            data = [l for l in data
                    if l[1] is not None]  # annotationを取得できなかった画像は飛ばす

            def is_cropped_file_Exist(orig_filepath):
                d, file = os.path.split(orig_filepath)
                cropped_d = d + "_cropped"
                cropped_file = os.path.join(cropped_d, file)
                return os.path.exists(cropped_file)

            data = [l for l in data
                    if is_cropped_file_Exist(l[0][0])]  # 対となるcrop画像がない画像は飛ばす

            # log
            f = open(cfg.OUTPUT_LOG_PATH, 'w')
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow([
                'image path', 'movie_name', 'IoU', 'Average Precision',
                'Recall', 'is RoI detected?', 'is label correct?', 'gt label',
                'pred label', 'detect time', 'recog time'
            ])

            iou_list = []  # 画像毎のiouのリスト
            ap_list = []  # 画像毎のaverage precisionのリスト

            # iterative run
            for count, gt in enumerate(
                    data
            ):  # gt: [(path_str, label), [frame, center_x, center_y, size_x, size_y]
                # for evaluation
                gt_box = [float(i) for i in gt[1][1:]]
                gt_box = [
                    gt_box[0] - (gt_box[2] / 2), gt_box[1] - (gt_box[3] / 2),
                    gt_box[0] + (gt_box[2] / 2), gt_box[1] + (gt_box[3] / 2)
                ]
                gt_label = int(gt[0][1])
                ious = []
                precisions = []

                print(count, ":", gt[0][0])
                img = Image.open(gt[0][0])
                img_resized = letter_box_image(img, cfg.IMAGE_SIZE,
                                               cfg.IMAGE_SIZE, 128)
                img_resized = img_resized.astype(np.float32)

                t0 = time.time()
                detected_boxes = sess.run(boxes,
                                          feed_dict={inputs: [img_resized]})

                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=cfg.CONF_THRESHOLD,
                    iou_threshold=cfg.IOU_THRESHOLD)
                detect_time = time.time() - t0

                print("detected boxes in :{:.2f}s ".format(detect_time),
                      filtered_boxes)

                # specific object recognition!
                np_img = np.array(img) / 255
                target_label = 0  # seesaaの場合 (データセットのクラス番号毎にここを変える.)

                if len(filtered_boxes.keys()) != 0:  # 何かしら検出された時
                    is_detected = True

                    for cls, bboxs in filtered_boxes.items():
                        if cls == target_label:  # ターゲットラベルなら
                            print("target class detected!")
                            bounding_boxes = []
                            bboxs_ = copy.deepcopy(
                                bboxs
                            )  # convert_to_original_size()がbboxを破壊してしまうため
                            for box, score in bboxs:
                                orig_size_box = convert_to_original_size(
                                    box,
                                    np.array((cfg.IMAGE_SIZE, cfg.IMAGE_SIZE)),
                                    np.array(img.size), True)
                                # print(orig_size_box)
                                cropped_image = np_img[
                                    int(orig_size_box[1]):int(orig_size_box[3]
                                                              ),
                                    int(orig_size_box[0]):int(orig_size_box[2]
                                                              )]
                                bounding_boxes.append(cropped_image)

                            # input_original = cv2.resize(padding(np_img), (vgg16_image_size, vgg16_image_size))
                            # input_original = np.tile(input_original, (len(bounding_boxes), 1, 1, 1)) # croppedと同じ枚数分画像を重ねる

                            cropped_images = []
                            for bbox in bounding_boxes:
                                cropped_images.append(
                                    cv2.resize(
                                        padding(bbox),
                                        (vgg16_image_size, vgg16_image_size)))

                            input_cropped = np.asarray(cropped_images)

                            t0 = time.time()
                            pred = sess.run(specific_pred,
                                            feed_dict={
                                                cropped_images_placeholder:
                                                input_cropped,
                                                keep_prob: 1.0,
                                                is_training: False
                                            })

                            recog_time = time.time() - t0
                            print("Predictions found in {:.2f}s".format(
                                recog_time))

                            pred_label = [s_labels[i] for i in pred.tolist()
                                          ]  # idからクラス名を得る

                            classes = [
                                s_labels[i] for i in range(num_classes_s)
                            ]

                            filtered_boxes = {}
                            for i, n in enumerate(pred.tolist()):
                                if n in filtered_boxes.keys():
                                    filtered_boxes[n].extend([bboxs_[i]])
                                else:
                                    filtered_boxes[n] = [bboxs_[i]]

                            # calc IoU, mAP
                            # gt: [(path_str, label), [frame, center_x, center_y, size_x, size_y]
                            # print(filtered_boxes)
                            iou = 0.0
                            for key in filtered_boxes.keys():
                                for pred_box in filtered_boxes[key]:
                                    p_box = copy.deepcopy(pred_box[0])
                                    orig_scale_p_box = convert_to_original_size(
                                        p_box,
                                        np.array(
                                            (cfg.IMAGE_SIZE, cfg.IMAGE_SIZE)),
                                        np.array(img.size), True)
                                    conf = pred_box[1]
                                    # print(gt_label, key)
                                    if key == gt_label:  # 予測したクラスがGTと同じの時
                                        # print(orig_scale_p_box, gt_box)
                                        iou = _iou(
                                            orig_scale_p_box, gt_box
                                        )  # :param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, x2]
                                        precision = calc_precision(
                                            orig_scale_p_box, gt_box)
                                        is_label_correct = True
                                    else:
                                        iou = 0.0
                                        precision = 0.0
                                        is_label_correct = False

                                    # print("IoU:", iou)
                                    ious.append(iou)
                                    print("Precision:", precision)
                                    precisions.append(precision)

                        else:  # ターゲットラベルじゃない時
                            pass

                else:  #何も検出されなかった時
                    is_detected = False
                    is_label_correct = "None"
                    pred_label = ["None"]

                average_iou = sum(ious) / (len(ious) + 1e-05)  # 画像一枚のiou
                print("average IoU:", average_iou)
                iou_list.append(average_iou)
                print("mean average IoU:",
                      sum(iou_list) / (len(iou_list) + 1e-05))

                ap = sum(precisions) / (len(precisions) + 1e-05)
                ap_list.append(ap)
                print("Average Precision:", ap)
                print("mean Average Precision:",
                      sum(ap_list) / (len(ap_list) + 1e-05))

                draw_boxes(filtered_boxes, img, classes,
                           (cfg.IMAGE_SIZE, cfg.IMAGE_SIZE), True)

                # draw GT
                draw = ImageDraw.Draw(img)
                color = (0, 0, 0)
                draw.rectangle(gt_box, outline=color)
                draw.text(gt_box[:2], 'GT_' + s_labels[gt_label], fill=color)

                img.save(
                    os.path.join(
                        cfg.OUTPUT_IMAGE_DIR,
                        '{0:04d}_'.format(count) + os.path.basename(gt[0][0])))
                writer.writerow([
                    gt[0][0],
                    os.path.basename(os.path.dirname(gt[0][0])), average_iou,
                    ap, 'Recall', is_detected, is_label_correct,
                    s_labels[gt_label], pred_label[0], detect_time, recog_time
                ])

            f.close()
            print("proc finished.")
예제 #17
0
def main(argv=None):
    """Evaluate the checkpoint-restored YOLO v3 detector on the test list.

    Reads the entries of ``cfg.TEST_FILE_PATH``, keeps those that have an
    annotation and a same-named file in the sibling ``<dir>_cropped``
    directory, runs detection on each remaining image and writes one CSV row
    per image to ``cfg.OUTPUT_LOG_PATH``.  Per-image and running IoU /
    precision figures are printed while iterating and the TP/FP/FN totals are
    printed at the end.

    Nothing is evaluated when ``cfg.FROZEN_MODEL`` is set.

    Args:
        argv: Unused.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    classes = load_coco_names(cfg.CLASS_NAME)

    if cfg.FROZEN_MODEL:
        # This function has no evaluation path for frozen graphs.
        pass

    else:
        model = yolo_v3_tiny.yolo_v3_tiny if cfg.TINY else yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes),
                                             cfg.IMAGE_SIZE, cfg.DATA_FORMAT)
        # boxes : coordinates of top left and bottom right points.
        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, cfg.CKPT_FILE)
            print('YOLO v3 Model restored in {:.2f}s'.format(time.time() - t0),
                  "from:", cfg.CKPT_FILE)

            # Prepare the test set.  Each non-blank line is split on
            # whitespace; element 0 is used as the image path and element 1
            # as the integer label.  Blank lines are skipped because they
            # would otherwise fail on `entry[0]`.
            with open(cfg.TEST_FILE_PATH, 'r') as test_file:
                entries = [
                    line.rstrip().split() for line in test_file
                    if line.strip()
                ]

            # data: [[(path_str, label),
            #         [frame, center_x, center_y, size_x, size_y]], ...]
            data = [[
                entry,
                get_annotation(entry[0], txtname=cfg.GT_INFO_FILE_NAME)
            ] for entry in entries]
            # Skip images whose annotation could not be obtained.
            data = [item for item in data if item[1] is not None]

            def is_cropped_file_Exist(orig_filepath):
                """Return True if `<dir>_cropped/<file>` exists for the path."""
                d, file = os.path.split(orig_filepath)
                cropped_d = d + "_cropped"
                cropped_file = os.path.join(cropped_d, file)
                return os.path.exists(cropped_file)

            # Skip images that have no paired cropped image.
            data = [item for item in data if is_cropped_file_Exist(item[0][0])]

            total_iou = []  # per-image IoU
            total_tp = 0  # TP: positives with IoU > 0.5 and GT == predicted class
            total_fp = 0  # FP: positives that do not meet the TP condition
            total_fn = 0  # FN: ground truths that were not detected
            total_ap = []  # per-image precision (TP / total positives)

            # The context manager closes the log even if an image fails
            # midway through the run.
            with open(cfg.OUTPUT_LOG_PATH, 'w') as log_file:
                writer = csv.writer(log_file, lineterminator='\n')
                writer.writerow([
                    'image path', 'class/movie_name', 'IoU', 'TP', 'FP', 'FN',
                    'Average Precision', 'gt label', ' highest_conf_label',
                    'detect time'
                ])

                # gt: [(path_str, label),
                #      [frame, center_x, center_y, size_x, size_y]]
                for count, gt in enumerate(data):
                    # Ground truth: centre/size -> [x0, y0, x1, y1] corners.
                    raw_box = [float(i) for i in gt[1][1:]]
                    gt_box = [
                        raw_box[0] - (raw_box[2] / 2),
                        raw_box[1] - (raw_box[3] / 2),
                        raw_box[0] + (raw_box[2] / 2),
                        raw_box[1] + (raw_box[3] / 2)
                    ]
                    gt_label = int(gt[0][1])  # ground-truth class
                    gt_anno = {gt_label: gt_box}

                    print(count, ":", gt[0][0])
                    img = Image.open(gt[0][0])
                    img_resized = letter_box_image(img, cfg.IMAGE_SIZE,
                                                   cfg.IMAGE_SIZE, 128)
                    img_resized = img_resized.astype(np.float32)

                    t0 = time.time()
                    detected_boxes = sess.run(
                        boxes, feed_dict={inputs: [img_resized]})

                    filtered_boxes = non_max_suppression(
                        detected_boxes,
                        confidence_threshold=cfg.CONF_THRESHOLD,
                        iou_threshold=cfg.IOU_THRESHOLD)
                    detect_time = time.time() - t0

                    print("detected boxes in :{:.2f}s ".format(detect_time),
                          filtered_boxes)

                    if filtered_boxes:  # something was detected
                        # Evaluate this single image.
                        (tp, fp, fn), iou, precision, highest_conf_label = \
                            evaluate(filtered_boxes, gt_anno, img, thresh=0.5)
                    else:  # nothing was detected
                        iou = 0.0
                        precision = 0.0
                        tp = 0
                        fp = 0
                        fn = len(gt_anno)
                        highest_conf_label = -1

                    total_iou.append(iou)
                    total_ap.append(precision)
                    total_tp += tp
                    total_fp += fp
                    total_fn += fn

                    print("IoU:", iou)
                    print("mean average IoU:",
                          sum(total_iou) / (len(total_iou) + 1e-05))
                    print("AP:", precision)
                    print("mAP:", sum(total_ap) / (len(total_ap) + 1e-05))

                    movie_name = os.path.basename(os.path.dirname(gt[0][0]))
                    movie_parant_dir = os.path.basename(
                        os.path.dirname(os.path.dirname(gt[0][0])))
                    pred_label = (classes[highest_conf_label]
                                  if highest_conf_label != -1 else "None")
                    save_messe = [
                        gt[0][0],
                        # Parent directory first so the value matches the
                        # 'class/movie_name' column header.
                        os.path.join(movie_parant_dir, movie_name), iou, tp,
                        fp, fn, precision, classes[gt_label], pred_label,
                        detect_time
                    ]
                    writer.writerow(save_messe)
                    print(save_messe)

            print("total tp :", total_tp)
            print("total fp :", total_fp)
            print("total fn :", total_fn)
            print("proc finished.")
    # NOTE(review): no `def` header is visible for the statements below. They
    # read `args`, `gpu_memory_fraction`, `input_img` and `frozen_model_path`,
    # none of which are assigned in the visible code -- confirm where these
    # names come from before relying on this section.
    output_img = args.output
    class_names = args.labels
    params_ = args.params

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    # JSON parameter file; the "input_w" and "input_h" keys are read below.
    with open(params_, "r") as readFile:
        params = json.load(readFile)

    origin_img = cv2.imread(input_img)
    resized_img = letter_box_image(origin_img,
                                   (params["input_w"], params["input_h"]), 128)
    img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32)
    classes = load_coco_names(class_names)
    # One tuple of three random integers in [0, 255] per class.
    colors = [(random.randint(0, 255), random.randint(0, 255),
               random.randint(0, 255)) for _ in range(len(classes))]

    frozenGraph = load_graph(frozen_model_path)

    boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)
    outputs = {}
    with tf.Session(graph=frozenGraph, config=config) as sess:
        # Every element of `boxes` is evaluated with its own `sess.run` call
        # and stored under that element's `.name`.
        for i in range(len(boxes)):
            outputs[boxes[i].name] = sess.run(boxes[i],
                                              feed_dict={inputs: [img]})
    detected_boxes = list(outputs.values())
예제 #19
0
def main(argv=None):
    """Build a fake ground truth for 'city.png' and run the detector on it.

    A hand-written box is expanded by ``generate_ground_truth`` and drawn
    onto the image, which is saved as 'out_fakeboxes.jpg'.  With a frozen
    model the letter-boxed input is then updated for ``num_iterations``
    steps along the smoothed gradient of a masked MSE loss, writing the
    gradient and the modified image of every step under 'out/'.  Without a
    frozen model the checkpoint is restored and the detector is run once.

    Args:
        argv: Unused.
    """
    source_image = Image.open('city.png')
    network_input = letter_box_image(source_image, size, size,
                                     128).astype(np.float32)
    classes = load_coco_names('coco.names')

    fake_boxes = {2: [(np.array([300, 200, 370, 250]), 1.)]}
    generated_boxes, g_indices = generate_ground_truth(fake_boxes, size, 0.4)
    # draw_boxes receives copies so the box dictionaries stay usable below.
    for box_set in (generated_boxes, fake_boxes):
        draw_boxes(copy.deepcopy(box_set), source_image, classes,
                   (size, size), True)
    source_image.save('out_fakeboxes.jpg')

    # 1 at every index listed in g_indices, 0 elsewhere.
    mask = np.zeros([1, 10647])
    for selected in g_indices.values():
        mask[0, selected] = 1

    # Row layout: 4 box values, a constant 1., then the one-hot class vector.
    num_classes = len(classes)
    gt_tensor = np.zeros([1, 10647, 4 + 1 + num_classes])
    for cls, cls_boxes in generated_boxes.items():
        for position, entry in enumerate(cls_boxes):
            one_hot = np.zeros([num_classes])
            one_hot[cls] = 1
            coords = np.asarray(entry[0])
            gt_tensor[0, g_indices[cls][position]] = [*coords, 1., *one_hot]

    if frozen_model:

        load_started = time.time()
        frozenGraph = load_graph(frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time() - load_started))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with frozenGraph.as_default():
            fake_gt = tf.constant(gt_tensor, dtype=tf.float32)
            mask_tensor = tf.constant(mask, dtype=tf.float32)
            fake_loss = tf.reduce_mean(mse(fake_gt, boxes) * mask_tensor,
                                       axis=-1)
            grad_op = tf.gradients(fake_loss, inputs)

        with tf.Session(graph=frozenGraph) as sess:
            for iters in range(num_iterations):
                grad = sess.run(grad_op,
                                feed_dict={inputs: [network_input]})[0][0]

                # Blend three Gaussian blurs of the gradient (sigma, 2*sigma,
                # sigma/2); sigma grows with the iteration number.
                sigma = (iters * 4.0) / num_iterations + 0.5
                smoothed = [
                    gaussian_filter(grad, sigma=sigma * factor)
                    for factor in (1, 2, 0.5)
                ]
                grad = smoothed[0] + smoothed[1] + smoothed[2]

                # Step size is scaled by the gradient's standard deviation.
                mod = grad * (step_size / (np.std(grad) + 1e-8))

                Image.fromarray(np.uint8(mod + 128)).save(
                    'out/grads/{}.png'.format(iters))

                # Update the image by following the gradient.
                network_input = np.clip(network_input - mod, 0, 255)
                Image.fromarray(np.uint8(network_input)).save(
                    'out/images/{}.png'.format(iters))

    else:
        model = yolo_v3_tiny.yolo_v3_tiny if tiny else yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), size,
                                             data_format)

        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session() as sess:
            restore_started = time.time()
            saver.restore(sess, ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time() -
                                                     restore_started))

            detected_boxes = sess.run(boxes,
                                      feed_dict={inputs: [network_input]})
예제 #20
0
 def prepare_image(self, img):
     """Convert an OpenCV frame into the detector input.

     The frame is converted from BGR to RGB, wrapped in a PIL image and
     letter-boxed to ``FLAGS.size`` x ``FLAGS.size`` (fill value 128).

     Returns:
         A tuple ``(letter-boxed image as float32, PIL image)``.
     """
     pil_frame = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     side = FLAGS.size
     network_input = letter_box_image(pil_frame, side, side, 128)
     return network_input.astype(np.float32), pil_frame
예제 #21
0
def _detect_and_save_rois(sess, cap, boxes, inputs, classes):
    """Run detection on every remaining frame of `cap` and save the crops.

    Each frame is letter-boxed to `size` x `size`, passed through the
    network, filtered with non-max suppression and drawn with `draw_boxes`.
    Every region returned by `draw_boxes` is saved under
    './extracted_regions/' and the annotated frame is shown with matplotlib.

    Args:
        sess: Session used to evaluate `boxes`.
        cap: Opened `cv2.VideoCapture` to read frames from.
        boxes: Detection output tensor.
        inputs: Input placeholder fed with the letter-boxed frame.
        classes: Class-name mapping passed through to `draw_boxes`.
    """
    frame_index = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream or read error: there is no frame to convert, so
            # stop instead of passing an invalid frame to cv2.cvtColor.
            break
        frame_index += 1

        # Convert the frame into the tensor expected by the model.
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_resized = letter_box_image(img, size, size, 128)
        img_resized = img_resized.astype(np.float32)

        t0 = time.time()
        detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
        # Keep only the bounding boxes that survive non-max suppression.
        filtered_boxes = non_max_suppression(
            detected_boxes,
            confidence_threshold=conf_threshold,
            iou_threshold=iou_threshold)
        print("Predictions found in {:.2f}s".format(time.time() - t0))

        # Crop the detected objects and store them on disk.
        rois = draw_boxes(filtered_boxes, img, classes, (size, size), True)
        for i, roi in enumerate(rois):
            roi.save('./extracted_regions/frame{}_ExtObj_{}.jpg'.format(
                frame_index, i))

        plt.imshow(np.array(img))
        plt.pause(0.02)
        plt.show()


def main():
    """Detect objects in 'video.avi' and save every detected region.

    Uses the frozen graph when `frozen_model` is set; otherwise builds the
    (tiny) YOLO v3 model and restores its weights from `ckpt_file`.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )
    #----------- Initialization --------------
    classes = load_coco_names(class_names)
    cap = cv2.VideoCapture('video.avi')
    plt.ion()

    try:
        # The first frame is read and discarded, as before, so the frame
        # numbers used in the saved file names stay the same.
        cap.read()

        if frozen_model:
            # The protobuf file contains the graph definition as well as the
            # weights of the model.
            t0 = time.time()
            frozenGraph = load_graph(frozen_model)
            print("Loaded graph in {:.2f}s".format(time.time() - t0))

            boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

            with tf.device("/GPU:0"):
                with tf.Session(graph=frozenGraph, config=config) as sess:
                    _detect_and_save_rois(sess, cap, boxes, inputs, classes)

        else:
            # Use a checkpoint file for the model weights.
            if tiny:
                model = yolo_v3_tiny.yolo_v3_tiny
            else:
                model = yolo_v3.yolo_v3

            boxes, inputs = get_boxes_and_inputs(model, len(classes), size,
                                                 data_format)
            # The Saver is created after the model so that the 'detector'
            # variables it collects already exist.
            saver = tf.train.Saver(
                var_list=tf.global_variables(scope='detector'))

            with tf.Session(config=config) as sess:
                # Restoring needs the live session, so it happens inside the
                # `with` block rather than before it.
                t0 = time.time()
                saver.restore(sess, ckpt_file)
                print('Model restored in {:.2f}s'.format(time.time() - t0))

                _detect_and_save_rois(sess, cap, boxes, inputs, classes)
    finally:
        cap.release()
예제 #22
0
# Script: load a frozen graph, run it once on `input_image` and draw the
# boxes that survive non-max suppression.
# NOTE(review): `class_names`, `frozen_model`, `input_image` and `input_size`
# are not assigned in this section -- presumably defined earlier; confirm.
conf_threshold = 0.5
iou_threshold = 0.4
classes = utils.load_coco_names(class_names)
out_image = './person.jpg'

t0 = time.time()
frozenGraph = utils.load_graph(frozen_model)
print("Loaded graph in {:.2f}s".format(time.time() - t0))
# Module-level session; it is not closed in this section.
sess = tf.Session(graph=frozenGraph)

# image = cv2.imread(input_image)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# image = Image.fromarray(image.astype('uint8')).convert('RGB')
# The three steps above are equivalent to the Image.open() call below.
image = Image.open(input_image)
img_resized = utils.letter_box_image(image, input_size, input_size, 128)
img_resized = img_resized.astype(np.float32)
boxes, inputs = utils.get_boxes_and_inputs_pb(frozenGraph)

# Time the forward pass together with non-max suppression.
t0 = time.time()
detected_boxes = sess.run(boxes, feed_dict={inputs: [img_resized]})
filtered_boxes = utils.non_max_suppression(detected_boxes,
                                           confidence_threshold=conf_threshold,
                                           iou_threshold=iou_threshold)
print("Predictions found in {:.2f}s".format(time.time() - t0))
# Only draw when at least one box survived filtering.
if filtered_boxes:
    # if len(filtered_boxes[0][:]) == 1:
    img, region, score, box = utils.draw_boxes(filtered_boxes, image, classes,
                                               (input_size, input_size), True)
    # box = np.array(box)
    # print(box)
def main(argv=None):
    """Run YOLO v3 over the images listed in the VOC2007 test split.

    Image ids are read from 'VOC2007/ImageSets/Main/test.txt' (the text
    before the first comma of each line).  Every image is loaded from
    'VOC2007/JPEGImages/<id>.jpg', passed through the checkpoint-restored
    model, annotated with `draw_boxes` and saved as 'output/<id>.jpg'.
    Images that cannot be read, or that raise ``ValueError`` while being
    processed, are reported and skipped.

    Args:
        argv: Unused.
    """
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)

    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    # The context manager closes the id list once it has been read; blank
    # lines are ignored because they do not name an image.
    with open("VOC2007/ImageSets/Main/test.txt", "r") as id_file:
        results = [
            line.strip('\n').split(',')[0] for line in id_file
            if line.strip()
        ]

    model = yolo_v3.yolo_v3
    classes = load_coco_names(FLAGS.class_names)
    boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size,
                                         FLAGS.data_format)
    saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))
    with tf.Session(config=config) as sess:
        t0 = time.time()
        saver.restore(sess, FLAGS.ckpt_file)
        print('Model restored in {:.2f}s'.format(time.time() - t0))

        for image_id in results:
            image_path = 'VOC2007/JPEGImages/' + str(image_id) + '.jpg'
            print(image_path)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable/missing file with None;
                # skip it instead of failing on `image.shape`.
                print('Could not read image, skipping: ' + image_path)
                continue
            print(image.shape)

            try:
                img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                img_resized = letter_box_image(img, FLAGS.size, FLAGS.size,
                                               128)
                img_resized = img_resized.astype(np.float32)

                # Restart the timer for every image so the reported time is
                # per prediction rather than cumulative.
                t0 = time.time()
                detected_boxes = sess.run(boxes,
                                          feed_dict={inputs: [img_resized]})

                filtered_boxes = non_max_suppression(
                    detected_boxes,
                    confidence_threshold=FLAGS.conf_threshold,
                    iou_threshold=FLAGS.iou_threshold)
                print("Predictions found in {:.2f}s".format(time.time() - t0))

                draw_boxes(filtered_boxes, img, classes,
                           (FLAGS.size, FLAGS.size), True)

                img.save('output/' + image_id + '.jpg')
            except ValueError as err:
                # Best effort: keep going, but say which image was skipped.
                print('Skipping {}: {}'.format(image_path, err))