Example #1
    def detect_image(self, image):
        if self.model_image_size != (None, None):
            assert self.model_image_size[
                0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[
                1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(
                image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                #K.learning_phase(): False
            })

        # print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        return out_boxes, out_scores, out_classes
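For context, a minimal call sketch for this style of method (the wrapper class name YOLO and its constructor are assumptions, not shown in the snippet):

from PIL import Image

yolo = YOLO()  # hypothetical wrapper exposing detect_image()
boxes, scores, classes = yolo.detect_image(Image.open('test.jpg'))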
Example #2
def process_image(img_path, input_shape):
    """
    Process the input image into the model's standard input shape
    """

    image = Image.open(img_path)
    image_shape = (image.width, image.height)

    # Resize the image to the model input shape
    if input_shape != (None, None):
        assert input_shape[0]%32 == 0, 'Multiples of 32 required'
        assert input_shape[1]%32 == 0, 'Multiples of 32 required'
        boxed_image = letterbox_image(image, tuple(reversed(input_shape)))
    else:
        new_image_size = (image.width - (image.width % 32),
                            image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')

    # Normalize to [0, 1]
    image_data /= 255.
    # Add batch dimension. (w, h, 3) -> (m, w, h, 3)
    image_data = np.expand_dims(image_data, 0)

    print(image_data.shape)

    return image_data, image_shape
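The letterbox_image helper these PIL-based examples rely on is not shown; a sketch of the keras-yolo3-style implementation they appear to assume (PIL input, aspect ratio preserved, gray padding):

from PIL import Image

def letterbox_image(image, size):
    """Resize image with unchanged aspect ratio using padding."""
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    image = image.resize((nw, nh), Image.BICUBIC)
    new_image = Image.new('RGB', size, (128, 128, 128))     # gray canvas
    new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))  # paste centered
    return new_image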
Example #3
    def get_classification(self, im):
        start = timer()

        image = Image.fromarray(im[..., ::-1])

        if self.model_image_size != (None, None):
            assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        max_class = None
        max_score = 0.0
        if out_scores.size > 0:
            max_score_idx = out_scores.argmax()
            max_class = out_classes[max_score_idx]
            max_score = out_scores[max_score_idx]
            # print(max_class, max_score, max_score_idx)

        end = timer()

        # print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        # for i, score in enumerate(out_scores):
            # print(i, out_classes[i], score)

        if max_class is not None:
            predicted_class = self.class_names[max_class]
            dt = end - start
            # print("Found traffic light: {light:%s score:%.3f dt:%.3f}"%(predicted_class, max_score, dt))

        rtn = TrafficLight.UNKNOWN

        if max_class == 0:
            rtn = TrafficLight.RED
        elif max_class == 1:
            rtn = TrafficLight.YELLOW
        elif max_class == 2:
            rtn = TrafficLight.GREEN

        self.last_pred = rtn
        self.image_count += 1

        return rtn, max_score
Example #4
def prepare_infer_input(img_path, input_size, transform):
    t = time.time()
    img = cv2.imread(img_path)
    # print('read file time:', time.time() - t)
    # keep ratio resize to input size
    new_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    t = time.time()
    new_img, scale, shift = letterbox_image(new_img, input_size)
    # print('letterbox_image time:', time.time() - t)

    # t = time.time()
    # model_input = torch.Tensor(new_img).cuda()
    # print('image to cuda time:', time.time() - t)

    # mean = torch.Tensor(mean).cuda()
    # std = torch.Tensor(std).cuda()

    t = time.time()
    # model_input = (model_input / 255.0 - mean) / std
    # model_input = model_input.permute([2, 0, 1]).unsqueeze(0)
    model_input = transform(new_img).unsqueeze(0)
    # print('transform time:', time.time() - t)

    return img, model_input, scale, shift
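Examples #4 and #12 unpack letterbox_image as (img, scale, shift), an OpenCV-based variant; a minimal sketch under that assumed signature (padding color and the (w, h) ordering of size are guesses):

import cv2
import numpy as np

def letterbox_image(img, size):
    # Keep-ratio resize onto a padded canvas; also return the scale factor
    # and the (x, y) shift so boxes can be mapped between coordinate frames.
    h, w = img.shape[:2]
    tw, th = size
    scale = min(tw / w, th / h)
    nw, nh = int(w * scale), int(h * scale)
    resized = cv2.resize(img, (nw, nh))
    canvas = np.full((th, tw, 3), 128, dtype=img.dtype)
    shift = ((tw - nw) // 2, (th - nh) // 2)
    canvas[shift[1]:shift[1] + nh, shift[0]:shift[0] + nw] = resized
    return canvas, scale, shift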
Example #5
def table_line(img,
               size=(512, 512),
               hprob=0.5,
               vprob=0.5,
               row=50,
               col=30,
               alph=15):
    sizew, sizeh = size
    inputBlob, fx, fy = letterbox_image(img[..., ::-1], (sizew, sizeh))

    with Timer('predict table lines'):
        pred = model.predict(np.array([np.array(inputBlob) / 255.0]))
    pred = pred[0]
    vpred = pred[..., 1] > vprob  # vertical-line mask (boolean)
    hpred = pred[..., 0] > hprob  # horizontal-line mask (boolean)
    vpred = vpred.astype(int)
    hpred = hpred.astype(int)
    colboxes = get_table_line(vpred, axis=1, lineW=col)
    rowboxes = get_table_line(hpred, axis=0, lineW=row)
    ccolbox = []
    crowlbox = []
    if len(rowboxes) > 0:
        rowboxes = np.array(rowboxes)
        rowboxes[:, [0, 2]] = rowboxes[:, [0, 2]] / fx
        rowboxes[:, [1, 3]] = rowboxes[:, [1, 3]] / fy
        xmin = rowboxes[:, [0, 2]].min()
        xmax = rowboxes[:, [0, 2]].max()
        ymin = rowboxes[:, [1, 3]].min()
        ymax = rowboxes[:, [1, 3]].max()
        ccolbox = [[xmin, ymin, xmin, ymax], [xmax, ymin, xmax, ymax]]
        rowboxes = rowboxes.tolist()

    if len(colboxes) > 0:
        colboxes = np.array(colboxes)
        colboxes[:, [0, 2]] = colboxes[:, [0, 2]] / fx
        colboxes[:, [1, 3]] = colboxes[:, [1, 3]] / fy

        xmin = colboxes[:, [0, 2]].min()
        xmax = colboxes[:, [0, 2]].max()
        ymin = colboxes[:, [1, 3]].min()
        ymax = colboxes[:, [1, 3]].max()
        colboxes = colboxes.tolist()
        crowlbox = [[xmin, ymin, xmax, ymin], [xmin, ymax, xmax, ymax]]

    rowboxes += crowlbox
    colboxes += ccolbox

    rboxes_row_, rboxes_col_ = adjust_lines(rowboxes, colboxes, alph=alph)
    rowboxes += rboxes_row_
    colboxes += rboxes_col_
    nrow = len(rowboxes)
    ncol = len(colboxes)
    for i in range(nrow):
        for j in range(ncol):
            rowboxes[i] = line_to_line(rowboxes[i], colboxes[j], 10)
            colboxes[j] = line_to_line(colboxes[j], rowboxes[i], 10)

    return rowboxes, colboxes
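Note that table_line (and table_detect in Example #9) uses yet another letterbox_image variant that returns per-axis scale factors (inputBlob, fx, fy). A minimal call sketch, with 'table.jpg' as a hypothetical input file:

import cv2

img = cv2.imread('table.jpg')          # BGR, as table_line expects
rowboxes, colboxes = table_line(img)   # line segments in original-image coordinates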
Example #6
    def detect_image(self, image):
        print('get detect_image--------------------')
        if self.model_image_size != (None, None):
            assert self.model_image_size[
                0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[
                1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(
                image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        model_input = self.graph.get_tensor_by_name("input_1:0")
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                model_input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]]
            })

        results = []
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            result = []
            result.append(predicted_class)
            result.append(score)

            box[1] = max(0, np.floor(box[1] + 0.5).astype('int32'))
            box[0] = max(0, np.floor(box[0] + 0.5).astype('int32'))
            box[3] = min(image.size[0], np.floor(box[3] + 0.5).astype('int32'))
            box[2] = min(image.size[1], np.floor(box[2] + 0.5).astype('int32'))

            result.append(box)

            results.append(result)
        return results
Example #7
def detect_image(weights, image_url, img_size, conf_thres, iou_thres):

    start_time = time.time()

    #image = cv2.imread(image_url)
    image = Image.open(image_url)
    original_size = image.size[:2]
    size = (img_size, img_size)
    image_resized = letterbox_image(image, size)
    img = np.asarray(image)

    #image = ImageOps.fit(image, size, Image.ANTIALIAS)
    image_array = np.asarray(image_resized)

    normalized_image_array = image_array.astype(np.float32) / 255.0

    yolov5_tflite_obj = yolov5_tflite(weights, img_size, conf_thres, iou_thres)

    result_boxes, result_scores, result_class_names = yolov5_tflite_obj.detect(
        normalized_image_array)

    if len(result_boxes) > 0:
        result_boxes = scale_coords(size, np.array(result_boxes),
                                    (original_size[1], original_size[0]))
        font = cv2.FONT_HERSHEY_SIMPLEX

        # org
        org = (20, 40)

        # fontScale
        fontScale = 0.5

        # Green color in BGR
        color = (0, 255, 0)

        # Line thickness of 1 px
        thickness = 1

        for i, r in enumerate(result_boxes):

            org = (int(r[0]), int(r[1]))
            cv2.rectangle(img, (int(r[0]), int(r[1])), (int(r[2]), int(r[3])),
                          (255, 0, 0), 1)
            cv2.putText(
                img,
                str(int(100 * result_scores[i])) + '%  ' +
                str(result_class_names[i]), org, font, fontScale, color,
                thickness, cv2.LINE_AA)

        save_result_filepath = image_url.split('/')[-1].split(
            '.')[0] + '_yolov5_output.jpg'
        cv2.imwrite(save_result_filepath, img[:, :, ::-1])

        end_time = time.time()

        print('FPS:', 1 / (end_time - start_time))
        print('Total Time Taken:', end_time - start_time)
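scale_coords is not defined in this snippet; a sketch of the inverse letterbox mapping it presumably performs (the yolov5-style signature scale_coords(input_shape, boxes, original_shape) with (h, w) shapes is an assumption):

import numpy as np

def scale_coords(input_shape, boxes, original_shape):
    # Undo the letterbox padding, then undo the keep-ratio scaling.
    gain = min(input_shape[0] / original_shape[0],
               input_shape[1] / original_shape[1])
    pad_x = (input_shape[1] - original_shape[1] * gain) / 2
    pad_y = (input_shape[0] - original_shape[0] * gain) / 2
    boxes = boxes.astype(np.float32)
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_x) / gain
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_y) / gain
    return boxes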
Example #8
def detect(image_path, model_path, yolo_weights=None):
    """
    Introduction
    ------------
        Load the model and run prediction.
    Parameters
    ----------
        model_path: path to the model
        image_path: path to the image
    """
    image = Image.open(image_path)
    resize_image = letterbox_image(image, (416, 416))
    image_data = np.array(resize_image, dtype=np.float32)
    image_data /= 255.
    image_data = np.expand_dims(image_data, axis=0)

    pb_graph = tf.Graph()
    with pb_graph.as_default():
        input_image_shape = tf.placeholder(dtype=tf.int32,
                                           shape=(2, ),
                                           name="pred_im_shape")
        input_image = tf.placeholder(shape=[None, 416, 416, 3],
                                     dtype=tf.float32,
                                     name='pred_input_img')
        predictor = yolo_predictor(config.obj_threshold,
                                   config.nms_iou_threshold,
                                   config.classes_path, config.anchors_path)
        boxes, scores, classes = predictor.predict(input_image,
                                                   input_image_shape)
        print(input_image_shape)
        print(input_image)
        print(boxes)
        print(scores)
        print(classes)
    with tf.Session(graph=pb_graph) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, model_path)
        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                input_image: image_data,
                input_image_shape: [image.size[1], image.size[0]]
            })
        graph_def = tf.get_default_graph().as_graph_def()
        out_put_name_list = [
            'predict/pred_boxes', 'predict/pred_scores', 'predict/pred_classes'
        ]
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess, graph_def, out_put_name_list)
        pb_file_path = 'F:\\github_working\\version_2_190114\\alsochen-tensorflow-yolo3-threeoutput\\tensorflow-yolo3\\pb_file\\model.pb'
        with tf.gfile.GFile(pb_file_path, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
            print("pb save done")
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
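A sketch of loading the frozen graph this writes back for inference (tensor names follow the placeholder names and output node list defined above; TF 1.x API):

import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('model.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name='')
    input_image = g.get_tensor_by_name('pred_input_img:0')
    input_image_shape = g.get_tensor_by_name('pred_im_shape:0')
    boxes = g.get_tensor_by_name('predict/pred_boxes:0')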
Example #9
def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """
    表格检测
    img:GBR
    
    """
    scale = sc[0]
    img_height, img_width = img.shape[:2]
    inputBlob, fx, fy = letterbox_image(img[..., ::-1], (scale, scale))
    inputBlob = cv2.dnn.blobFromImage(inputBlob,
                                      scalefactor=1.0,
                                      size=(scale, scale),
                                      swapRB=True,
                                      crop=False)
    tableDetectNet.setInput(inputBlob / 255.0)
    outputName = tableDetectNet.getUnconnectedOutLayersNames()
    outputs = tableDetectNet.forward(outputName)
    class_ids = []
    confidences = []
    boxes = []
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > thresh:
                center_x = int(detection[0] * scale / fx)
                center_y = int(detection[1] * scale / fy)
                width = int(detection[2] * scale / fx)
                height = int(detection[3] * scale / fy)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                if class_id == 1:
                    class_ids.append(class_id)
                    confidences.append(float(confidence))
                    xmin, ymin, xmax, ymax = left, top, left + width, top + height
                    xmin = max(xmin, 1)
                    ymin = max(ymin, 1)
                    xmax = min(xmax, img_width - 1)
                    ymax = min(ymax, img_height - 1)
                    boxes.append([xmin, ymin, xmax, ymax])

    boxes = np.array(boxes)

    confidences = np.array(confidences)
    if len(boxes) > 0:
        boxes, confidences = nms_box(boxes,
                                     confidences,
                                     score_threshold=thresh,
                                     nms_threshold=NMSthresh)

    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences
Example #10
    def forward(self, bottom, top):
        pic_name = self.fp.readline()
        if pic_name == "":
            self.fp.seek(0)
            pic_name = self.fp.readline()
        pic_name = pic_name.strip('\n')
        print(pic_name)

        img = cv2.imread(pic_name)
        transformed_image = letterbox_image(img, self.input_hw, self.input_hw)

        # Reshape net's input blobs
        top[0].reshape(1, 3, self.input_hw, self.input_hw)
        # Copy data into net's input blobs
        top[0].data[...] = transformed_image
Example #11
def eval_neg(model_path, neg_path, yolo_weights=None):
    """
        Introduction
        ------------
            Compute the model's mAP on the negative validation set, used to evaluate the model
    """
    input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ))
    input_image = tf.placeholder(shape=[None, 416, 416, 3], dtype=tf.float32)
    predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                               config.classes_path, config.anchors_path)
    boxes, scores, classes = predictor.predict(input_image, input_image_shape)

    image_files = os.listdir(neg_path)
    tp_nums = 0
    all_nums = len(image_files)
    with tf.Session() as sess:
        if yolo_weights is not None:
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(
                    input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope='predict'),
                                   weights_file=yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            # saver.restore(sess, model_path)

        for image_file in image_files:
            image = Image.open(neg_path + image_file)
            resize_image = letterbox_image(image, (416, 416))
            image_data = np.array(resize_image, dtype=np.float32)
            image_data /= 255.
            image_data = np.expand_dims(image_data, axis=0)

            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes],
                feed_dict={
                    input_image: image_data,
                    input_image_shape: [image.size[1], image.size[0]]
                })
            print(image_file)
            print(out_classes)
            print(out_scores)
            if len(out_classes) == 0:
                tp_nums += 1
        print(tp_nums / all_nums)
Example #12
    def __getitem__(self, index):
        data_anno = self.data_list[index]
        img_path = data_anno['img_path']
        img_path = os.path.join(self.root_dir, img_path)
        bboxes = data_anno['bboxes']
        cls_ids = data_anno['cls_ids']

        img = cv2.imread(img_path)
        if img is None:
            print("Error: read %s fail" % img_path)
            exit()
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # keep ratio resize to input size
        img, scale, shift = letterbox_image(img, self.size)
        bboxes = np.array(bboxes, dtype=np.float32)
        bboxes[:, :4] = bboxes[:, :4] * scale
        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + shift[0]
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + shift[1]

        # augment
        if self.phase == 'train':
            transformed = self.aug(image=img, bboxes=bboxes, cls_ids=cls_ids)
            img = Image.fromarray(transformed['image'])
            bboxes = np.array(transformed['bboxes'])
            cls_ids = np.array(transformed['cls_ids'])

        obj_num = len(bboxes)

        if obj_num == 0:
            print('obj_num == 0')
            bboxes = np.zeros((MAX_OBJ_NUM, 4), dtype=float)
            cls_ids = np.zeros(MAX_OBJ_NUM, dtype=int)
        elif obj_num < MAX_OBJ_NUM:
            bboxes = np.pad(bboxes, ((0, MAX_OBJ_NUM - obj_num), (0, 0)))
            cls_ids = np.pad(cls_ids, (0, MAX_OBJ_NUM - obj_num))

        scale_shift = torch.Tensor([scale] + shift)
        # to tensor and normalize
        img = self.to_tensor(img)
        targets = {
            'bboxes': bboxes,
            'cls': cls_ids,
            'obj_num': obj_num,
            'transform': scale_shift,
            'img_path': img_path
        }

        return img, targets
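Because every sample is padded to MAX_OBJ_NUM boxes, the default collate can batch these samples directly; a minimal usage sketch (the dataset's constructor arguments are assumptions and not shown here):

from torch.utils.data import DataLoader

# dataset: an instance of the Dataset class above (constructor not shown)
loader = DataLoader(dataset, batch_size=8, shuffle=True)
for img, targets in loader:
    print(img.shape, targets['bboxes'].shape)  # e.g. (8, 3, H, W) and (8, MAX_OBJ_NUM, 4)
    break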
Example #13
    def predict(self, image_path):
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        new_img = letterbox_image(img, self.nw, self.nh)

        self.yolo.blobs['data'].reshape(1, 3, self.nw, self.nh)
        self.yolo.blobs['data'].data[...] = new_img
        out_feats = self.yolo.forward()
        layer82_conv = out_feats['layer82-conv']
        layer94_conv = out_feats['layer94-conv']
        layer106_conv = out_feats['layer106-conv']

        batch_out = {}
        feat = [layer82_conv[0], layer94_conv[0], layer106_conv[0]]
        output = self.yolo_out(feat, img.shape)
        if not output:
            batch_out[0] = []
        else:
            batch_out[0] = output

        return batch_out
Example #14
    def ret_frame(cls, image):
        # Generate colors for drawing bounding boxes.
        hsv_tuples = [(x / 10, 1., 1.) for x in range(10)]
        colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                colors))
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(
            colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

        start = timer()

        model_image_size = (608, 608)
        class_names = cls._get_class()

        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        image_shape = [image.size[1], image.size[0]]

        out_boxes, out_scores, out_classes = cls.compute_output(
            image_data, image_shape)

        Car_result_ALL = []
        Pedestrian_result_ALL = []
        all_result = []

        font = ImageFont.truetype(font='../box_font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] +
                                                0.5).astype('int32'))

        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{}_{:.2f}_{}'.format(
                predicted_class, score,
                str(cls.IDvalue))  #put the ID for each obj
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            # int32 is not JSON-serializable, so convert to plain int()
            top = int(top)
            left = int(left)
            bottom = int(bottom)
            right = int(right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # 1: predict the object position in the next frame from this result
            #nxt_result_txt = ' {},{},{},{},{}'.format(left, top, right, bottom, c)

            # 2: compute the detected box size; the configured threshold is 1024 px^2
            sq_bdbox = (bottom - top) * (right - left)

            if sq_bdbox >= 1024:  # box-size threshold
                if predicted_class == 'Car' or predicted_class == 'Pedestrian':  # Car or Pedes
                    # My kingdom for a good redistributable image drawing library.
                    for i in range(thickness):
                        draw.rectangle(
                            [left + i, top + i, right - i, bottom - i],
                            outline=colors[c])
                    draw.rectangle(
                        [tuple(text_origin),
                         tuple(text_origin + label_size)],
                        fill=colors[c])
                    draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                    del draw

        end = timer()
        print("1フレームの処理時間 = ", end - start)
        return image
Example #15
def detect(image, yolo_weights=config.yolo3_weights_path, image_size=(416, 416)):
    """
    Introduction
    ------------
        Load the model and run prediction.
    Parameters
    ----------
        image: path to the input image
        yolo_weights: path to the YOLOv3 weights file
        image_size: model input size
    """
    image = Image.open(image)
    if image_size != (None, None):

        assert image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert image_size[1] % 32 == 0, 'Multiples of 32 required'
        resize_image = letterbox_image(image, tuple(reversed(image_size)))
    else:
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        resize_image = letterbox_image(image, new_image_size)

    image_data = np.array(resize_image, dtype = 'float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, axis = 0)
    print(image_data.shape)
    input_image_shape = tf.placeholder(dtype = tf.int32, shape = (2,))
    input_image = tf.placeholder(shape = [None, 416, 416, 3], dtype = tf.float32)
    predictor = yolo_predictor(config.obj_threshold, config.nms_threshold, config.classes_path, config.anchors_path)
    boxes, scores, classes = predictor.predict(input_image, input_image_shape)
    with tf.Session() as sess:
        if yolo_weights is not None:
            print("yes")
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope = 'predict'), weights_file = yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            saver.restore(sess, config.yolo3_weights_path)
        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                input_image: image_data,
                input_image_shape: [image.size[1], image.size[0]]
            })
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        font = ImageFont.truetype(font = 'font/FiraMono-Medium.otf', size = np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = predictor.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline = predictor.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill = predictor.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        result = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        result = np.asarray(result)
        cv2.imwrite("./output.png", result)
Example #16
    def detect_image(cls, image, frame_num, all_posinf, old_posinf):
        start = timer()

        model_image_size = (608, 608)
        class_names = cls._get_class()

        new_image_size = (image.width - (image.width % 32),
                                image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        image_shape = [image.size[1], image.size[0]]

        out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)

        Car_result_ALL = []
        Pedestrian_result_ALL = []
        all_result = []

        # Per-frame bookkeeping
        if frame_num > 1:
            old_posinf.clear()
            old_posinf = copy.copy(all_posinf)
            all_posinf.clear()

        # Per-object processing
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            # int32 is not JSON-serializable, so convert to plain int()
            top = int(top)
            left = int(left)
            bottom = int(bottom)
            right = int(right)

            # 2: compute the detected box size; the configured threshold is 1024 px^2
            sq_bdbox = (bottom - top)*(right - left)

            # 3: compute the center coordinates of the detected box
            center_bdboxX = int((right - left)/2) + left   # horizontal center
            center_bdboxY = int((bottom - top)/2) + top    # vertical center

            if sq_bdbox >= 1024:  # box-size threshold
                if predicted_class == 'Car':
                    ObjID_set = 0

                    if frame_num == 1:  # register everything in the first frame
                        cls.IDvalue_car = cls.IDvalue_car + 1
                        # a car was detected
                        ObjID_set = cls.IDvalue_car
                        Car_result = {'id': ObjID_set, 'box2d': [left,top,right,bottom]}  # prediction result
                        # build the info used to predict this object's position in the next frame
                        tmp_car = {'frame':frame_num,'id':ObjID_set, 'left':left, 'top':top, 'right':right, 'bottom':bottom}
                        all_posinf.append(tmp_car)
                    else:
                        #current_pos check
                        cls.matches_cnt = 0

                        for kt in range(len(old_posinf)):
                            tmp_old_pos = old_posinf[kt]

                            tmp_ObjID = 0
                            tmp_left = 0
                            tmp_top = 0
                            tmp_right = 0
                            tmp_bottom = 0

                            for k, v in tmp_old_pos.items():
                                # e.g. k = 'Tanaka', v = 80
                                if k == "id":
                                    print("Key = ", k)
                                    print("Value = ",v)
                                    tmp_ObjID = v
                                elif k == "left":
                                    print("Key = ", k)
                                    print("Value = ",v)
                                    tmp_left = v
                                elif k == "top":
                                    print("Key = ", k)
                                    print("Value = ",v)
                                    tmp_top = v
                                elif k == "right":
                                    print("Key = ", k)
                                    print("Value = ",v)
                                    tmp_right = v
                                elif k == "bottom":
                                    print("Key = ", k)
                                    print("Value = ",v)
                                    tmp_bottom = v
                            if (tmp_left <= center_bdboxX <= tmp_right) and (tmp_top <= center_bdboxY <= tmp_bottom):
                                ObjID_set = tmp_ObjID
                                cls.matches_cnt = cls.matches_cnt + 1
                                # match found
                            #else:

                        # if no existing ID matched, assign a new one
                        if cls.matches_cnt == 0:
                            cls.IDvalue_car = cls.IDvalue_car + 1
                            ObjID_set = cls.IDvalue_car
                        #else:
                            #ObjID_set = tmp_ObjID

                        # register the updated object ID
                        tmp_car = {'frame':frame_num,'id':ObjID_set, 'left':left, 'top':top, 'right':right, 'bottom':bottom}
                        all_posinf.append(tmp_car)

                        # a car was detected
                        Car_result = {'id': ObjID_set, 'box2d': [left,top,right,bottom]}  # prediction result

                    # store the detected object (left empty when nothing is detected)
                    Car_result_ALL.append(Car_result)  # car

                elif predicted_class == 'Pedestrian':
                    cls.IDvalue_ped = cls.IDvalue_ped + 1
                    # a pedestrian was detected
                    Pedestrian_result = {'id': int(cls.IDvalue_ped), 'box2d': [left,top,right,bottom]}  # prediction result

                    # build the info used to predict this object's position in the next frame
                    tmp_ped = {'frame':frame_num,'id':int(cls.IDvalue_ped), 'left':left, 'top':top, 'right':right, 'bottom':bottom}
                    cls.all_ObjectID_pos.append(tmp_ped)

                    # store the detected object (left empty when nothing is detected)
                    Pedestrian_result_ALL.append(Pedestrian_result)  # pedestrian

        all_result = {'Car': Car_result_ALL, 'Pedestrian': Pedestrian_result_ALL}
        end = timer()
        print("1フレームの処理時間 = ", end - start)
        return all_result
Example #17
    def detect_image(self, image):
        start = timer()

        # Scale the image to a multiple of 32 so the convolutional network can process it
        if self.model_image_size != (None, None):
            assert self.model_image_size[
                0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[
                1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(
                image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)

        # Normalize to [0, 1]
        image_data /= 255.
        # Add batch dimension. (w, h, 3) -> (m, w, h, 3)
        image_data = np.expand_dims(image_data, 0)

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] +
                                                0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        end = timer()
        print(end - start)
        return image
Example #18
    def ret_frame(cls, image, cv2image, frame_num):
        # Generate colors for drawing bounding boxes.
        hsv_tuples = [(x / 10, 1., 1.) for x in range(10)]
        colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

        start = timer()
      
        model_image_size = (608, 608)
        class_names = cls._get_class()

        new_image_size = (image.width - (image.width % 32),
                                image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
      
        image_shape = [image.size[1], image.size[0]]

        out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)
        
        font = ImageFont.truetype(font='../box_font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))

        thickness = (image.size[0] + image.size[1]) // 300
        
        # Check whether each detection is a new object or not
        # cls.all_ObjectID_pos.append(tmp_ped)
        # tmp_ped = {'frame':frame_num,'id':int(cls.IDvalue), 'left':left, 'top':top, 'right':right, 'bottom':bottom}
        #print("len(cls.all_ObjectID_pos) = ", len(cls.all_ObjectID_pos))
        #for kt in range(len(cls.all_ObjectID_pos)):
        #    tmp_current_pos = cls.all_ObjectID_pos[kt]  # one entry per ObjectID
        #
        #    for k, v in tmp_current_pos.items():
        #        # e.g. k = 'Tanaka', v = 80
        #        if k == "frame":
        #            print("Key = ", k)
        #            print("Value = ",v)
        #
        #            #if (frame_num > 1) and ((frame_num-1) == k):  # when it is the previous frame number
        #            #    id:
        #            #    if
        #        if k == "id":
        #            print("Key = ", k)
        #            print("Value = ",v)

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            # int32 is not JSON-serializable, so convert to plain int()
            top = int(top)
            left = int(left)
            bottom = int(bottom)
            right = int(right)
            
            # 2: compute the detected box size; the configured threshold is 1024 px^2
            sq_bdbox = (bottom - top)*(right - left)

            if sq_bdbox >= 1024:  # box-size threshold
                if predicted_class == 'Car' or predicted_class == 'Pedestrian':
                    cls.IDvalue = cls.IDvalue + 1
                    label = '{}_{:.2f}_{}'.format(predicted_class, score, str(cls.IDvalue))#put the ID for each obj
                    draw = ImageDraw.Draw(image)
                    label_size = draw.textsize(label, font)
                    if top - label_size[1] >= 0:
                        text_origin = np.array([left, top - label_size[1]])
                    else:
                        text_origin = np.array([left, top + 1])
            
                    # My kingdom for a good redistributable image drawing library.
                    for i in range(thickness):
                        draw.rectangle([left + i, top + i, right - i, bottom - i], outline=colors[c])
                    draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=colors[c])
                    draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                    del draw
        
        end = timer()
        print("1フレームの処理時間 = ", end - start)
        return image
Example #19
    def ret_frame(cls, image, frame_num):
        hsv_tuples = [(x / 10, 1., 1.) for x in range(10)]
        colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

        start = timer()

        model_image_size = (608, 608)
        class_names = cls._get_class()

        new_image_size = (image.width - (image.width % 32), image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        image_shape = [image.size[1], image.size[0]]

        out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)

        font = ImageFont.truetype(font='../box_font/FiraMono-Medium.otf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        # Per-frame bookkeeping
        if frame_num > 1:
            cls.all_ObjectID_oldpos = copy.copy(cls.all_ObjectID_pos)
            #cls.all_ObjectID_pos.clear()
            cls.all_ObjectID_pos = []

        # Per-object processing
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            # int32 is not JSON-serializable, so convert to plain int()
            top, left, bottom, right = box
            top    = int(max(0, np.floor(top + 0.5).astype('int32')))
            left   = int(max(0, np.floor(left + 0.5).astype('int32')))
            bottom = int(min(image.size[1], np.floor(bottom + 0.5).astype('int32')))
            right  = int(min(image.size[0], np.floor(right + 0.5).astype('int32')))

            # 2: compute the detected box size; the configured threshold is 1024 px^2
            sq_bdbox = (bottom - top)*(right - left)

            cls.ObjID_setimg = 0
            cls.matches_cnt = 0
            print("len(cls.all_ObjectID_oldpos) = ", len(cls.all_ObjectID_oldpos))

            if sq_bdbox >= 1024:  # box-size threshold (1024)
                #if predicted_class == 'Car' or predicted_class == 'Pedestrian':
                if predicted_class == 'Car':

                    #print("cls.all_ObjectID_oldpos = ", cls.all_ObjectID_oldpos)

                    # 3: compute the center coordinates of the detected box
                    center_bdboxX = int((right - left)/2) + left   # horizontal center
                    center_bdboxY = int((bottom - top)/2) + top    # vertical center

                    if frame_num == 1:  # register everything in the first frame
                        cls.IDvalue_car = cls.IDvalue_car + 1
                        cls.ObjID_setimg = cls.IDvalue_car
                    else:
                        for kt in range(len(cls.all_ObjectID_oldpos)):
                            tmp_old_pos = cls.all_ObjectID_oldpos[kt]
                            #print("tmp_old_pos = ", tmp_old_pos)

                            cls.ObjID_setimg = cls.getValue('id', tmp_old_pos)
                            cls.old_left = cls.getValue('left', tmp_old_pos)
                            cls.old_top  = cls.getValue('top', tmp_old_pos)
                            cls.old_right = cls.getValue('right', tmp_old_pos)
                            cls.old_bottom = cls.getValue('bottom', tmp_old_pos)

                            #print("ObjID_setimg = ", cls.ObjID_setimg)
                            #print("old_left = ", cls.old_left)
                            #print("old_top = ",cls.old_top)
                            #print("old_right = ",cls.old_right)
                            #print("old_bottom = ",cls.old_bottom)

                            band_value = 15
                            exp_old_left = int(cls.old_left - band_value)
                            exp_old_top = int(cls.old_top - band_value)
                            exp_old_right = int(cls.old_right + band_value)
                            exp_old_bottom = int(cls.old_bottom + band_value)

                            #print("center_bdboxX = ", center_bdboxX)
                            #print("center_bdboxY = ", center_bdboxY)

                            #print("exp_old_left = ", exp_old_left)
                            #print("exp_old_top = ", exp_old_top)
                            #print("exp_old_right = ",exp_old_right)
                            #print("exp_old_bottom = ",exp_old_bottom)

                            # the center must fall inside the (expanded) old box on both axes
                            if (( center_bdboxX >= exp_old_left ) and ( center_bdboxX <= exp_old_right )):
                                if (( center_bdboxY >= exp_old_top ) and ( center_bdboxY <= exp_old_bottom )):
                                    cls.matches_cnt = cls.matches_cnt + 1

                        # report the result of checking against every object from the previous frame
                        print("cls.matches_cnt = ", cls.matches_cnt)

                        # if no existing ID matched, assign a new one
                        if cls.matches_cnt == 0:
                            cls.old_top = 0
                            cls.old_left = 0
                            cls.old_bottom = 0
                            cls.old_right = 0

                            cls.IDvalue_car = cls.IDvalue_car + 1
                            cls.ObjID_setimg = cls.IDvalue_car

                    # register the updated object ID
                    tmp_car = [{'Key':'frame',  'Value':frame_num},
                               {'Key':'id',     'Value':cls.ObjID_setimg},
                               {'Key':'left',   'Value':left},
                               {'Key':'top',    'Value':top},
                               {'Key':'right',  'Value':right},
                               {'Key':'bottom', 'Value':bottom}]
                    cls.all_ObjectID_pos.append(tmp_car)

                    label = '{}_{:.2f}_{}'.format(predicted_class, score, str(cls.ObjID_setimg))#put the ID for each obj
                    draw = ImageDraw.Draw(image)
                    label_size = draw.textsize(label, font)
                    if top - label_size[1] >= 0:
                        text_origin = np.array([left, top - label_size[1]])
                    else:
                        text_origin = np.array([left, top + 1])

                    if cls.old_top - label_size[1] >= 0:
                        text_origin2 = np.array([cls.old_left, cls.old_top - label_size[1]])
                    else:
                        text_origin2 = np.array([cls.old_left, cls.old_top + 1])

                    # My kingdom for a good redistributable image drawing library.
                    for i in range(thickness):
                        draw.rectangle([left + i, top + i, right - i, bottom - i], outline=colors[0])
                    draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=colors[0])
                    draw.text(text_origin, label, fill=(0, 0, 0), font=font)

                    for i in range(thickness):
                        draw.rectangle([cls.old_left + i, cls.old_top + i, cls.old_right - i, cls.old_bottom - i], outline=colors[1])
                    draw.rectangle([tuple(text_origin2), tuple(text_origin2 + label_size)], fill=colors[1])
                    draw.text(text_origin2, label, fill=(0, 0, 0), font=font)

                    del draw

        end = timer()
        print("1フレームの処理時間 = ", end - start)
        return image
Example #20
    def detect_image(cls, image, frame_num):
        start = timer()
      
        model_image_size = (608, 608)
        class_names = cls._get_class()

        new_image_size = (image.width - (image.width % 32),
                                image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
      
        image_shape = [image.size[1], image.size[0]]

        out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)

        Car_result_ALL = []
        Pedestrian_result_ALL = []
        all_result = []

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            
            label = '{} {:.2f}'.format(predicted_class, score)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            # int32 is not JSON-serializable, so convert to plain int()
            top = int(top)
            left = int(left)
            bottom = int(bottom)
            right = int(right)

            # 2: compute the detected box size; the configured threshold is 1024 px^2
            sq_bdbox = (bottom - top)*(right - left)

            if sq_bdbox >= 1024:  # box-size threshold
                if predicted_class == 'Car':
                    cls.IDvalue = cls.IDvalue + 1
                    # a car was detected
                    Car_result = {'id': int(cls.IDvalue), 'box2d': [left,top,right,bottom]}  # prediction result

                    # build the info used to predict this object's position in the next frame
                    tmp_car = {'frame':frame_num,'id':int(cls.IDvalue), 'left':left, 'top':top, 'right':right, 'bottom':bottom}
                    cls.all_ObjectID_pos.append(tmp_car)

                    # store the detected object (left empty when nothing is detected)
                    Car_result_ALL.append(Car_result)  # car

                elif predicted_class == 'Pedestrian':
                    cls.IDvalue = cls.IDvalue + 1
                    # a pedestrian was detected
                    Pedestrian_result = {'id': int(cls.IDvalue), 'box2d': [left,top,right,bottom]}  # prediction result

                    # build the info used to predict this object's position in the next frame
                    tmp_ped = {'frame':frame_num,'id':int(cls.IDvalue), 'left':left, 'top':top, 'right':right, 'bottom':bottom}
                    cls.all_ObjectID_pos.append(tmp_ped)

                    # store the detected object (left empty when nothing is detected)
                    Pedestrian_result_ALL.append(Pedestrian_result)  # pedestrian
        
        all_result = {'Car': Car_result_ALL, 'Pedestrian': Pedestrian_result_ALL}
        end = timer()
        print("1フレームの処理時間 = ", end - start)
        return all_result
Example #21
    def ret_frame(cls, image, cv2image, frame_num):
        # Generate colors for drawing bounding boxes.
        hsv_tuples = [(x / 10, 1., 1.) for x in range(10)]
        colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

        start = timer()
      
        model_image_size = (608, 608)
        class_names = cls._get_class()

        new_image_size = (image.width - (image.width % 32),
                                image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
      
        image_shape = [image.size[1], image.size[0]]

        out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)
        
        if frame_num == 1:
            cls.old_out_boxes = out_boxes
            cls.old_out_scores = out_scores
            cls.old_out_classes = out_classes
            backward_out_boxes = cls.old_out_boxes
            backward_out_scores = cls.old_out_scores
            backward_out_classes = cls.old_out_classes
        else:
            backward_out_boxes = cls.old_out_boxes
            backward_out_scores = cls.old_out_scores
            backward_out_classes = cls.old_out_classes
            #cls.old_out_boxes = 0  # clear
            #cls.old_out_scores = 0  # clear
            #cls.old_out_classes = 0  # clear
            cls.old_out_boxes = out_boxes  # update with the new detection results
            cls.old_out_scores = out_scores  # update with the new detection results
            cls.old_out_classes = out_classes  # update with the new detection results

        current_pos = []

        font = ImageFont.truetype(font='../box_font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))

        thickness = (image.size[0] + image.size[1]) // 300
        
        #Check new object or not
        for it, ct in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[ct]
            box = out_boxes[it]
            score = out_scores[it]
            
            print("box = ", box)
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            
            boxcent_xpos = int((left + right)/2)  # horizontal center of the box
            boxcent_ypos = int((top + bottom)/2)  # vertical center of the box
            
            if frame_num == 1:  # add everything in the first frame
                cls.IDvalue = cls.IDvalue + 1  # assign an object ID to every detection
                tmp = {'ID':cls.IDvalue, 'box_ord':it, 'x':boxcent_xpos, 'y':boxcent_ypos}
                current_pos.append(tmp)
                #LOGGING
                tmp = {'FRNUM':frame_num, 'ID':cls.IDvalue, 'box_ord':it, 'x':boxcent_xpos, 'y':boxcent_ypos}
                cls.all_ObjectID_pos.append(tmp)
                print("No.1 frame box center = ", current_pos)
                
            else:  # otherwise, diff against the previous frame to check for new object IDs
                #cls.old_out_boxes
                #cls.old_out_scores
                #cls.old_out_classes
                
                for iold, cold in reversed(list(enumerate(cls.old_out_classes))):
                    predicted_class_old = class_names[cold]
                    box_old = cls.old_out_boxes[iold]
                    score_old = cls.old_out_scores[iold]
                    
                    top_old, left_old, bottom_old, right_old = box_old
                    top_old = max(0, np.floor(top_old + 0.5).astype('int32'))
                    left_old = max(0, np.floor(left_old + 0.5).astype('int32'))
                    bottom_old = min(image.size[1], np.floor(bottom_old + 0.5).astype('int32'))
                    right_old = min(image.size[0], np.floor(right_old + 0.5).astype('int32'))

                    # check whether this detection's center lies inside a box detected in the previous frame
                    if not top_old < boxcent_ypos < bottom_old:
                        if not left_old < boxcent_xpos < right_old:
                            cls.IDvalue = cls.IDvalue + 1
                            tmp = {'ID':cls.IDvalue, 'box_ord':it, 'x':boxcent_xpos, 'y':boxcent_ypos}
                            current_pos.append(tmp)
                            #LOGGING
                            tmp = {'FRNUM':frame_num, 'ID':cls.IDvalue, 'box_ord':it, 'x':boxcent_xpos, 'y':boxcent_ypos}
                            cls.all_ObjectID_pos.append(tmp)
                            print("New object in frame ::box center = ", current_pos)
                    
        #current_pos check
        print("current_pos = ", len(current_pos))
        for tmp_current_pos in current_pos:
            for k, v in tmp_current_pos.items():
                if k in ("ID", "box_ord"):
                    print("Key = ", k)
                    print("Value = ", v)

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            
            label = '{}_{:.2f}_{}'.format(predicted_class, score, str(cls.IDvalue))  # tag the label with the most recently assigned object ID

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            # int32 is not JSON-serializable, so convert to plain int()
            top = int(top)
            left = int(left)
            bottom = int(bottom)
            right = int(right)
            
            #1 Predict the object position in the next frame from the detection result
            if len(current_pos) > 0:
                t_tuple = (left, top, int(right - left), int(bottom - top))
                bbox = t_tuple
                #tracker = cv2.TrackerMedianFlow_create()
                tracker = cv2.TrackerKCF_create()
                cls.trackers.add(tracker, cv2image, bbox)

            track, boxes = cls.trackers.update(cv2image)
            
            if track:  # if tracking succeeded
                for bbox in boxes:
                    #(x, y, w, h) = [int(v) for v in box]
                    #IDvalue_track = 
                    top_track    = int(bbox[1])
                    left_track   = int(bbox[0])
                    bottom_track = int(bbox[1] + bbox[3])
                    right_track  = int(bbox[0] + bbox[2])
                    
                    chksq_bdbox = (bottom_track - top_track)*(right_track - left_track)
                    if chksq_bdbox >= 1024:  # rectangle-size threshold
                        if predicted_class == 'Car' or predicted_class == 'Pedestrian':  # Car or Pedestrian
                            draw = ImageDraw.Draw(image)
                            label_size = draw.textsize(label, font)
                            
                            if top_track - label_size[1] >= 0:
                                text_origin = np.array([left_track, top_track - label_size[1]])
                            else:
                                text_origin = np.array([left_track, top_track + 1])


                            for t in range(thickness):  # use `t`, not `i`, to avoid shadowing the outer loop variable
                                draw.rectangle([left_track + t, top_track + t, right_track - t, bottom_track - t], outline=colors[9])
                            del draw
            #else:  # if tracking failed
                #
                #del draw

            #2 Compute the detected box size; threshold set to 1024 px**2
            #sq_bdbox = (bottom - top)*(right - left)

            #if sq_bdbox >= 1024:  # rectangle-size threshold
            #    if predicted_class == 'Car' or predicted_class == 'Pedestrian':  # Car or Pedestrian
                    # My kingdom for a good redistributable image drawing library.
                    #for i in range(thickness):
                    #    draw.rectangle([left + i, top + i, right - i, bottom - i], outline=colors[c])
                    #draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=colors[c])
                    #draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                    #del draw
        
        end = timer()
        print("1フレームの処理時間 = ", end - start)
        return image
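This example calls cls.trackers.add(...) and cls.trackers.update(...) without showing how cls.trackers is created. A minimal sketch of the assumed setup, using OpenCV's multi-tracker from opencv-contrib (the classes moved under cv2.legacy in OpenCV 4.5+); the frame below is a stand-in:

import cv2
import numpy as np

frame_bgr = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in BGR frame

trackers = cv2.MultiTracker_create()  # cv2.legacy.MultiTracker_create() in OpenCV 4.5+
# bbox is (x, y, w, h) in pixels, matching the t_tuple built above
trackers.add(cv2.TrackerKCF_create(), frame_bgr, (100, 120, 48, 64))

ok, boxes = trackers.update(frame_bgr)  # boxes: one (x, y, w, h) per tracked object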
Example #22
0
def main(video_path,
         model_path,
         track_target=0,
         visualize=True):
    """run video prediction

    Args:
        video_path:     video path
        model_path:     model path
        track_target:   0-person; 1-bicycle; 2-car; 7-truck
        visualize:      whether to visualize the tracking list

    """

    detector = Detector(model_path=model_path)
    kalman_filter = KalmanFilter()
    capture = cv2.VideoCapture(video_path)
    height = capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width = capture.get(cv2.CAP_PROP_FRAME_WIDTH)

    # tracking list
    tracking_list = []
    label_count = 0
    is_first_frame = True

    while True:
        success, frame = capture.read()

        if not success:
            capture.release()
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # convert to Image object
        frame_pil = Image.fromarray(np.uint8(frame))
        new_frame = letterbox_image(frame_pil, INPUT_SIZE)
        image_array = np.expand_dims(np.array(new_frame, dtype='float32') / 255.0, axis=0)
        image_shape = np.expand_dims(np.array([height, width], dtype='float32'), axis=0)
        image_constant = tf.constant(image_array, dtype=tf.float32)
        image_shape = tf.constant(image_shape, dtype=tf.float32)

        # detect image
        results = detector.detect(image_constant, image_shape)
        pred_results = list(results.values())
        boxes = pred_results[0].numpy()
        # scores = pred_results[1].numpy()  # unused here
        classes = pred_results[2].numpy()

        # find tracking targets
        track_id = np.where(classes == track_target)[0]
        track_boxes = boxes[track_id]
        num_tracks = len(track_boxes)
        if num_tracks > 0:
            track_boxes = box2xyah(track_boxes)
            track_boxes = list(track_boxes)

        if not is_first_frame:
            # start tracking
            tracking_list, label_count = matching_cascade(tracking_list, track_boxes,
                                                          kalman_filter, label_count)

        if is_first_frame and (num_tracks > 0):
            is_first_frame = False

            for i in range(num_tracks):
                # initialize first frame
                mean_init, cov_init = kalman_filter.initiate(measurement=track_boxes[i])
                # create tracker
                new_tracker = create_tracker(mean=mean_init,
                                             cov=cov_init,
                                             detection=track_boxes[i])
                tracking_list.append(new_tracker)

        if visualize:
            # visualize results
            img = visualize_results(tracking_list, height, frame)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.imshow('avoid invasion', img)
            key = cv2.waitKey(30) & 0xff
            if key == 27:
                capture.release()
                break
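box2xyah is not shown above. DeepSORT-style Kalman filters take measurements as (center-x, center-y, aspect ratio, height); a sketch under the assumption that the detector returns boxes as [top, left, bottom, right] rows, as elsewhere in these examples:

import numpy as np

def box2xyah(boxes):
    # Convert [top, left, bottom, right] boxes to [cx, cy, a, h], where
    # a = width / height. The corner order here is an assumption.
    boxes = np.asarray(boxes, dtype=np.float32)
    top, left, bottom, right = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    h = bottom - top
    w = right - left
    cx = left + w / 2.0
    cy = top + h / 2.0
    a = w / np.maximum(h, 1e-6)  # guard against zero-height boxes
    return np.stack([cx, cy, a, h], axis=1)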
Example #23
0
def eval(model_path, min_Iou=0.5, yolo_weights=None):
    """
    Introduction
    ------------
        Compute the model's mAP on the COCO validation set, used to evaluate the model.
    """
    ground_truth = {}
    class_pred = defaultdict(list)
    gt_counter_per_class = defaultdict(int)
    input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ))
    input_image = tf.placeholder(shape=[None, 416, 416, 3], dtype=tf.float32)
    predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                               config.classes_path, config.anchors_path)
    boxes, scores, classes = predictor.predict(input_image, input_image_shape)
    val_Reader = Reader("val",
                        config.data_dir,
                        config.anchors_path,
                        config.num_classes,
                        input_shape=config.input_shape,
                        max_boxes=config.max_boxes)
    image_files, bboxes_data = val_Reader.read_annotations()
    allBBox = 0
    with tf.Session() as sess:
        if yolo_weights is not None:
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(
                    input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope='predict'),
                                   weights_file=yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(model_path)
            #saver.restore(sess, model_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        for index in range(len(image_files)):
            val_bboxes = []
            image_file = image_files[index]
            file_id = os.path.split(image_file)[-1].split('.')[0]
            for bbox in bboxes_data[index]:
                left, top, right, bottom, class_id = bbox[:5]
                class_name = val_Reader.class_names[int(class_id)]
                bbox = [float(left), float(top), float(right), float(bottom)]
                val_bboxes.append({
                    "class_name": class_name,
                    "bbox": bbox,
                    "used": False
                })
                gt_counter_per_class[class_name] += 1
            ground_truth[file_id] = val_bboxes
            image = Image.open(image_file)
            resize_image = letterbox_image(image, (416, 416))
            image_data = np.array(resize_image, dtype=np.float32)
            image_data /= 255.
            image_data = np.expand_dims(image_data, axis=0)

            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes],
                feed_dict={
                    input_image: image_data,
                    input_image_shape: [image.size[1], image.size[0]]
                })
            allBBox += len(out_boxes)
            print("detect {}/{} found boxes: {},allBBox:{}".format(
                index, len(image_files), len(out_boxes), allBBox))
            for o, c in enumerate(out_classes):
                predicted_class = val_Reader.class_names[c]
                box = out_boxes[o]
                score = out_scores[o]

                top, left, bottom, right = box
                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(image.size[1],
                             np.floor(bottom + 0.5).astype('int32'))
                right = min(image.size[0],
                            np.floor(right + 0.5).astype('int32'))

                bbox = [left, top, right, bottom]
                class_pred[predicted_class].append({
                    "confidence": str(score),
                    "file_id": file_id,
                    "bbox": bbox
                })

    # Compute the AP for each class
    sum_AP = 0.0
    sum_rec = 0.0
    sum_prec = 0.0
    count_true_positives = {}
    for class_index, class_name in enumerate(
            sorted(gt_counter_per_class.keys())):
        count_true_positives[class_name] = 0
        predictions_data = class_pred[class_name]
        # Total number of predicted boxes for this class
        nd = len(predictions_data)
        tp = [0] * nd  # true positive
        fp = [0] * nd  # false positive
        for idx, prediction in enumerate(predictions_data):
            file_id = prediction['file_id']
            ground_truth_data = ground_truth[file_id]
            bbox_pred = prediction['bbox']
            Iou_max = -1
            gt_match = None
            for obj in ground_truth_data:
                if obj['class_name'] == class_name:
                    bbox_gt = obj['bbox']
                    bbox_intersect = [
                        max(bbox_pred[0], bbox_gt[0]),
                        max(bbox_pred[1], bbox_gt[1]),
                        min(bbox_pred[2], bbox_gt[2]),
                        min(bbox_pred[3], bbox_gt[3])
                    ]
                    intersect_width = bbox_intersect[2] - bbox_intersect[0] + 1
                    intersect_height = bbox_intersect[3] - bbox_intersect[1] + 1
                    if intersect_height > 0 and intersect_width > 0:
                        union_area = ((bbox_pred[2] - bbox_pred[0] + 1) *
                                      (bbox_pred[3] - bbox_pred[1] + 1) +
                                      (bbox_gt[2] - bbox_gt[0] + 1) *
                                      (bbox_gt[3] - bbox_gt[1] + 1) -
                                      intersect_width * intersect_height)
                        Iou = intersect_height * intersect_width / union_area
                        if Iou > Iou_max:
                            Iou_max = Iou
                            gt_match = obj
            if Iou_max > min_Iou:
                # Check `gt_match is not None` first to avoid indexing None
                if gt_match is not None and not gt_match['used']:
                    tp[idx] = 1
                    gt_match['used'] = True
                else:
                    fp[idx] = 1
            else:
                fp[idx] = 1
        # Compute precision and recall
        sum_class = 0
        for idx, val in enumerate(fp):
            fp[idx] += sum_class
            sum_class += val
        sum_class = 0
        for idx, val in enumerate(tp):
            tp[idx] += sum_class
            sum_class += val
        rec = tp[:]
        for idx, val in enumerate(tp):
            rec[idx] = tp[idx] / gt_counter_per_class[class_name]
        prec = tp[:]
        for idx, val in enumerate(tp):
            prec[idx] = tp[idx] / (fp[idx] + tp[idx])

        ap, mrec, mprec = voc_ap(rec, prec)
        sum_AP += ap
        sum_rec += (mrec[-2])
        sum_prec += sum(mprec) / (allBBox + 2)
        f1 = 2 * sum_rec * sum_prec / (sum_rec + sum_prec)

    MAP = sum_AP / len(gt_counter_per_class) * 100
    #rec = sum_rec / len(gt_counter_per_class) * 100
    #prec = sum_prec / len(gt_counter_per_class) * 100
    print("The Model Eval MAP: {},prec:{},rec:{},f1:{}".format(
        MAP, sum_prec, sum_rec, f1))
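voc_ap is referenced above but not defined in this snippet. The usual implementation in eval scripts of this shape (and consistent with the mrec[-2] / mprec usage above) computes a VOC-style AP from the precision envelope; a sketch:

def voc_ap(rec, prec):
    # Append sentinels so the curve starts at recall 0 and ends at recall 1
    mrec = [0.0] + list(rec) + [1.0]
    mprec = [0.0] + list(prec) + [0.0]
    # Make precision monotonically decreasing (the precision envelope)
    for i in range(len(mprec) - 2, -1, -1):
        mprec[i] = max(mprec[i], mprec[i + 1])
    # AP = sum of (delta recall) * precision at each point where recall changes
    ap = 0.0
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i - 1]:
            ap += (mrec[i] - mrec[i - 1]) * mprec[i]
    return ap, mrec, mprec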
Example #24
0
def detect_video(weights, video_filepath, img_size, conf_thres, iou_thres):

    start_time = time.time()

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoCapture(video_filepath)
    fps = video.get(cv2.CAP_PROP_FPS)
    # Property 3 is the frame width and 4 the height; use the named constants
    w = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(w, h)
    #w = 1280
    #h = 720
    result_video_filepath = video_filepath.split('/')[-1].split(
        '.')[0] + 'yolov5_output.mp4'
    out = cv2.VideoWriter(result_video_filepath, fourcc, int(fps), (w, h))  # frame size as (width, height)

    yolov5_tflite_obj = yolov5_tflite(weights, img_size, conf_thres, iou_thres)

    size = (img_size, img_size)
    no_of_frames = 0

    while True:

        check, frame = video.read()

        if not check:
            break
        #frame = cv2.resize(frame, (w, h))
        no_of_frames += 1
        image_resized = letterbox_image(Image.fromarray(frame), size)
        image_array = np.asarray(image_resized)

        normalized_image_array = image_array.astype(np.float32) / 255.0
        result_boxes, result_scores, result_class_names = yolov5_tflite_obj.detect(
            normalized_image_array)

        if len(result_boxes) > 0:
            result_boxes = scale_coords(size, np.array(result_boxes), (h, w))  # original-image shape as (height, width)
            font = cv2.FONT_HERSHEY_SIMPLEX

            # org
            org = (20, 40)

            # fontScale
            fontScale = 0.5

            # Green color in BGR
            color = (0, 255, 0)

            # Line thickness of 1 px
            thickness = 1

            for i, r in enumerate(result_boxes):

                org = (int(r[0]), int(r[1]))
                cv2.rectangle(frame, (int(r[0]), int(r[1])),
                              (int(r[2]), int(r[3])), (255, 0, 0), 1)
                cv2.putText(
                    frame,
                    str(int(100 * result_scores[i])) + '%  ' +
                    str(result_class_names[i]), org, font, fontScale, color,
                    thickness, cv2.LINE_AA)

        out.write(frame)
        print('FPS:', no_of_frames / (time.time() - start_time))
    out.release()
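Nearly every example in this file leans on a letterbox_image helper. For the PIL-based call sites it is presumably the keras-yolo3-style version, which resizes while preserving aspect ratio and pads with gray; a minimal sketch (the cv2 variant used in Example #26 instead returns the width/height scale ratios):

from PIL import Image

def letterbox_image(image, size):
    # Resize a PIL image to `size` without distortion, padding with gray
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    resized = image.resize((nw, nh), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128, 128, 128))
    canvas.paste(resized, ((w - nw) // 2, (h - nh) // 2))
    return canvas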
Example #25
0
    # print(data)
    # print(lr_img)
    model = yolo(config.norm_epsilon, config.norm_decay, '../model_data/yolo_anchors.txt',
                 '../model_data/coco_classes.txt',
                 config.pre_train)
    g1 = model.GAN_g1(lr_img)
    g2 = model.GAN_g2(g1)
    out = model.yolo_inference(g2.outputs, config.num_anchors // 3, config.num_classes, training=True)  # integer division: anchors per scale


    # tf.summary.scalar('out', out)
    # tf.summary.scalar('g1', g1.outputs)
    # merged_summary_op = tf.summary.merge_all()

    data1 = Image.open("../dog.jpg")
    data1 = utils.letterbox_image(data1, (104, 104))
    data1 = np.array(data1, dtype=np.float32)
    data1 /= 255.
    data1 = np.expand_dims(data1, axis=0)
    # print(data1.shape)

    # loss = model.yolo_loss(output, bbox_true, model.anchors, config.num_classes, config.ignore_thresh)


    with tf.Session() as sess:
        # data1 = cv2.imread('../dog.jpg')
        # data1 = cv2.cvtColor(data1, cv2.COLOR_BGR2RGB)
        # data1 = cv2.resize(data1, (416, 416))
        # lr_img1 = cv2.resize(data1, (104, 104), interpolation=cv2.INTER_CUBIC)
        # data1 = tf.cast(tf.expand_dims(tf.constant(data1), 0), tf.float32)
        # lr_img1 = tf.cast(tf.expand_dims(tf.constant(lr_img1), 0), tf.float32)
        pass  # the session body is elided in this snippet; `pass` keeps the block syntactically valid
Example #26
0
def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """
    Table detection.
    :param img: BGR image to run detection on
    :param sc: target size after preprocessing; a few standard values are commonly recommended
    :param thresh: confidence threshold; only detections above it are kept
    :param NMSthresh: non-maximum suppression threshold
    :return:
    """
    scale = sc[0]
    # First two entries of img.shape: the image height and width
    img_height, img_width = img.shape[:2]
    # Input blob, plus the new-width/original-width and new-height/original-height ratios
    inputBlob, fx, fy = letterbox_image(img[..., ::-1], (scale, scale))
    # Preprocess the input image: scaling and channel reorder [H,W,C] --> [B,C,H,W]
    inputBlob = cv2.dnn.blobFromImage(inputBlob, scalefactor=1.0, size=(scale, scale), swapRB=True, crop=False)
    # Set the network input
    tableDetectNet.setInput(inputBlob / 255.0)
    # Names of the output layers that have no outgoing connections
    outputName = tableDetectNet.getUnconnectedOutLayersNames()
    # Run the forward pass and collect the outputs of those layers
    outputs = tableDetectNet.forward(outputName)
    # Hold the class ids, confidences, and boxes
    class_ids = []
    confidences = []
    boxes = []
    # Filter across the multiple output tensors
    for output in outputs:
        # Each detection has the layout [centerx, centery, w, h, xxxx, class1_confidence, class2_confidence],
        # where centerx/centery are the bbox center and w, h are the bbox width and height
        for detection in output:
            # Entries from index 5 onward are the per-class confidences
            scores = detection[5:]
            # The index with the largest confidence is the class id
            class_id = np.argmax(scores)
            # The corresponding confidence
            confidence = scores[class_id]
            # Keep only detections whose confidence exceeds the threshold
            if confidence > thresh:
                # Map x, y, w, h back to the original image
                center_x = int(detection[0] * scale / fx)
                center_y = int(detection[1] * scale / fy)
                width = int(detection[2] * scale / fx)
                height = int(detection[3] * scale / fy)
                # Top-left corner of the bbox: left is x, top is y
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                # If the class id is 1
                if class_id == 1:
                    class_ids.append(class_id)
                    confidences.append(float(confidence))
                    # Compute the top-left and bottom-right corners, clipped to the image
                    xmin, ymin, xmax, ymax = left, top, left + width, top + height
                    xmin = max(xmin, 1)
                    ymin = max(ymin, 1)
                    xmax = min(xmax, img_width - 1)
                    ymax = min(ymax, img_height - 1)
                    boxes.append([xmin, ymin, xmax, ymax])
    # Array of boxes
    boxes = np.array(boxes)
    # Matching array of confidences
    confidences = np.array(confidences)
    # Filter the boxes with non-maximum suppression
    if len(boxes) > 0:
        boxes, confidences = nms_box(boxes, confidences, score_threshold=thresh, nms_threshold=NMSthresh)

    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences
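nms_box is external to this snippet. A minimal sketch built on cv2.dnn.NMSBoxes, assuming boxes arrive as [xmin, ymin, xmax, ymax] rows as constructed above (NMSBoxes itself expects [x, y, w, h]):

import cv2
import numpy as np

def nms_box(boxes, confidences, score_threshold=0.5, nms_threshold=0.3):
    # Convert corner boxes to the [x, y, w, h] layout cv2.dnn.NMSBoxes expects
    xywh = [[int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
            for x1, y1, x2, y2 in boxes]
    keep = cv2.dnn.NMSBoxes(xywh, confidences.tolist(), score_threshold, nms_threshold)
    keep = np.array(keep).reshape(-1).astype(int)  # older OpenCV returns an Nx1 array
    return boxes[keep], confidences[keep]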
Example #27
0
def detect(model_path, yolo_weights=None, image_path=None):
    """
    Introduction
    ------------
        加载模型,进行预测
    Parameters
    ----------
        model_path: 模型路径
        image_path: 图片路径
    """
    cap = None
    if image_path is None:
        cap = cv2.VideoCapture(0)
    input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ))
    input_image = tf.placeholder(shape=[None, 416, 416, 3], dtype=tf.float32)
    predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                               config.classes_path, config.anchors_path)
    boxes, scores, classes = predictor.predict(input_image, input_image_shape)
    with tf.Session() as sess:
        if yolo_weights is not None:
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(
                    input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope='predict'),
                                   weights_file=yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            saver.restore(
                sess,
                "./test_model/model.ckpt-192192/model.ckpt-44865")  # emotion
            # saver.restore(sess, "./test_model/model.ckpt-192192/model.ckpt-19940") # detection
        while True:
            start_time = time.time()
            if image_path is None:
                ret, image = cap.read()
                if not ret:
                    break
                [h, w] = image.shape[:2]
                print(h, w)
                image = cv2.flip(image, 1)
                image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(image_np)
            else:
                image = Image.open(image_path)
            resize_image = letterbox_image(image, (416, 416))
            image_data = np.array(resize_image, dtype=np.float32)
            image_data /= 255.
            image_data = np.expand_dims(image_data, axis=0)

            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes],
                feed_dict={
                    input_image: image_data,
                    input_image_shape: [image.size[1], image.size[0]]
                })
            print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
            font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                      size=np.floor(3e-2 * image.size[1] +
                                                    0.5).astype('int32'))
            thickness = (image.size[0] + image.size[1]) // 300

            for i, c in reversed(list(enumerate(out_classes))):
                c = int(c[0])
                print("i:{}, c:{}, type:{}".format(i, c, type(c)))
                if c > 2:
                    continue

                predicted_class = predictor.class_names[c]
                box = out_boxes[i]
                score = out_scores[i]

                label = '{} {:.2f}'.format(predicted_class, score)
                draw = ImageDraw.Draw(image)
                label_size = draw.textsize(label, font)

                top, left, bottom, right = box
                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(image.size[1],
                             np.floor(bottom + 0.5).astype('int32'))
                right = min(image.size[0],
                            np.floor(right + 0.5).astype('int32'))
                print(label, (left, top), (right, bottom))

                if top - label_size[1] >= 0:
                    text_origin = np.array([left, top - label_size[1]])
                else:
                    text_origin = np.array([left, top + 1])
                duration = time.time() - start_time
                # My kingdom for a good redistributable image drawing library.
                for t in range(thickness):  # use `t`, not `i`, to avoid shadowing the outer loop variable
                    draw.rectangle([left + t, top + t, right - t, bottom - t],
                                   outline=predictor.colors[c])
                draw.rectangle(
                    [tuple(text_origin),
                     tuple(text_origin + label_size)],
                    fill=predictor.colors[c])
                frame_rate = '{:.2f}'.format(1.0 / duration)
                draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                draw.text(np.array([0, 0]),
                          frame_rate,
                          fill=(0, 0, 0),
                          font=font)
                del draw
            # image.show()
            # image.save('./result1.jpg')
            # cv_img = cv2.CreateImageHeader(image.size, cv2.IPL_DEPTH_8U, 3)  # RGB image
            # cv2.SetData(cv_img, image.tostring(), image.size[0]*3)

            if image_path is not None:
                print('just one image')
                image.show()
                image.save('./result1.jpg')
                break
            else:
                open_cv_image = np.array(image)[:, :, ::-1].copy()
                cv2.imshow('cimage', open_cv_image)
                k = cv2.waitKey(1) & 0xff
                if k == ord('q') or k == 27:
                    break
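The np.array(image)[:, :, ::-1].copy() idiom above converts the PIL RGB image into the BGR channel order OpenCV expects; cv2.cvtColor does the same thing more explicitly:

import cv2
import numpy as np
from PIL import Image

pil_img = Image.new('RGB', (64, 48), (255, 0, 0))         # stand-in solid-red image
bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)  # same result as [:, :, ::-1].copy()
print(bgr[0, 0])  # [  0   0 255] -- red and blue channels swapped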
Example #28
0
def detect(image_path, model_path, yolo_weights=None):
    """
    Introduction
    ------------
        加载模型,进行预测
    Parameters
    ----------
        model_path: 模型路径
        image_path: 图片路径
    """
    image = Image.open(image_path)
    resize_image = letterbox_image(image, (192, 192))
    image_data = np.array(resize_image, dtype=np.float32)
    image_data /= 255.
    image_data = np.expand_dims(image_data, axis=0)
    input_image_shape = tf.placeholder(dtype=tf.int32, shape=(2, ))
    input_image = tf.placeholder(shape=[None, 192, 192, 3], dtype=tf.float32)

    with tf.variable_scope("model_gd"):
        predictor = yolo_predictor(config.obj_threshold, config.nms_threshold,
                                   config.classes_path, config.anchors_path2)
        boxes, scores, classes = predictor.predict(input_image,
                                                   input_image_shape)

    with tf.Session() as sess:
        if yolo_weights is not None:
            with tf.variable_scope('predict'):
                boxes, scores, classes = predictor.predict(
                    input_image, input_image_shape)
            load_op = load_weights(tf.global_variables(scope='predict'),
                                   weights_file=yolo_weights)
            sess.run(load_op)
        else:
            saver = tf.train.Saver()
            model_file = tf.train.latest_checkpoint(model_path)
            saver.restore(sess, model_file)
        out_boxes, out_scores, out_classes = sess.run(
            [boxes, scores, classes],
            feed_dict={
                input_image: image_data,
                input_image_shape: [image.size[1], image.size[0]]
            })
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] +
                                                0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = predictor.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            data = str(label) + "," + str(left) + "," + str(top) + "," + str(
                right) + "," + str(bottom) + "\n"

            with open('./res/data.txt', "a") as f:
                f.write(data)

            print(label, (left, top), (right, bottom))

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            for t in range(thickness):  # use `t`, not `i`, to avoid shadowing the outer loop variable
                draw.rectangle([left + t, top + t, right - t, bottom - t],
                               outline=predictor.colors[c])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=predictor.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        image.show()
        image.save('./res/1.jpg')
Example #29
0
    def detect_image(cls, image):
        start = timer()
      
        model_image_size = (608, 608)
        class_names = cls._get_class()

        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
      
        image_shape = [image.size[1], image.size[0]]

        out_boxes, out_scores, out_classes = cls.compute_output(image_data, image_shape)

        Car_result_ALL = []
        Pedestrian_result_ALL = []
        all_result = []

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = class_names[c]
            box = out_boxes[i]
            score = out_scores[i]
            
            label = '{} {:.2f}'.format(predicted_class, score)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            # int32 is not JSON-serializable, so convert to plain int()
            top = int(top)
            left = int(left)
            bottom = int(bottom)
            right = int(right)
         
            #1 Predict the object position in the next frame from the detection result
            #nxt_result_txt = ' {},{},{},{},{}'.format(left, top, right, bottom, c)
            
            #center = (int((bottom - top)/2), int((right - left)/2))
            #center = np.array([int((bottom - top)/2), 1, int((right - left)/2)], dtype=np.int32)
            #cls.tracker.update(center)
            
            #for j in range(len(cls.tracker.tracks)):
            #    x = int(cls.tracker.tracks[j].trace[-1][0,0])
            #    y = int(cls.tracker.tracks[j].trace[-1][0,1])
            #    print("x=",x)
            #    print("y=",y)
            
            #2 Compute the detected box size; threshold set to 1024 px**2
            sq_bdbox = (bottom - top)*(right - left)

            if sq_bdbox >= 1024:  # rectangle-size threshold
                # Default values when nothing is detected
                #Car_result = {'id': int(0), 'box2d': [int(0),int(0),int(image.height),int(image.width)]}
                #Pedestrian_result = {'id': int(0), 'box2d': [int(0),int(0),int(image.height),int(image.width)]}

                if predicted_class == 'Car':
                    # A car was detected
                    Car_result = {'id': int(cls.IDvalue), 'box2d': [left,top,right,bottom]}  # prediction result

                    # Store the detected object; the default 0 entry is used when nothing is detected
                    Car_result_ALL.append(Car_result)  # car

                elif predicted_class == 'Pedestrian':
                    # A pedestrian was detected
                    Pedestrian_result = {'id': int(cls.IDvalue), 'box2d': [left,top,right,bottom]}  # prediction result

                    # Store the detected object; the default 0 entry is used when nothing is detected
                    Pedestrian_result_ALL.append(Pedestrian_result)  # pedestrian
        
        all_result = {'Car': Car_result_ALL, 'Pedestrian': Pedestrian_result_ALL}
        end = timer()
        print("1フレームの処理時間 = ", end - start)
        return all_result
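The int() casts above exist because NumPy scalars such as np.int32 are not JSON-serializable; a quick demonstration of the failure and the fix:

import json
import numpy as np

result = {'id': np.int32(1), 'box2d': [np.int32(10), np.int32(20), np.int32(300), np.int32(240)]}
try:
    json.dumps(result)
except TypeError as e:
    print(e)  # Object of type int32 is not JSON serializable

# Casting to built-in int (as done in detect_image) fixes it
result = {'id': int(result['id']), 'box2d': [int(v) for v in result['box2d']]}
print(json.dumps(result))  # now serializes cleanly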
Example #30
0
import os
import config
import argparse
import numpy as np
import colorsys
import tensorflow as tf
from yolo_predict import yolo_predictor
from PIL import Image, ImageFont, ImageDraw
from utils import letterbox_image, load_weights

with tf.Session() as sess:
    image_path = "F:\\deeplearning_dataset\\new_ribbon\\split_imge\\1109_(98)_1.jpg"
    ########## Data preparation ##########
    image = Image.open(image_path)
    resize_image = letterbox_image(image, (416, 416))
    image_data = np.array(resize_image, dtype=np.float32)
    image_data /= 255.
    image_data = np.expand_dims(image_data, axis=0)
    #####################################

    pb_file_path = 'F:\\github_working\\version_2_190114\\alsochen-tensorflow-yolo3-threeoutput\\tensorflow-yolo3\\pb_file\\model.pb'
    with tf.gfile.GFile(pb_file_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        pred_im_shape, pred_input_img, boxes, scores, classes = tf.graph_util.import_graph_def(
            graph_def,
            return_elements=[
                'pred_im_shape:0', 'pred_input_img:0', 'predict/pred_boxes:0',
                'predict/pred_scores:0', 'predict/pred_classes:0'
            ])
        out_boxes, out_scores, out_classes = sess.run(