def detect_image(self,image_id,image):
        """Run detection on one image and write the results to a text file.

        Results go to ``./input/detection-results/<image_id>.txt``, one line
        per kept detection in the form ``class score left top right bottom``
        (score truncated to six characters, coordinates truncated to int).
        The file is created/truncated before inference, so it exists and is
        empty when nothing is detected.

        Side effect: ``self.confidence`` is overwritten with 0.05.

        Args:
            image_id: Base name (without extension) of the result file.
            image: Input image handed to ``letterbox_image`` and ``np.shape``
                (presumably a PIL image -- confirm against the caller).

        Returns:
            ``None`` after writing the detections, or ``image`` unchanged
            when the NMS result for the image cannot be converted to a numpy
            array (kept for compatibility with the previous behaviour).
        """
        self.confidence = 0.05
        # The context manager guarantees the file is closed on every exit
        # path, including the early return below and any exception.
        with open("./input/detection-results/"+image_id+".txt","w") as f:
            image_shape = np.array(np.shape(image)[0:2])

            # Letterbox to the network input size, scale to [0, 1] and move
            # the channel axis first; the list wrapper adds the batch axis.
            crop_img = np.array(letterbox_image(image, (self.model_image_size[0],self.model_image_size[1])))
            photo = np.array(crop_img,dtype = np.float32)
            photo /= 255.0
            photo = np.transpose(photo, (2, 0, 1))
            images = np.asarray([photo])

            with torch.no_grad():
                images = torch.from_numpy(images)
                if self.cuda:
                    images = images.cuda()
                outputs = self.net(images)

            # This variant decodes two output heads.
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(2)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output, len(self.class_names),
                                                    conf_thres=self.confidence,
                                                    nms_thres=0.3)

            # Presumably the entry is None when nothing survives NMS, which
            # makes ``.cpu()`` fail.  ``Exception`` (not a bare except) keeps
            # KeyboardInterrupt/SystemExit propagating.
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except Exception:
                return image

            # Final score is column 4 times column 5; the last column is the
            # class index.
            scores = batch_detections[:,4]*batch_detections[:,5]
            top_index = scores > self.confidence
            top_conf = scores[top_index]
            top_label = np.array(batch_detections[top_index,-1],np.int32)
            top_bboxes = np.array(batch_detections[top_index,:4])
            top_xmin = np.expand_dims(top_bboxes[:,0],-1)
            top_ymin = np.expand_dims(top_bboxes[:,1],-1)
            top_xmax = np.expand_dims(top_bboxes[:,2],-1)
            top_ymax = np.expand_dims(top_bboxes[:,3],-1)

            # Remove the gray letterbox bars: map the boxes back to the
            # original image.
            boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)

            for i, c in enumerate(top_label):
                predicted_class = self.class_names[c]
                score = str(top_conf[i])

                top, left, bottom, right = boxes[i]
                f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))

        return
    def predict(self,image):
        """Detect objects in ``image`` and return the raw results.

        The image is letterboxed to ``self.input_shape``, scaled to [0, 1],
        run through ``self.net``, decoded by the three ``self.yolo_decodes``
        heads and filtered with ``non_max_suppression`` (NMS threshold 0.3).
        Detections whose combined score (column 4 times column 5) exceeds
        ``self.confidence`` are kept.

        Args:
            image: Input image handed to ``letterbox_image`` and ``np.shape``.

        Returns:
            A tuple ``(boxes, top_label, top_conf)``: the output of
            ``yolo_correct_boxes``, the int32 class indices and the combined
            scores of the kept detections.
        """
        image_shape = np.array(np.shape(image)[0:2])
        net_h = self.input_shape[0]
        net_w = self.input_shape[1]

        # Letterbox, normalise and convert HWC -> CHW; wrapping the array in
        # a list adds the batch dimension.
        letterboxed = np.array(letterbox_image(image, (net_h, net_w)))
        chw = np.transpose(np.array(letterboxed, dtype=np.float32) / 255.0, (2, 0, 1))
        batch = np.asarray([chw])

        with torch.no_grad():
            batch = torch.from_numpy(batch)
            if self.cuda:
                batch = batch.cuda()
            outputs = self.net(batch)

        decoded = [self.yolo_decodes[head](outputs[head]) for head in range(3)]
        detections = non_max_suppression(torch.cat(decoded, 1), len(self.class_names),
                                         conf_thres=self.confidence,
                                         nms_thres=0.3)

        detections = detections[0].cpu().numpy()

        # Keep detections whose combined score passes the threshold.
        scores = detections[:, 4] * detections[:, 5]
        keep = scores > self.confidence
        top_conf = scores[keep]
        top_label = np.array(detections[keep, -1], np.int32)
        kept_boxes = np.array(detections[keep, :4])
        top_xmin = kept_boxes[:, 0:1]
        top_ymin = kept_boxes[:, 1:2]
        top_xmax = kept_boxes[:, 2:3]
        top_ymax = kept_boxes[:, 3:4]

        # Remove the gray letterbox bars.
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([net_h, net_w]), image_shape)

        return boxes,top_label,top_conf
示例#3
0
    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled boxes onto it.

        The image is letterboxed to ``self.model_image_size``, run through
        ``self.net``, decoded by the three ``self.yolo_decodes`` heads and
        filtered with ``non_max_suppression``.  Each kept detection is grown
        by 5 pixels on every side, clamped to the image, and drawn together
        with a ``"<class> <score>"`` label.  The UTF-8 encoded label is also
        printed.

        Args:
            image: Image to annotate in place (used with ``ImageDraw.Draw``,
                so presumably a PIL image).

        Returns:
            The same ``image`` object, annotated; returned unchanged when the
            NMS result cannot be converted to a numpy array.
        """
        image_shape = np.array(np.shape(image)[0:2])

        # Letterbox, scale to [0, 1], HWC -> CHW, then add the batch axis.
        crop_img = np.array(letterbox_image(image, (self.model_image_size[0],self.model_image_size[1])))
        photo = np.array(crop_img,dtype = np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                                conf_thres=self.confidence,
                                                nms_thres=0.3)
        # Presumably the entry is None when nothing is detected, so ``.cpu()``
        # fails.  ``Exception`` lets KeyboardInterrupt/SystemExit propagate.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            return image

        scores = batch_detections[:,4]*batch_detections[:,5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index,-1],np.int32)
        top_bboxes = np.array(batch_detections[top_index,:4])
        top_xmin = np.expand_dims(top_bboxes[:,0],-1)
        top_ymin = np.expand_dims(top_bboxes[:,1],-1)
        top_xmax = np.expand_dims(top_bboxes[:,2],-1)
        top_ymax = np.expand_dims(top_bboxes[:,3],-1)

        # Remove the gray letterbox bars.
        boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)

        img_h = np.shape(image)[0]
        img_w = np.shape(image)[1]

        font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * img_w + 0.5).astype('int32'))

        # Clamp to at least 1: for images smaller than the network input the
        # integer division is 0 and no outline would be drawn at all.
        thickness = max((img_h + img_w) // self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            color = self.colors[self.class_names.index(predicted_class)]

            # Grow the box by 5 px per side, round, and clamp to the image.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            print(label.encode('utf-8'))

            # Put the label above the box when it fits, otherwise just
            # inside the top edge.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # A thick outline is drawn as nested 1-px rectangles.  The loop
            # variable is distinct from the outer detection index.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image
示例#4
0
    def detect_image(self, image, aligned_depth_frame=None, color_intrin_part=None, mode=1):
        """Detect objects, optionally measure their depth, and annotate ``image``.

        The image is letterboxed to ``self.model_image_size``, run through
        ``self.net``, decoded by the three ``self.yolo_decodes`` heads and
        filtered with ``non_max_suppression``.  Each kept detection is grown
        by 5 pixels per side, clamped to the image and drawn with a
        ``"<class> <score>\\n<distance>"`` label.  A summary line per
        detection is printed.

        When both ``aligned_depth_frame`` and ``color_intrin_part`` are
        truthy, the depth at the box centre is read with
        ``aligned_depth_frame.get_distance`` and back-projected with the
        intrinsics.  Otherwise depth and camera coordinates are reported as
        0 (previously this path raised ``NameError`` in the summary print).

        Args:
            image: Image to annotate in place (used with ``ImageDraw.Draw``,
                so presumably a PIL image).
            aligned_depth_frame: Optional depth frame exposing ``width``,
                ``height`` and ``get_distance(x, y)``.
            color_intrin_part: Optional sequence ``[ppx, ppy, fx, fy]``.
            mode: Depth sampling strategy.  Only ``1`` (depth of the centre
                pixel) is implemented.

        Returns:
            The same ``image`` object, annotated; returned unchanged when the
            NMS result cannot be converted to a numpy array.

        Raises:
            ValueError: If depth data is supplied, a detection is present and
                ``mode`` is not 1 (previously an accidental ``NameError``).
        """
        image_shape = np.array(np.shape(image)[0:2])

        # Letterbox, scale to [0, 1], HWC -> CHW, then add the batch axis.
        crop_img = np.array(letterbox_image(image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        # Presumably the entry is None when nothing is detected, so ``.cpu()``
        # fails.  ``Exception`` lets KeyboardInterrupt/SystemExit propagate.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            return image

        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
        top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
        top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
        top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the gray letterbox bars.
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)

        img_h = np.shape(image)[0]
        img_w = np.shape(image)[1]

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * img_w + 0.5).astype('int32'))

        # Clamp to at least 1: for images smaller than the network input the
        # integer division is 0 and no outline would be drawn at all.
        thickness = max((img_h + img_w) // self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]
            color = self.colors[self.class_names.index(predicted_class)]

            # Grow the box by 5 px per side, round, and clamp to the image.
            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))

            center_x = int(round((left + right) / 2))
            center_y = int(round((top + bottom) / 2))

            if (aligned_depth_frame and color_intrin_part):
                # Mode 1: depth of the box centre pixel.
                # Modes 2-4 (mean over all box pixels, mean after trimming
                # the lowest/highest 10 %, mean over non-zero pixels) existed
                # only as commented-out sketches and are not implemented.
                if mode != 1:
                    raise ValueError(
                        "unsupported depth mode {!r}; only mode=1 is implemented".format(mode))

                ppx = color_intrin_part[0]
                ppy = color_intrin_part[1]
                fx = color_intrin_part[2]
                fy = color_intrin_part[3]
                width = aligned_depth_frame.width
                height = aligned_depth_frame.height

                # Keep the sampled pixel inside the depth frame.
                center_x = min(max(1, center_x), width - 1)
                center_y = min(max(1, center_y), height - 1)
                target_depth = aligned_depth_frame.get_distance(center_x, center_y)
                strDistance = "\n%.2f m" % target_depth
                # Back-project the pixel with the given intrinsics.
                target_xy_true = [(center_x - ppx) * target_depth / fx,
                                  (center_y - ppy) * target_depth / fy]
            else:
                # No depth information: report zeros so the summary print
                # below has values to format.
                target_depth = 0.0
                target_xy_true = [0.0, 0.0]
                strDistance = "\n 0 m"
            target_xy_pixel = [center_x, center_y]

            # Draw the box and its label.
            label = '{} {:.2f}'.format(predicted_class, score)
            label = label + strDistance
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            print('检测出目标:{} ;实际坐标为(m):({:.3f}, {:.3f}, {:.3f}) \n中心点像素坐标(pixel):({}, {}) ;中心点相机坐标(m):({},{});深度: {} m\n'.format(
                predicted_class,
                target_xy_true[0], target_xy_true[1], target_depth,
                target_xy_pixel[0],
                target_xy_pixel[1],
                target_xy_true[0],
                target_xy_true[1],
                target_depth))

            # Put the label above the box when it fits, otherwise just
            # inside the top edge.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # A thick outline is drawn as nested 1-px rectangles.  The loop
            # variable is distinct from the outer detection index.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset, right - offset, bottom - offset],
                    outline=color)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image
示例#5
0
    def get_FPS(self, image, test_interval):
        """Measure the average time of one inference + post-processing pass.

        The image is preprocessed once, one untimed warm-up pass is run, and
        then ``test_interval`` passes are timed with ``time.time()``.  Each
        pass runs the network, decodes the three heads, applies
        ``non_max_suppression`` and the box post-processing; the results are
        discarded.

        Args:
            image: Input image handed to ``letterbox_image`` and ``np.shape``.
            test_interval: Number of timed passes (must be non-zero, the
                total time is divided by it).

        Returns:
            Average wall-clock seconds per pass.
        """
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #---------------------------------------------------------#
        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   Add the batch_size dimension
        #---------------------------------------------------------#
        images = [photo]

        def run_once(batch):
            # One full pass: forward, decode, NMS and box post-processing.
            outputs = self.net(batch)
            output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
            # Post-processing is best effort: a frame without detections
            # must not abort the benchmark.  ``Exception`` (not a bare
            # except) still lets Ctrl-C interrupt a long run.
            try:
                detections = batch_detections[0].cpu().numpy()
                scores = detections[:, 4] * detections[:, 5]
                top_index = scores > self.confidence
                # Computed only so the timed work matches real detection.
                scores[top_index]
                np.array(detections[top_index, -1], np.int32)
                top_bboxes = np.array(detections[top_index, :4])
                top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
                top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
                top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
                top_ymax = np.expand_dims(top_bboxes[:, 3], -1)
                # Remove the gray bars
                yolo_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array(
                        [self.model_image_size[0], self.model_image_size[1]]),
                    image_shape)
            except Exception:
                pass

        # Untimed warm-up pass.
        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()
            run_once(images)

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                run_once(images)
        t2 = time.time()

        tact_time = (t2 - t1) / test_interval
        return tact_time
    def detect_image(self, image_id, image):
        """Run detection on one image and write the results to a text file.

        Results go to ``./input/detection-results/<image_id>.txt``, one line
        per kept detection in the form ``class score left top right bottom``
        (score truncated to six characters, coordinates truncated to int).
        The file is created/truncated before inference, so it exists and is
        empty when nothing is detected.

        Side effects: ``self.confidence`` is set to 0.01 and ``self.iou`` to
        0.5.

        Args:
            image_id: Base name (without extension) of the result file.
            image: Input image.  With ``self.letterbox_image`` false it must
                support ``convert('RGB')`` and ``resize`` (i.e. a PIL image).

        Returns:
            ``None``.
        """
        self.confidence = 0.01
        self.iou = 0.5
        # The context manager guarantees the file is closed on every exit
        # path, including the early return when nothing is detected.
        with open("./input/detection-results/" + image_id + ".txt", "w") as f:
            image_shape = np.array(np.shape(image)[0:2])

            #---------------------------------------------------------#
            #   Add gray bars for a distortion-free resize,
            #   or resize directly when letterboxing is disabled
            #---------------------------------------------------------#
            if self.letterbox_image:
                crop_img = np.array(
                    letterbox_image(
                        image,
                        (self.model_image_size[1], self.model_image_size[0])))
            else:
                crop_img = image.convert('RGB')
                crop_img = crop_img.resize(
                    (self.model_image_size[1], self.model_image_size[0]),
                    Image.BICUBIC)
            photo = np.array(crop_img, dtype=np.float32) / 255.0
            photo = np.transpose(photo, (2, 0, 1))
            #---------------------------------------------------------#
            #   Add the batch_size dimension
            #---------------------------------------------------------#
            images = [photo]

            with torch.no_grad():
                images = torch.from_numpy(np.asarray(images))
                if self.cuda:
                    images = images.cuda()

                #---------------------------------------------------------#
                #   Feed the image to the network
                #---------------------------------------------------------#
                outputs = self.net(images)
                output_list = [
                    self.yolo_decodes[i](outputs[i]) for i in range(3)
                ]

                #---------------------------------------------------------#
                #   Stack the predictions, then run non-max suppression
                #---------------------------------------------------------#
                output = torch.cat(output_list, 1)
                batch_detections = non_max_suppression(
                    output,
                    self.num_classes,
                    conf_thres=self.confidence,
                    nms_thres=self.iou)

                #---------------------------------------------------------#
                #   Return if no object was detected.  ``Exception`` (not a
                #   bare except) lets KeyboardInterrupt/SystemExit propagate.
                #---------------------------------------------------------#
                try:
                    batch_detections = batch_detections[0].cpu().numpy()
                except Exception:
                    return

                #---------------------------------------------------------#
                #   Filter the predictions by score (column 4 * column 5)
                #---------------------------------------------------------#
                scores = batch_detections[:, 4] * batch_detections[:, 5]
                top_index = scores > self.confidence
                top_conf = scores[top_index]
                top_label = np.array(batch_detections[top_index, -1],
                                     np.int32)
                top_bboxes = np.array(batch_detections[top_index, :4])
                top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
                top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
                top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
                top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

                #---------------------------------------------------------#
                #   letterbox_image pads the input with gray bars, so the
                #   boxes are relative to the padded image and must be
                #   corrected.  Without letterboxing they are only rescaled
                #   to the original image size.
                #---------------------------------------------------------#
                if self.letterbox_image:
                    boxes = yolo_correct_boxes(
                        top_ymin, top_xmin, top_ymax, top_xmax,
                        np.array([
                            self.model_image_size[0],
                            self.model_image_size[1]
                        ]), image_shape)
                else:
                    top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
                    top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
                    top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
                    top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
                    boxes = np.concatenate(
                        [top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

            for i, c in enumerate(top_label):
                predicted_class = self.class_names[c]
                score = str(top_conf[i])

                top, left, bottom, right = boxes[i]
                f.write("%s %s %s %s %s %s\n" %
                        (predicted_class, score[:6], str(int(left)), str(
                            int(top)), str(int(right)), str(int(bottom))))

        return
示例#7
0
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #---------------------------------------------------------#
        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测!
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))

            #---------------------------------------------------------#
            #   将预测框进行堆叠,然后进行非极大抑制
            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

            #---------------------------------------------------------#
            #   如果没有检测出物体,返回原图
            #---------------------------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except:
                return image

            #---------------------------------------------------------#
            #   对预测框进行得分筛选
            #---------------------------------------------------------#
            top_index = batch_detections[:,
                                         4] * batch_detections[:,
                                                               5] > self.confidence
            top_conf = batch_detections[top_index,
                                        4] * batch_detections[top_index, 5]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            #-----------------------------------------------------------------#
            #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
            #   因此生成的top_bboxes是相对于有灰条的图像的
            #   我们需要对其进行修改,去除灰条的部分。
            #-----------------------------------------------------------------#
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0], self.model_image_size[1]]),
                image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                        self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
示例#8
0
    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled boxes onto it.

        The image is letterboxed to ``self.model_image_size``, passed through
        ``self.net``, decoded by ``self.yolo_decodes`` and filtered with
        ``non_max_suppression`` and ``self.confidence``.  Each remaining box
        is padded by 5 pixels, cropped and saved to ``lena2.jpg`` (every
        detection overwrites the previous file), then drawn on ``image``.

        Args:
            image: PIL image (it is used with ``crop`` and ``ImageDraw.Draw``).
                It is modified in place.

        Returns:
            The same ``image`` object; unmodified when the detections cannot
            be extracted from the ``non_max_suppression`` result.
        """
        # Read the image dimensions once instead of re-deriving them per box.
        full_shape = np.shape(image)
        img_h, img_w = full_shape[0], full_shape[1]
        image_shape = np.array(full_shape[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        # Scale to [0, 1] and move channels first, then add a batch dimension.
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        # Decode each of the three network outputs with its own decoder.
        output_list = [self.yolo_decodes[k](outputs[k]) for k in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Presumably the first entry is None when nothing was detected
            # (TODO confirm against non_max_suppression).  Catching Exception
            # instead of a bare except lets KeyboardInterrupt/SystemExit
            # propagate.
            return image

        # Keep detections whose combined score (column 4 * column 5) passes
        # the confidence threshold.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # Remove the letterbox padding so boxes refer to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * img_w +
                                                0.5).astype('int32'))

        # Clamp to at least 1: for small images the integer division yields 0
        # and no outline would be drawn at all.
        thickness = max((img_h + img_w) // self.model_image_size[0], 1)

        # Display text per class name; anything unlisted falls back to the
        # default label.
        display_names = {
            'person': "Ren",
            'chair': "椅子",
            'clock': "钟",
            'tie': "厂牌吗??",
            'cell phone': "手机",
            'laptop': "笔记本电脑",
            'QR': "2维码",
        }

        for idx, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[idx]

            # Pad the box by 5 pixels on every side.
            top, left, bottom, right = boxes[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Save the padded (not yet clamped) region of the image.
            img2 = image.crop((left, top, right, bottom))
            img2.save("lena2.jpg")

            # Clamp the box to the image bounds.
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(img_h, np.floor(bottom).astype('int32'))
            right = min(img_w, np.floor(right).astype('int32'))

            predicted_class_ch = display_names.get(predicted_class, "单号")
            label = '{} {} {:.2f} {}'.format(predicted_class_ch, '置信度', score,
                                             '%')
            draw = ImageDraw.Draw(image)
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10 -
            # confirm the pinned Pillow version or migrate to textbbox.
            label_size = draw.textsize(label, font)
            # The UTF-8 encoded bytes are what gets printed.
            print(label.encode('utf-8'))

            # Put the label above the box when there is room, else inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 3])

            # Box outline: one nested rectangle per pixel of thickness.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=self.colors[c])
            # Filled label background followed by the label text.
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        return image
示例#9
0
    def generate_box(self, image):
        """Detect objects in ``image`` and return their boxes as strings.

        The image is letterboxed to ``self.model_image_size``, passed through
        ``self.net``, decoded by ``self.yolo_decodes`` and filtered with
        ``non_max_suppression`` and ``self.confidence``.  Each remaining box
        is padded by 5 pixels, rounded and clamped to the image bounds.

        Args:
            image: input image; only its shape and ``letterbox_image`` output
                are used here.

        Returns:
            A list of ``"left,top,right,bottom"`` strings, one per kept
            detection; an empty list when the detections cannot be extracted
            from the ``non_max_suppression`` result.
        """
        # Read the image dimensions once instead of re-deriving them per box.
        full_shape = np.shape(image)
        img_h, img_w = full_shape[0], full_shape[1]
        image_shape = np.array(full_shape[0:2])

        crop_img = np.array(letterbox_image(image, (self.model_image_size[0], self.model_image_size[1])))
        # Scale to [0, 1] and move channels first, then add a batch dimension.
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            # Decode each of the three network outputs with its own decoder.
            output_list = [self.yolo_decodes[k](outputs[k]) for k in range(3)]
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
                                                   conf_thres=self.confidence,
                                                   nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Presumably the first entry is None when nothing was detected
            # (TODO confirm against non_max_suppression).  Catching Exception
            # instead of a bare except lets KeyboardInterrupt/SystemExit
            # propagate.
            return []

        # Keep detections whose combined score (column 4 * column 5) passes
        # the confidence threshold.
        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # Remove the letterbox padding so boxes refer to the original image.
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)

        # Nothing is drawn here, so the font and line thickness the original
        # computed (and never used) are no longer loaded.
        boxlist = []
        for idx, _ in enumerate(top_label):
            # Pad the box by 5 pixels on every side.
            top, left, bottom, right = boxes[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Round to the nearest pixel and clamp to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))

            boxlist.append(','.join(str(v) for v in (left, top, right, bottom)))

        return boxlist
示例#10
0
    def detect_image(self, image):
        """Detect objects in ``image`` and return them as plain lists.

        The image is letterboxed to ``self.model_image_size``, passed through
        ``self.net``, decoded by ``self.yolo_decodes`` and filtered with
        ``non_max_suppression`` and ``self.confidence``.  Each remaining box
        is padded by 5 pixels, rounded and clamped to the image bounds.

        Args:
            image: input image; only its shape and ``letterbox_image`` output
                are used here.

        Returns:
            A list with one ``[left, top, right, bottom, score, class_name]``
            entry per kept detection; an empty list when the detections
            cannot be extracted from the ``non_max_suppression`` result.
        """
        # Read the image dimensions once instead of re-deriving them per box.
        full_shape = np.shape(image)
        img_h, img_w = full_shape[0], full_shape[1]
        image_shape = np.array(full_shape[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        # Normalise to [0, 1] and move channels first, then add a batch
        # dimension.
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        # Decode each of the three network outputs with its own decoder.
        output_list = [self.yolo_decodes[k](outputs[k]) for k in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Presumably the first entry is None when nothing was detected
            # (TODO confirm against non_max_suppression).  Catching Exception
            # instead of a bare except lets KeyboardInterrupt/SystemExit
            # propagate.
            return []

        # Keep detections whose combined score (column 4 * column 5) passes
        # the confidence threshold.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # Map the boxes from the letterboxed input back to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        detections = []
        for idx, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[idx]

            # Pad the box by 5 pixels on every side.
            top, left, bottom, right = boxes[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Round to the nearest pixel and clamp to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))
            detections.append(
                [left, top, right, bottom, score, predicted_class])

        return detections
    def detect_image(self, image):
        """Detect objects in ``image``, draw them, and return image and boxes.

        The image is letterboxed to ``self.model_image_size``, passed through
        ``self.net``, decoded by ``self.yolo_decodes`` and filtered with
        ``non_max_suppression`` and ``self.confidence``.  Each remaining box
        is padded by 5 pixels, clamped to the image and drawn with a
        ``"<class> <score>"`` label.

        Args:
            image: PIL image (it is passed to ``ImageDraw.Draw``).  It is
                modified in place.

        Returns:
            A ``(image, boxes)`` tuple.  ``boxes`` is the unpadded result of
            ``yolo_correct_boxes``, one ``(top, left, bottom, right)`` row per
            kept detection.  When the detections cannot be extracted from the
            ``non_max_suppression`` result an empty ``(0, 4)`` array is
            returned, so callers can always unpack two values (the original
            returned a bare image on that path).
        """
        # Read the image dimensions once instead of re-deriving them per box.
        full_shape = np.shape(image)
        img_h, img_w = full_shape[0], full_shape[1]
        image_shape = np.array(full_shape[0:2])

        # Letterbox the image (padding added by letterbox_image) so it can be
        # resized without distortion.
        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[1], self.model_image_size[0])))
        # Normalise to [0, 1] and reorder [H, W, C] -> [C, H, W].
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        # Add the batch dimension.
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            # Run the network and decode each of its three outputs with the
            # matching decoder.
            outputs = self.net(images)
            output_list = [self.yolo_decodes[k](outputs[k]) for k in range(3)]

            # Stack the predictions, then apply non-maximum suppression.
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Presumably the first entry is None when nothing was detected
            # (TODO confirm against non_max_suppression).  Catching Exception
            # instead of a bare except lets KeyboardInterrupt/SystemExit
            # propagate.  Return the same tuple shape as the success path.
            return image, np.empty((0, 4), dtype=np.float32)

        # Keep detections whose combined score (column 4 * column 5) passes
        # the confidence threshold.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # The predicted boxes are relative to the letterboxed image; remove
        # the padding so they refer to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * img_w +
                                                0.5).astype('int32'))

        thickness = max((img_h + img_w) // self.model_image_size[0], 1)

        for idx, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[idx]
            color = self.colors[self.class_names.index(predicted_class)]

            # Pad the box by 5 pixels on every side.
            top, left, bottom, right = boxes[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Round to the nearest pixel and clamp to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10 -
            # confirm the pinned Pillow version or migrate to textbbox.
            label_size = draw.textsize(label, font)
            # The UTF-8 encoded bytes are what gets printed.
            print(label.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box when there is room, else inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Box outline: one nested rectangle per pixel of thickness.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=color)
            # Filled label background followed by the label text.
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image, boxes
示例#12
0
    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled boxes onto it.

        The image is letterboxed to ``self.model_image_size``, passed through
        ``self.net``, decoded by ``self.yolo_decodes`` and filtered with
        ``non_max_suppression`` and ``self.confidence``.  Each remaining box
        is padded by 5 pixels, clamped to the image and drawn with a
        ``"<class> <score>"`` label.

        Args:
            image: PIL image (it is passed to ``ImageDraw.Draw``).  It is
                modified in place.

        Returns:
            The same ``image`` object; unmodified when the detections cannot
            be extracted from the ``non_max_suppression`` result.
        """
        # Read the image dimensions once instead of re-deriving them per box.
        full_shape = np.shape(image)
        img_h, img_w = full_shape[0], full_shape[1]
        image_shape = np.array(full_shape[0:2])

        crop_img = np.array(
            letterbox_image(image,
                            (self.model_image_size[1],
                             self.model_image_size[0])))
        # Normalise to [0, 1] and move channels first, then add a batch
        # dimension and convert to a tensor.
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            # Run the network and decode each of its three outputs with the
            # matching decoder.
            outputs = self.net(images)
            output_list = [self.yolo_decodes[k](outputs[k]) for k in range(3)]
            output = torch.cat(output_list, 1)
            # Non-maximum suppression.
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=self.iou)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Presumably the first entry is None when nothing was detected
            # (TODO confirm against non_max_suppression).  Catching Exception
            # instead of a bare except lets KeyboardInterrupt/SystemExit
            # propagate.
            return image

        # Keep detections whose combined score (column 4 * column 5) passes
        # the confidence threshold.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_conf = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # Remove the letterbox padding so the boxes are expressed in the
        # coordinates of the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * img_w +
                                                0.5).astype('int32'))

        # Outline width.  Clamp to at least 1: for small images the integer
        # division yields 0 and no outline would be drawn at all.
        thickness = max((img_h + img_w) // self.model_image_size[0], 1)

        for idx, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[idx]
            color = self.colors[self.class_names.index(predicted_class)]

            # Pad the box by 5 pixels on every side.
            top, left, bottom, right = boxes[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # Round to the nearest pixel and clamp to the image bounds.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(img_h, np.floor(bottom + 0.5).astype('int32'))
            right = min(img_w, np.floor(right + 0.5).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10 -
            # confirm the pinned Pillow version or migrate to textbbox.
            label_size = draw.textsize(label, font)
            # The UTF-8 encoded bytes are what gets printed.
            print(label.encode('utf-8'))

            # Put the label above the box when there is room, else inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Box outline: one nested rectangle per pixel of thickness.
            for offset in range(thickness):
                draw.rectangle(
                    [left + offset, top + offset,
                     right - offset, bottom - offset],
                    outline=color)
            # Filled label background followed by the label text.
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw
        return image
示例#13
0
    def detect_image(self, image):
        """Detect objects in ``image`` and summarise them via ``calsquare``.

        The image is letterboxed to ``self.model_image_size``, passed through
        ``self.net``, decoded by ``self.yolo_decodes`` and filtered with
        ``non_max_suppression`` and ``self.confidence``.  The remaining boxes
        and labels are handed to ``self.calsquare`` together with a
        zero-initialised length-12 array.

        Args:
            image: input image, forwarded to ``letterbox_image`` and
                ``self.calsquare``.

        Returns:
            A ``(image, predict)`` tuple.  ``predict`` is whatever
            ``self.calsquare`` returns, or the untouched ``np.zeros(12)`` when
            the detections cannot be extracted from the
            ``non_max_suppression`` result.
        """
        predict = np.zeros(12)
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        # Scale to [0, 1] and move channels first, then add a batch dimension.
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        # Decode each of the three network outputs with its own decoder.
        output_list = [self.yolo_decodes[k](outputs[k]) for k in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Presumably the first entry is None when nothing was detected
            # (TODO confirm against non_max_suppression).  Catching Exception
            # instead of a bare except lets KeyboardInterrupt/SystemExit
            # propagate.
            return image, predict

        # Keep detections whose combined score (column 4 * column 5) passes
        # the confidence threshold.
        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # Remove the letterbox padding so boxes refer to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        # Nothing is drawn here, so the font, line thickness and per-box
        # scores the original computed (and never used) are no longer built.
        predict = self.calsquare(image, boxes, top_label, predict)

        return image, predict
示例#14
0
    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled boxes onto it.

        The image is letterboxed to ``self.model_image_size``, passed through
        ``self.net``, decoded by ``self.yolo_decodes`` and filtered with
        ``non_max_suppression`` and ``self.confidence``.  Each remaining box
        is clamped to the image and drawn with a ``"<class> <score>"`` label.

        Args:
            image: PIL image (it is passed to ``ImageDraw.Draw``).  It is
                modified in place.

        Returns:
            The same ``image`` object; unmodified when the detections cannot
            be extracted from the ``non_max_suppression`` result.
        """
        # Read the image dimensions once instead of re-deriving them per box.
        full_shape = np.shape(image)
        img_h, img_w = full_shape[0], full_shape[1]
        image_shape = np.array(full_shape[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        # Convert to float32 and squeeze pixel values into [0, 1].
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        # The image is H*W*C but the network expects C*H*W.
        photo = np.transpose(photo, (2, 0, 1))
        images = np.asarray([photo])

        with torch.no_grad():
            # numpy pixel array -> tensor, moved to the GPU when enabled.
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        # Process the i-th output with the i-th decoder, then concatenate the
        # three decoded outputs along dimension 1.
        output_list = [self.yolo_decodes[k](outputs[k]) for k in range(3)]
        output = torch.cat(output_list, 1)

        # Non-maximum suppression removes duplicate boxes in the same region.
        # Per the original author's note each row is
        # (x1, y1, x2, y2) + obj_conf + class_conf + class_pred.
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            # Presumably the first entry is None when nothing was detected
            # (TODO confirm against non_max_suppression).  Catching Exception
            # instead of a bare except lets KeyboardInterrupt/SystemExit
            # propagate.
            return image

        # Filter by score once more.  NOTE(review): the original author
        # questioned why this is needed after non_max_suppression already
        # applied conf_thres.
        scores = batch_detections[:, 4] * batch_detections[:, 5]
        top_index = scores > self.confidence
        top_score = scores[top_index]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        # Expand each coordinate column to shape (n, 1), n = number of boxes.
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, k], -1) for k in range(4))

        # Remove the letterbox padding; yields (top, left, bottom, right) per
        # box in original-image coordinates.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * img_w +
                                                0.5).astype('int32'))
        # Line width of the box outline.
        thickness = int(
            max(np.ceil(img_h / self.model_image_size[0]),
                np.ceil(img_w / self.model_image_size[0]))) + 1

        for idx, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_score[idx]
            color = self.colors[self.class_names.index(predicted_class)]

            # Round up and clamp the box to the image bounds.
            top, left, bottom, right = boxes[idx]
            top = max(0, np.ceil(top).astype('int32'))
            left = max(0, np.ceil(left).astype('int32'))
            bottom = min(img_h, np.ceil(bottom).astype('int32'))
            right = min(img_w, np.ceil(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            # Size needed to render the label with the chosen font.
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10 -
            # confirm the pinned Pillow version or migrate to textbbox.
            label_size = draw.textsize(label, font)

            if top - label_size[1] >= 0:
                # Room above the box: place the label outside, top-left.
                text_origin = np.array([left, top - label_size[1]])
            else:
                # No room above: place the label inside, top-left.
                text_origin = np.array([left + 1, top + 1])

            # Hollow rectangle for the detection box.
            draw.rectangle([left, top, right, bottom],
                           outline=color,
                           width=thickness)
            # Filled rectangle behind the label, then the label in black.
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)

            del draw
        return image
示例#15
0
    def detect_image(self, image):
        """Run the detector on one image and return boxes with pose angles.

        The image is letterboxed to ``self.model_image_size``, scaled to
        [0, 1], passed through ``self.net``; the three network outputs are
        decoded with ``self.yolo_decodes`` and filtered by
        ``non_max_suppression`` using ``self.confidence`` and ``self.iou``.

        Args:
            image: Input image. It is handed to ``letterbox_image`` and
                ``np.shape(image)[0:2]`` is used as its (height, width).
                Presumably a PIL image -- confirm against callers.

        Returns:
            A list with one dict per detection whose column-4 score exceeds
            ``self.confidence``::

                {"box": [left, top, right, bottom],
                 "angle": [yaw, pitch, roll]}

            Box coordinates are ints in original-image pixels, padded by
            5 px on every side and clamped to the image bounds. Angles are
            floats: detection columns 5..7 multiplied by 90 (presumably
            normalised angles converted to degrees -- TODO confirm).

            When the suppression step yields no usable detections for the
            image, the unmodified ``image`` is returned instead of a list.
            This mirrors the original fallback and is kept for backward
            compatibility.
        """
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = (self.model_image_size[0], self.model_image_size[1])

        # Letterbox to the network input size, scale to [0, 1], move the
        # last axis to the front and wrap in a batch of one.
        crop_img = np.array(letterbox_image(image, input_shape))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(decoded, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=self.iou)

        # The debug print lives inside the guarded section: previously it
        # dereferenced ``.shape`` before the fallback and crashed whenever
        # the first entry was not a tensor.
        try:
            detections = batch_detections[0]
            print(f'[INFO] batch_detections: {detections.shape}')
            detections = detections.cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # Empty result list or a non-tensor entry: nothing to report.
            return image

        keep = detections[:, 4] > self.confidence
        top_angle = detections[keep, 5:8]
        top_bboxes = np.array(detections[keep, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, col], -1) for col in range(4))

        # Map boxes from the letterboxed frame back to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([input_shape[0], input_shape[1]]),
            image_shape)

        img_height, img_width = np.shape(image)[0], np.shape(image)[1]

        predictions = []
        for box, angle in zip(boxes, top_angle):
            top, left, bottom, right = box

            # Pad by 5 px, round to the nearest pixel, clamp to the image.
            top = max(0, int(np.floor((top - 5) + 0.5)))
            left = max(0, int(np.floor((left - 5) + 0.5)))
            bottom = min(img_height, int(np.floor((bottom + 5) + 0.5)))
            right = min(img_width, int(np.floor((right + 5) + 0.5)))

            yaw, pitch, roll = (float(value) for value in angle * 90)

            predictions.append({
                "box": [left, top, right, bottom],
                # Was ``infor["angle"][yaw, pitch, roll]``: a subscript on a
                # missing key (KeyError) instead of an assignment.
                "angle": [yaw, pitch, roll],
            })

        return predictions
示例#16
0
    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled boxes onto it.

        The image is letterboxed to ``self.model_image_size``, scaled to
        [0, 1], passed through ``self.net``; the three network outputs are
        decoded with ``self.yolo_decodes`` and filtered by
        ``non_max_suppression`` (``conf_thres=self.confidence``,
        ``nms_thres=0.3``). Detections whose column-4 * column-5 score
        exceeds ``self.confidence`` are drawn.

        Args:
            image: Image to annotate. It is handed to ``letterbox_image``
                and ``ImageDraw.Draw``, and ``np.shape(image)[0:2]`` is used
                as its (height, width). Presumably a PIL image -- confirm
                against callers.

        Returns:
            The same ``image`` object, with a coloured outline and a
            "<class> <score>" caption drawn for every kept detection. If the
            suppression step yields no usable detections the image is
            returned untouched.

        Side effects:
            Draws on ``image`` in place, prints each caption (UTF-8 bytes)
            to stdout and loads ``model_data/simhei.ttf`` from disk.
        """
        image_shape = np.array(np.shape(image)[0:2])
        input_shape = (self.model_image_size[0], self.model_image_size[1])

        # Preprocess: resize with grey padding bars, normalise to [0, 1],
        # move channels first (PyTorch layout) and wrap in a batch of one.
        crop_img = np.array(letterbox_image(image, input_shape))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        with torch.no_grad():
            outputs = self.net(images)
            # The network emits three feature maps; decode each of them and
            # stack the predictions before non-maximum suppression.
            decoded = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
            output = torch.cat(decoded, 1)
            batch_detections = non_max_suppression(
                output,
                self.config["yolo"]["classes"],
                conf_thres=self.confidence,
                nms_thres=0.3)

        try:
            detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # Empty result list or a non-tensor entry: nothing to draw.
            return image

        # Final score = column 4 * column 5 (box confidence times class
        # confidence, per the original comments); keep those above threshold.
        scores = detections[:, 4] * detections[:, 5]
        keep = scores > self.confidence
        top_conf = scores[keep]
        top_label = np.array(detections[keep, -1], np.int32)
        top_bboxes = np.array(detections[keep, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = (
            np.expand_dims(top_bboxes[:, col], -1) for col in range(4))

        # Boxes are relative to the padded (letterboxed) picture; convert
        # them to coordinates relative to the original image.
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([input_shape[0], input_shape[1]]),
            image_shape)

        img_height, img_width = np.shape(image)[0], np.shape(image)[1]

        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=int(np.floor(3e-2 * img_width + 0.5)))

        # Outline width in pixels; never below 1, otherwise small images
        # would get no outline at all.
        thickness = max(
            (img_height + img_width) // self.model_image_size[0], 1)

        draw = ImageDraw.Draw(image)

        if hasattr(draw, 'textbbox'):
            # ``textsize`` was removed in Pillow 10. The right/bottom corner
            # of the bounding box anchored at (0, 0) gives the extent needed
            # to cover text drawn at the origin.
            def measure_label(text):
                return tuple(draw.textbbox((0, 0), text, font=font)[2:])
        else:
            # Older Pillow without ``textbbox``.
            def measure_label(text):
                return draw.textsize(text, font)

        for class_index, score, box in zip(top_label, top_conf, boxes):
            predicted_class = self.class_names[class_index]
            color = self.colors[self.class_names.index(predicted_class)]

            # Pad by 5 px, round to the nearest pixel, clamp to the image.
            top, left, bottom, right = box
            top = max(0, int(np.floor((top - 5) + 0.5)))
            left = max(0, int(np.floor((left - 5) + 0.5)))
            bottom = min(img_height, int(np.floor((bottom + 5) + 0.5)))
            right = min(img_width, int(np.floor((right + 5) + 0.5)))

            label = '{} {:.2f}'.format(predicted_class, score)
            label_size = measure_label(label)
            # Log output kept identical to the historical bytes form.
            print(label.encode('utf-8'))

            # Caption goes above the box when there is room, otherwise just
            # inside its top edge.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Thick outline drawn as nested one-pixel rectangles.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset,
                     right - inset, bottom - inset],
                    outline=color)

            # Filled caption background, then the caption text in black.
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=color)
            draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)

        del draw
        return image