Пример #1
0
def main():
    """Run the restored detector over the test annotations and show each result.

    The weights named by ``cfg.test.model`` are loaded into the network
    selected by ``cfg.model``. Every image is pushed through the network once,
    the raw predictions are turned into boxes, filtered with per-class NMS and
    top-k selection, and finally handed to ``visualization.show_prediction``
    together with the ground-truth boxes of the same image.

    Raises:
        ValueError: If ``cfg.model`` is not a supported architecture
            (only ``'vgg16'`` is handled here).
    """
    # Debug mode: fix every RNG seed when deterministic cuDNN is requested.
    cudnn.deterministic = cfg.cudnn_deterministic
    cudnn.benchmark = cfg.cudnn_benchmark
    if cudnn.deterministic:
        torch.manual_seed(1)
        torch.cuda.manual_seed_all(1)
        np.random.seed(1)
        random.seed(1)

    use_gpu = torch.cuda.is_available() and cfg.cuda
    device = torch.device('cuda' if use_gpu else 'cpu')

    # Data loading: one image per batch, in annotation order, so that the
    # batch index can be used to look up the matching annotation below.
    annotations = get_all_loader_annotations(print_fn=print, training=False)
    dataset = Dataset(annotations,
                      batch_size=1,
                      sub_means=True,
                      training=False)
    data_loader = data.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=0,
                                  # Pinned host memory only helps (and is only
                                  # available) when batches go to a GPU.
                                  pin_memory=use_gpu)

    # Model: fail early with a clear message instead of crashing later on
    # ``None.to(device)``.
    if cfg.model == 'vgg16':
        model = Vgg16()
    else:
        raise ValueError('Unsupported model: {!r}'.format(cfg.model))

    model = model.to(device)  # move to the selected device
    net_utils.load_model(cfg.test.model, model, use_gpu=use_gpu)  # restore weights
    model.eval()  # inference mode

    for i, items in enumerate(data_loader):
        images, im_info, gt_boxes, num_boxes = [x.to(device) for x in items]

        # Forward pass without gradient tracking.
        with torch.no_grad():
            result = model(images, im_info, gt_boxes, num_boxes)
        # Only the first three outputs are needed for inference.
        rois, pred_cls_prob, pred_loc, *_ = result

        # Decode the predictions on the CPU.
        # Per the original author's note: scores are [k, num_cls], boxes [k, 4].
        im_info = im_info.cpu()
        rois = rois.cpu()
        pred_cls_prob = pred_cls_prob.cpu()
        pred_loc = pred_loc.cpu()
        scores, boxes = inference_utils.process_boxes(im_info, rois,
                                                      pred_cls_prob, pred_loc)

        # Per-class NMS followed by top-k selection.
        results = inference_utils.nms_all(scores, boxes, score_threshold=0.05)
        results = inference_utils.get_top_k_boxes(results)

        # Flatten the per-class results into the layout used for display.
        # The empty float32 seeds keep the output shapes valid when nothing
        # was detected.
        box_parts = [np.zeros([0, 4], dtype=np.float32)]
        score_parts = [np.zeros([0], dtype=np.float32)]
        pred_labels = []

        # Only len() and integer indexing of ``results`` are relied on; the
        # index doubles as the class label.
        for j in range(len(results)):
            obj_boxes = results[j]
            if len(obj_boxes) == 0:
                continue
            # Columns 0-3 hold the box, column 4 its score.
            box_parts.append(obj_boxes[:, 0:4])
            score_parts.append(obj_boxes[:, 4])
            pred_labels += [j] * len(obj_boxes)

        # Concatenate once instead of re-copying the arrays for every class.
        pred_boxes = np.concatenate(box_parts, axis=0).reshape(-1, 4)
        pred_scores = np.concatenate(score_parts, axis=0).reshape(-1)
        pred_labels = np.array(pred_labels, dtype=np.int32).reshape(-1)

        # Reverse the channel axis (BGR -> RGB per the original author's note).
        image = dataset.load_image(i)[:, :, ::-1]
        # Ground truth as rows of [box..., label].
        gt_boxes = np.concatenate(
            [annotations[i]['boxes'], annotations[i]['labels'].reshape(-1, 1)],
            axis=1)

        visualization.show_prediction(image,
                                      (pred_boxes, pred_labels, pred_scores),
                                      gt_boxes,
                                      add_mean=False)
Пример #2
0
class TFRecord(object):
    """Write the trainval split to a TFRecord file and read it back in batches.

    Configuration comes from the module-level ``path_params``,
    ``model_params`` and ``solver_params`` dictionaries; images and labels are
    loaded through a ``Dataset`` instance.
    """

    def __init__(self):
        self.data_path = path_params['data_path']
        self.tfrecord_dir = path_params['tfrecord_dir']
        self.train_tfrecord_name = path_params['train_tfrecord_name']
        self.test_tfrecord_name = path_params['test_tfrecord_name']
        self.image_size = model_params['image_size']
        self.cell_size = model_params['cell_size']
        self.class_num = model_params['num_classes']
        # Class name -> integer index.
        self.class_ind = dict(zip(CLASSES, range(self.class_num)))
        self.batch_size = solver_params['batch_size']
        # When true, every sample is written twice: original and flipped.
        self.flipped = solver_params['flipped']
        self.dataset = Dataset()

    def _int64_feature(self, value):
        """Wrap a single integer in an int64-list ``tf.train.Feature``."""
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _bytes_feature(self, value):
        """Wrap a single bytes value in a bytes-list ``tf.train.Feature``."""
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    @staticmethod
    def _read_image_ids(list_path):
        """Return the non-empty, whitespace-stripped lines of ``list_path``.

        Stripping (rather than dropping the last character) keeps the final
        id intact when the file does not end with a newline.
        """
        with open(list_path, 'r') as list_file:
            stripped = (line.strip() for line in list_file)
            return [image_id for image_id in stripped if image_id]

    def _make_example(self, image, label):
        """Build a ``tf.train.Example`` holding the raw bytes of one sample."""
        return tf.train.Example(features=tf.train.Features(
            feature={
                'image': self._bytes_feature(image.tobytes()),
                'label': self._bytes_feature(label.tobytes())
            }))

    def _load_samples(self, image_num):
        """Return the ``(image, label)`` pairs to write for one image id.

        With ``self.flipped`` set, the dataset loaders return an
        (original, flipped) pair each, giving two samples; otherwise one.
        """
        if self.flipped:
            image, image_flipped = self.dataset.load_image(
                image_num=image_num)
            label, label_flipped = self.dataset.load_annotation(
                image_num=image_num)
            return [(image, label), (image_flipped, label_flipped)]

        image = self.dataset.load_image(image_num)
        label = self.dataset.load_annotation(image_num)
        return [(image, label)]

    def create_tfrecord(self):
        """Write every trainval sample to the training TFRecord file.

        Nothing is done when the target file already exists. The image list
        is read before the writer is opened, so a missing list file does not
        leave an empty record file behind (which would block regeneration).
        """
        trainval_path = os.path.join(self.data_path, 'ImageSets', 'Main',
                                     'trainval.txt')
        tf_file = os.path.join(self.tfrecord_dir, self.train_tfrecord_name)
        if os.path.exists(tf_file):
            return

        image_nums = self._read_image_ids(trainval_path)

        # The context manager closes the writer even if loading a sample fails.
        with tf.python_io.TFRecordWriter(tf_file) as writer:
            for image_num in image_nums:
                for image, label in self._load_samples(image_num):
                    example = self._make_example(image, label)
                    writer.write(example.SerializeToString())
        print('Finish trainval.tfrecord Done')

    def parse_single_example(self, file_name):
        """Build the ops that read one serialized sample from the record file.

        NOTE(review): ``file_name`` is accepted but not used; the file read is
        always ``tfrecord_dir/train_tfrecord_name``. Left unchanged because
        callers may rely on it -- confirm before wiring the argument through.

        :param file_name: name of the TFRecord file to parse (currently
            ignored, see note above)
        :return: ``(image, label)`` string tensors with the raw sample bytes
        """
        tfrecord_file = os.path.join(self.tfrecord_dir,
                                     self.train_tfrecord_name)

        # Reader op for TFRecord files.
        reader = tf.TFRecordReader()

        # Queue of input file names.
        filename_queue = tf.train.string_input_producer([tfrecord_file])

        # Read and parse a single serialized example.
        _, serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'image':
                                               tf.FixedLenFeature([],
                                                                  tf.string),
                                               'label':
                                               tf.FixedLenFeature([],
                                                                  tf.string)
                                           })

        image = features['image']
        label = features['label']

        return image, label

    def parse_batch_examples(self, file_name):
        """Build the ops that yield shuffled, decoded batches of samples.

        :param file_name: forwarded to ``parse_single_example``
        :return: ``(image_batch, label_batch)`` float32 tensors shaped
            ``[batch_size, image_size, image_size, 3]`` and
            ``[batch_size, cell_size, cell_size, 5 + class_num]``
        """
        batch_size = self.batch_size
        min_after_dequeue = 100
        num_threads = 8
        capacity = min_after_dequeue + 3 * batch_size

        image, label = self.parse_single_example(file_name)
        image_batch, label_batch = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_threads,
            capacity=capacity,
            min_after_dequeue=min_after_dequeue)

        # Decode the raw bytes; this assumes the arrays were written as
        # float32 -- TODO confirm against Dataset.
        image_batch = tf.decode_raw(image_batch, tf.float32)
        label_batch = tf.decode_raw(label_batch, tf.float32)

        # Reshape to the layout expected by the network.
        image_batch = tf.reshape(
            image_batch,
            [self.batch_size, self.image_size, self.image_size, 3])
        label_batch = tf.reshape(label_batch, [
            self.batch_size, self.cell_size, self.cell_size, 5 + self.class_num
        ])

        return image_batch, label_batch
Пример #3
0
class TFRecord(object):
    """Write the trainval split to a TFRecord file and expose it as a tf.data pipeline.

    Configuration comes from the module-level ``path_params``,
    ``model_params`` and ``solver_params`` dictionaries; images and boxes are
    loaded and preprocessed through a ``Dataset`` instance.
    """

    # Every sample's box array is zero-padded to at least this many rows
    # before it is serialized.
    MAX_BOXES = 300

    def __init__(self):
        self.data_path = path_params['data_path']
        self.tfrecord_dir = path_params['tfrecord_dir']
        self.train_tfrecord_name = path_params['train_tfrecord_name']
        self.input_width = model_params['input_width']
        self.input_height = model_params['input_height']
        self.channels = model_params['channels']
        self.class_num = len(model_params['classes'])
        self.batch_size = solver_params['batch_size']
        self.dataset = Dataset()

    def _int64_feature(self, value):
        """Wrap an integer, or a list/tuple of integers, in a ``tf.train.Feature``."""
        if not isinstance(value, (list, tuple)):
            value = [value]
        # ``value`` is already a flat sequence; wrapping it again would nest it.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

    def _float_feature(self, value):
        """Wrap a float, or a list/tuple of floats, in a ``tf.train.Feature``."""
        if not isinstance(value, (list, tuple)):
            value = [value]
        return tf.train.Feature(float_list=tf.train.FloatList(value=value))

    def _bytes_feature(self, value):
        """Wrap a bytes value, or a list/tuple of them, in a ``tf.train.Feature``."""
        if not isinstance(value, (list, tuple)):
            value = [value]
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

    @staticmethod
    def _read_image_ids(list_path):
        """Return the non-empty, whitespace-stripped lines of ``list_path``.

        Stripping (rather than dropping the last character) keeps the final
        id intact when the file does not end with a newline.
        """
        with open(list_path, 'r') as list_file:
            stripped = (line.strip() for line in list_file)
            return [image_id for image_id in stripped if image_id]

    @staticmethod
    def _pad_boxes(boxes, max_boxes):
        """Return ``boxes`` as float32, zero-padded to at least ``max_boxes`` rows.

        Rows have five columns; inputs that already have ``max_boxes`` rows or
        more are only converted, never truncated.
        """
        boxes = np.asarray(boxes, dtype=np.float32)
        missing = max_boxes - len(boxes)
        if missing > 0:
            padding = np.zeros((missing, 5), dtype=np.float32)
            boxes = np.concatenate([boxes, padding], axis=0)
        return boxes

    def create_tfrecord(self):
        """(Re)write the training TFRecord file from the trainval image list.

        An existing file is replaced. Images without any box are skipped.
        Each record stores the raw image bytes, the padded float32 box array
        and the image height and width.
        """
        trainval_path = os.path.join(self.data_path, 'ImageSets', 'Main',
                                     'trainval.txt')
        # Read the list first so a missing list file does not destroy an
        # existing record file.
        image_nums = self._read_image_ids(trainval_path)

        tf_file = os.path.join(self.tfrecord_dir, self.train_tfrecord_name)
        if os.path.exists(tf_file):
            os.remove(tf_file)

        # The context manager closes the writer even if loading a sample fails.
        with tf.python_io.TFRecordWriter(tf_file) as writer:
            for num in image_nums:
                image = self.dataset.load_image(num)
                image_shape = image.shape
                boxes = self.dataset.load_label(num)

                if len(boxes) == 0:
                    continue

                boxes = self._pad_boxes(boxes, self.MAX_BOXES)

                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'image': self._bytes_feature(image.tobytes()),
                        'bbox': self._bytes_feature(boxes.tobytes()),
                        'height': self._int64_feature(int(image_shape[0])),
                        'width': self._int64_feature(int(image_shape[1])),
                    }))
                writer.write(example.SerializeToString())
        print('Finish trainval.tfrecord Done')

    def parse_single_example(self, serialized_example):
        """Decode one serialized record and run the dataset preprocessing on it.

        :param serialized_example: scalar string tensor holding one record
            written by ``create_tfrecord``
        :return: ``(image, y_true_13, y_true_26, y_true_52)`` float32 tensors
            produced by ``self.dataset.preprocess_data``
        """
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'image':
                                               tf.FixedLenFeature([],
                                                                  tf.string),
                                               'bbox':
                                               tf.FixedLenFeature([],
                                                                  tf.string),
                                               'height':
                                               tf.FixedLenFeature([],
                                                                  tf.int64),
                                               'width':
                                               tf.FixedLenFeature([], tf.int64)
                                           })

        # Image bytes are read back as uint8 -- assumes the stored image was
        # uint8; TODO confirm against Dataset.load_image.
        tf_image = tf.decode_raw(features['image'], tf.uint8)
        tf_bbox = tf.decode_raw(features['bbox'], tf.float32)
        tf_height = features['height']
        tf_width = features['width']

        # Restore the array shapes. The number of box rows is inferred so the
        # reader stays in step with whatever padding length the writer used
        # (a fixed row count here previously disagreed with the writer).
        tf_image = tf.reshape(tf_image, [tf_height, tf_width, 3])
        tf_label = tf.reshape(tf_bbox, [-1, 5])

        # Preprocessing runs as a Python function inside the graph.
        tf_image, y_true_13, y_true_26, y_true_52 = tf.py_func(
            self.dataset.preprocess_data,
            inp=[tf_image, tf_label, self.input_height, self.input_width],
            Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

        return tf_image, y_true_13, y_true_26, y_true_52

    def create_dataset(self,
                       filenames,
                       batch_num,
                       batch_size=1,
                       is_shuffle=False):
        """Build an endlessly repeating, batched ``tf.data`` pipeline.

        :param filenames: TFRecord file name(s) to read
        :param batch_num: shuffle buffer size, used only when ``is_shuffle``
            is true
        :param batch_size: number of samples per batch
        :param is_shuffle: whether to shuffle the parsed samples
        :return: the resulting ``tf.data.Dataset``
        """
        dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.map(self.parse_single_example, num_parallel_calls=8)
        if is_shuffle:
            dataset = dataset.shuffle(batch_num)
        dataset = dataset.batch(batch_size)
        dataset = dataset.repeat()
        dataset = dataset.prefetch(batch_size)

        return dataset