示例#1
0
def preprocess(img, bbox_labels, mode, settings):
    """Augment (train mode only), resize and normalize one image with its boxes.

    Args:
        img: PIL image; its ``size`` and ``resize`` are used.
        bbox_labels: Sequence of per-box records. Elements 1 and 3 of each
            record are treated as normalized xmin / xmax when mirroring.
            NOTE(review): the full record layout is defined by the caller and
            ``image_util`` -- confirm there.
        mode: ``'train'`` enables distortion, expansion, random cropping and
            random horizontal mirroring. Any other value only resizes and
            normalizes the image.
        settings: Configuration object providing ``_apply_distort``,
            ``_apply_expand``, ``resize_w``, ``resize_h`` and ``img_mean``.

    Returns:
        A ``(img, sampled_labels)`` tuple. ``img`` is a float32 array that has
        been moved to channel-first order (when it has three dimensions),
        channel-reversed, mean-subtracted and scaled by 0.007843.
        ``sampled_labels`` are the labels matching the returned image. The
        caller's ``bbox_labels`` are never modified by the mirroring step.
    """
    import copy  # local import: only needed for the defensive label copy

    img_width, img_height = img.size
    sampled_labels = bbox_labels
    if mode == 'train':
        if settings._apply_distort:
            img = image_util.distort_image(img, settings)
        if settings._apply_expand:
            img, bbox_labels, img_width, img_height = image_util.expand_image(
                img, bbox_labels, img_width, img_height, settings)
            # Keep the fallback labels in sync with the expanded image; they
            # are what gets returned when no crop is selected below.
            sampled_labels = bbox_labels

        # Sampler configurations are hard-coded here; the meaning of each
        # positional argument is defined by image_util.sampler.
        sampler_params = [
            (1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0),
            (1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0),
        ]
        batch_sampler = [image_util.sampler(*params)
                         for params in sampler_params]
        sampled_bbox = image_util.generate_batch_samples(
            batch_sampler, bbox_labels)

        img = np.array(img)
        if len(sampled_bbox) > 0:
            # Pick one of the candidate crops uniformly at random.
            idx = int(np.random.uniform(0, len(sampled_bbox)))
            img, sampled_labels = image_util.crop_image(
                img, bbox_labels, sampled_bbox[idx], img_width, img_height)

        img = Image.fromarray(img)

    # Image.LANCZOS is the same filter as the ANTIALIAS alias, which newer
    # Pillow releases no longer provide.
    img = img.resize((settings.resize_w, settings.resize_h), Image.LANCZOS)
    img = np.array(img)

    if mode == 'train':
        mirror = int(np.random.uniform(0, 2))
        if mirror == 1:
            # Horizontal flip of the pixels ...
            img = img[:, ::-1, :]
            # ... and of the boxes. Work on a copy so that labels owned by
            # the caller are not modified as a side effect.
            sampled_labels = copy.deepcopy(sampled_labels)
            for label in sampled_labels:
                label[1], label[3] = 1 - label[3], 1 - label[1]

    # HWC to CHW
    if len(img.shape) == 3:
        img = np.swapaxes(img, 1, 2)
        img = np.swapaxes(img, 1, 0)
    # RGB to BGR (reverse the first three channels)
    img = img[[2, 1, 0], :, :]
    img = img.astype('float32')
    img -= settings.img_mean
    # 0.007843 is approximately 1 / 127.5
    img = img * 0.007843
    return img, sampled_labels
示例#2
0
    def reader():
        """Yield preprocessed samples for every entry listed in ``file_list``.

        Uses ``file_list``, ``shuffle``, ``mode`` and ``settings`` from the
        enclosing scope.

        * ``'train'`` / ``'test'``: each entry holds an image path and an XML
          annotation path (whitespace separated, both relative to
          ``settings.data_dir``); ``(image, labels)`` pairs are yielded.
          Every label is ``[class_id, xmin, ymin, xmax, ymax, difficult]``
          with coordinates divided by the image width / height.
        * ``'infer'``: each entry is an image path; only the image is yielded.

        Images are resized to ``settings.resize_w`` x ``settings.resize_h``,
        moved to channel-first order when they have three dimensions,
        mean-subtracted and flattened to a float32 vector. In ``'train'``
        mode a random crop and a random horizontal mirror are applied, and
        samples left without any label are skipped.
        """
        # Read the whole list up front so the file is closed before the first
        # sample is yielded (a generator may stay suspended for a long time).
        # Blank lines are dropped; they cannot describe a sample.
        with open(file_list) as flist:
            lines = [entry for entry in (raw.strip() for raw in flist)
                     if entry]
        if shuffle:
            # Randomize the sample order.
            random.shuffle(lines)

        has_labels = mode in ('train', 'test')

        for line in lines:
            if has_labels:
                # Image path and matching annotation file path.
                img_path, label_path = line.split()
                img_path = os.path.join(settings.data_dir, img_path)
                label_path = os.path.join(settings.data_dir, label_path)
            elif mode == 'infer':
                # Image path only.
                img_path = os.path.join(settings.data_dir, line)
                # Parenthesized form behaves the same on Python 2 and 3.
                print(img_path)

            img = Image.open(img_path)
            # Original image size, used to normalize the box coordinates.
            img_width, img_height = img.size
            img = np.array(img)

            if has_labels:
                # Record layout: label | xmin | ymin | xmax | ymax | difficult
                bbox_labels = []
                root = xml.etree.ElementTree.parse(label_path).getroot()
                for obj in root.findall('object'):
                    bbox = obj.find('bndbox')
                    bbox_labels.append([
                        # Class id is the position of the class name in
                        # settings.label_list.
                        float(settings.label_list.index(
                            obj.find('name').text)),
                        float(bbox.find('xmin').text) / img_width,
                        float(bbox.find('ymin').text) / img_height,
                        float(bbox.find('xmax').text) / img_width,
                        float(bbox.find('ymax').text) / img_height,
                        float(obj.find('difficult').text),
                    ])

                sample_labels = bbox_labels
                if mode == 'train':
                    # Sampler configurations; the meaning of each positional
                    # argument is defined by image_util.sampler.
                    sampler_params = [
                        (1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0),
                        (1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0),
                        (1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0),
                        (1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0),
                        (1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0),
                        (1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0),
                        (1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0),
                    ]
                    batch_sampler = [image_util.sampler(*params)
                                     for params in sampler_params]
                    # Candidate crop boxes generated from the annotations.
                    sampled_bbox = image_util.generate_batch_samples(
                        batch_sampler, bbox_labels)

                    if len(sampled_bbox) > 0:
                        # Pick one candidate crop uniformly at random.
                        idx = int(random.uniform(0, len(sampled_bbox)))
                        img, sample_labels = image_util.crop_image(
                            img, bbox_labels, sampled_bbox[idx], img_width,
                            img_height)

            img = Image.fromarray(img)
            # Resize to the network input size. Image.LANCZOS is the same
            # filter as the ANTIALIAS alias, which newer Pillow releases no
            # longer provide.
            img = img.resize((settings.resize_w, settings.resize_h),
                             Image.LANCZOS)
            img = np.array(img)

            if mode == 'train':
                mirror = int(random.uniform(0, 2))
                if mirror == 1:
                    # Flip the pixels horizontally and mirror xmin / xmax.
                    img = img[:, ::-1, :]
                    for label in sample_labels:
                        label[1], label[3] = 1 - label[3], 1 - label[1]

            # HWC to CHW
            if len(img.shape) == 3:
                img = np.swapaxes(img, 1, 2)
                img = np.swapaxes(img, 1, 0)

            img = img.astype('float32')
            img -= settings.img_mean
            img = img.flatten()

            if has_labels:
                if mode == 'train' and len(sample_labels) == 0:
                    # Cropping removed every box; nothing to learn from.
                    continue
                yield img.astype('float32'), sample_labels
            elif mode == 'infer':
                yield img.astype('float32')
示例#3
0
def preprocess(img, bbox_labels, mode, settings, image_path):
    """Augment (train mode only), resize and normalize one image with its boxes.

    Args:
        img: PIL image; its ``size`` and ``resize`` are used.
        bbox_labels: Sequence of per-box records. Elements 1 and 3 of each
            record are treated as normalized xmin / xmax when mirroring.
            NOTE(review): the full record layout is defined by the caller and
            ``image_util`` -- confirm there.
        mode: ``'train'`` enables distortion, expansion, random cropping and
            random horizontal mirroring. Any other value only resizes and
            normalizes the image.
        settings: Configuration object providing ``apply_distort``,
            ``apply_expand``, ``data_anchor_sampling_prob``, ``resize_width``,
            ``resize_height``, ``min_face_size``, ``img_mean`` and ``scale``.
        image_path: Not used by this function; kept for interface
            compatibility.

    Returns:
        A ``(img, sampled_labels)`` tuple. ``img`` is the float32 result of
        ``to_chw_bgr`` after mean subtraction and scaling by
        ``settings.scale``. ``sampled_labels`` are the labels matching the
        returned image. The caller's ``bbox_labels`` are never modified by the
        mirroring step.
    """
    import copy  # local import: only needed for the defensive label copy

    img_width, img_height = img.size
    sampled_labels = bbox_labels
    if mode == 'train':
        if settings.apply_distort:
            img = image_util.distort_image(img, settings)
        if settings.apply_expand:
            img, bbox_labels, img_width, img_height = image_util.expand_image(
                img, bbox_labels, img_width, img_height, settings)
            # Keep the fallback labels in sync with the expanded image; they
            # are what gets returned when no crop is selected below.
            sampled_labels = bbox_labels

        # Fixed seeds make the pipeline reproducible for continuous
        # evaluation runs.
        if 'ce_mode' in os.environ:
            random.seed(0)
            np.random.seed(0)

        prob = np.random.uniform(0., 1.)
        if prob > settings.data_anchor_sampling_prob:
            # Scale-aware random sampling around the listed anchor scales.
            scale_array = np.array([16, 32, 64, 128, 256, 512])
            batch_sampler = [
                image_util.sampler(1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2,
                                   0.0, True)
            ]
            sampled_bbox = image_util.generate_batch_random_samples(
                batch_sampler, bbox_labels, img_width, img_height, scale_array,
                settings.resize_width, settings.resize_height)
            img = np.array(img)
            if len(sampled_bbox) > 0:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                img, sampled_labels = image_util.crop_image_sampling(
                    img, bbox_labels, sampled_bbox[idx], img_width, img_height,
                    settings.resize_width, settings.resize_height,
                    settings.min_face_size)

            # Image.fromarray needs an 8-bit array here.
            img = img.astype('uint8')
            img = Image.fromarray(img)
        else:
            # Sampler configurations are hard-coded here; the meaning of each
            # positional argument is defined by image_util.sampler.
            sampler_params = [
                (1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
                (1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
                (1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
                (1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
                (1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, True),
            ]
            batch_sampler = [image_util.sampler(*params)
                             for params in sampler_params]
            sampled_bbox = image_util.generate_batch_samples(
                batch_sampler, bbox_labels, img_width, img_height)

            img = np.array(img)
            if len(sampled_bbox) > 0:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                img, sampled_labels = image_util.crop_image(
                    img, bbox_labels, sampled_bbox[idx], img_width, img_height,
                    settings.resize_width, settings.resize_height,
                    settings.min_face_size)

            img = Image.fromarray(img)

    # A resampling filter is drawn at random for every image (in all modes).
    interp_mode = [
        Image.BILINEAR, Image.HAMMING, Image.NEAREST, Image.BICUBIC,
        Image.LANCZOS
    ]
    interp_indx = np.random.randint(0, len(interp_mode))

    img = img.resize((settings.resize_width, settings.resize_height),
                     resample=interp_mode[interp_indx])
    img = np.array(img)

    if mode == 'train':
        mirror = int(np.random.uniform(0, 2))
        if mirror == 1:
            # Horizontal flip of the pixels ...
            img = img[:, ::-1, :]
            # ... and of the boxes. Work on a copy so that labels owned by
            # the caller are not modified as a side effect.
            sampled_labels = copy.deepcopy(sampled_labels)
            for label in sampled_labels:
                label[1], label[3] = 1 - label[3], 1 - label[1]

    img = to_chw_bgr(img)
    img = img.astype('float32')
    img -= settings.img_mean
    img = img * settings.scale
    return img, sampled_labels
示例#4
0
    def reader():
        """Yield preprocessed samples for every entry listed in ``file_list``.

        Uses ``file_list``, ``shuffle``, ``mode`` and ``settings`` from the
        enclosing scope.

        * ``"train"`` / ``"test"``: each entry holds an image name and a label
          file name, whitespace separated. They are resolved under
          ``settings.data_dir`` in ``resized_images`` and ``annotations``
          respectively. ``(image, labels)`` pairs are yielded. Every label is
          ``[class, xmin, ymin, xmax, ymax, difficult]`` with coordinates
          divided by ``ORIGINAL_WIDTH`` / ``ORIGINAL_HEIGH``. Entries whose
          label file holds no boxes are skipped.
        * ``"infer"``: the first token of each entry is the image name; only
          the image is yielded.

        Images are resized to ``settings.resize_w`` x ``settings.resize_h``,
        transposed to channel-first order, mean-subtracted and flattened to a
        float32 vector. In ``"train"`` mode a random crop and a random
        horizontal mirror are applied, and samples left without any label are
        skipped.
        """
        # Close the list file right after reading it; blank lines are dropped
        # because they cannot describe a sample.
        with open(file_list, "r") as flist:
            lines = [entry for entry in flist.readlines() if entry.strip()]
        if shuffle:
            random.shuffle(lines)

        has_labels = mode in ("train", "test")

        for line in lines:
            if has_labels:
                img_path, label_path = line.split()
                img_path = os.path.join(settings.data_dir, "resized_images",
                                        img_path)
                label_path = os.path.join(settings.data_dir, "annotations",
                                          label_path)
            elif mode == "infer":
                img_path = os.path.join(settings.data_dir, "resized_images",
                                        line.strip().split()[0])

            img = Image.open(img_path)
            img_width, img_height = img.size
            img = np.array(img)

            if has_labels:
                # The annotation is a JSON document on the first line of the
                # label file.
                with open(label_path, "r") as label_file:
                    labels = json.loads(label_file.readline())
                if len(labels) == 0:
                    continue

                # float() keeps the normalization a true division even when
                # the JSON values and the reference size are all integers
                # (Python 2 would otherwise floor the result).
                bbox_labels = [
                    [
                        settings.label_list[lbl["text"]],  # label
                        float(lbl["posX"]) / ORIGINAL_WIDTH,  # xmin
                        float(lbl["posY"]) / ORIGINAL_HEIGH,  # ymin
                        float(lbl["posX"] + lbl["width"]) /
                        ORIGINAL_WIDTH,  # xmax
                        float(lbl["posY"] + lbl["height"]) /
                        ORIGINAL_HEIGH,  # ymax
                        1.  # difficult
                    ]
                    for lbl in labels
                ]
                sample_labels = bbox_labels

                if mode == "train":
                    sampled_bbox = image_util.generate_batch_samples(
                        get_batch_sampler(), bbox_labels, img_width,
                        img_height)

                    if len(sampled_bbox) > 0:
                        # Pick one candidate crop uniformly at random.
                        idx = int(random.uniform(0, len(sampled_bbox)))
                        img, sample_labels = image_util.crop_image(
                            img, bbox_labels, sampled_bbox[idx], img_width,
                            img_height)

            img = Image.fromarray(img)
            # Image.LANCZOS is the same filter as the ANTIALIAS alias, which
            # newer Pillow releases no longer provide.
            img = img.resize((settings.resize_w, settings.resize_h),
                             Image.LANCZOS)
            img = np.array(img)

            if mode == "train":
                mirror = int(random.uniform(0, 2))
                if mirror == 1:
                    # Flip the pixels horizontally and mirror xmin / xmax.
                    img = img[:, ::-1, :]
                    for label in sample_labels:
                        label[1], label[3] = 1 - label[3], 1 - label[1]

            img = np.transpose(img, [2, 0, 1])  # channel first
            img = img.astype("float32")
            img -= settings.img_mean
            img = img.flatten()

            if has_labels:
                if mode == "train" and len(sample_labels) == 0:
                    # Cropping removed every box; nothing to learn from.
                    continue
                yield img, sample_labels
            elif mode == "infer":
                yield img.astype("float32")