Example #1
def apply_transforms(img_path, pipeline_cfg):
    """Apply transforms pipeline and get both formatted data and the image
    without formatting."""
    data = dict(img_info=dict(filename=img_path), img_prefix=None)

    def split_pipeline_cfg(pipeline_cfg):
        """to split the transfoms into image_transforms and
        format_transforms."""
        image_transforms_cfg, format_transforms_cfg = [], []
        if pipeline_cfg[0]['type'] != 'LoadImageFromFile':
            pipeline_cfg.insert(0, dict(type='LoadImageFromFile'))
        for transform in pipeline_cfg:
            if transform['type'] in FORMAT_TRANSFORMS_SET:
                format_transforms_cfg.append(transform)
            else:
                image_transforms_cfg.append(transform)
        return image_transforms_cfg, format_transforms_cfg

    image_transforms, format_transforms = split_pipeline_cfg(pipeline_cfg)
    image_transforms = Compose(image_transforms)
    format_transforms = Compose(format_transforms)

    intermediate_data = image_transforms(data)
    inference_img = copy.deepcopy(intermediate_data['img'])
    format_data = format_transforms(intermediate_data)

    return format_data, inference_img
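
A minimal usage sketch (hypothetical): the pipeline below mirrors a standard mmcls ImageNet test config and the image path is a placeholder; the surrounding module is assumed to define FORMAT_TRANSFORMS_SET (e.g. the formatting steps such as 'ToTensor', 'Normalize', 'ImageToTensor', 'Collect') along with the `copy` and `Compose` imports.

# Hypothetical usage of apply_transforms; 'demo/demo.JPEG' is a placeholder.
pipeline_cfg = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', size=(256, -1)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', mean=[123.675, 116.28, 103.53],
         std=[58.395, 57.12, 57.375], to_rgb=True),
    dict(type='ImageToTensor', keys=['img']),
    dict(type='Collect', keys=['img']),
]
format_data, raw_img = apply_transforms('demo/demo.JPEG', pipeline_cfg)
print(type(format_data['img']), raw_img.shape)  # formatted tensor vs. HWC ndarray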
def test_randomgrayscale():
    # test rgb2gray: with gray_prob > 1 the grayscale image is always returned
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    kwargs = dict(gray_prob=2)

    aug = []
    aug.extend([mmcls_transforms.RandomGrayscale(**kwargs)])
    composed_transform = Compose(aug)
    print(composed_transform)
    results = dict()
    results['img'] = in_img
    img = composed_transform(results)['img']
    computed_gray = (in_img[:, :, 0] * 0.299 + in_img[:, :, 1] * 0.587 +
                     in_img[:, :, 2] * 0.114)
    for i in range(img.shape[2]):
        assert_array_almost_equal(img[:, :, i], computed_gray, decimal=4)
    assert img.shape == (10, 10, 3)

    # test rgb2gray: with gray_prob = -1 the original image is returned
    in_img = np.random.rand(10, 10, 3).astype(np.float32)
    kwargs = dict(gray_prob=-1)

    aug = []
    aug.extend([mmcls_transforms.RandomGrayscale(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = in_img
    img = composed_transform(results)['img']
    assert_array_equal(img, in_img)
    assert img.shape == (10, 10, 3)

    # test a single-channel image with our method and with the
    # torchvision implementation
    in_img = np.random.rand(10, 10, 1).astype(np.float32)
    kwargs = dict(gray_prob=2)

    aug = []
    aug.extend([mmcls_transforms.RandomGrayscale(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = in_img
    img = composed_transform(results)['img']
    assert_array_equal(img, in_img)
    assert img.shape == (10, 10, 1)

    in_img_pil = Image.fromarray(in_img[:, :, 0], mode='L')
    kwargs = dict(p=2)
    aug = []
    aug.extend([torchvision.transforms.RandomGrayscale(**kwargs)])
    composed_transform = Compose(aug)
    img_pil = composed_transform(in_img_pil)
    assert_array_equal(np.array(img_pil), np.array(in_img_pil))
    assert np.array(img_pil).shape == (10, 10)
Example #3
def inference_model(model, img):
    """Inference image(s) with the classifier.

    Args:
        model (nn.Module): The loaded classifier.
        img (str/ndarray): The image filename.

    Returns:
        result (dict): The classification results that contains
            `class_name`, `pred_label` and `pred_score`.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    test_pipeline = Compose(cfg.data.test.pipeline)
    # prepare data
    data = dict(img_info=dict(filename=img), img_prefix=None)
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]

    # forward the model
    with torch.no_grad():
        scores = model(return_loss=False, **data)
        pred_score = np.max(scores, axis=1)[0]
        pred_label = np.argmax(scores, axis=1)[0]
        result = {'pred_label': pred_label, 'pred_score': pred_score}
    result['class_name'] = model.CLASSES[result['pred_label']]
    return result
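
A hedged usage sketch; the config and checkpoint paths below are placeholders, and `init_model` is mmcls's standard loader.

# Hypothetical usage; paths are placeholders.
from mmcls.apis import init_model

model = init_model('configs/resnet/resnet50_8xb32_in1k.py',
                   'checkpoints/resnet50.pth', device='cuda:0')
result = inference_model(model, 'demo/demo.JPEG')
print(result['class_name'], result['pred_score'])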
def inference_model_batch(model, images_nps, topn=5):
    """Inference a batch of images with the classifier.

    Args:
        model (nn.Module): The loaded classifier.
        images_nps (list[ndarray]): The loaded images.
        topn (int): The number of top predictions to return. Defaults to 5.

    Returns:
        result (list[dict]): The top-n classification results per image,
            each containing `label`, `score` and `class`.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    if cfg.data.test.pipeline[0]['type'] == 'LoadImageFromFile':
        cfg.data.test.pipeline.pop(0)

    test_pipeline = Compose(cfg.data.test.pipeline)

    with torch.no_grad():

        inference_results = []
        # `sly.batched` and `g.batch_size` come from the surrounding
        # Supervisely app module
        for images_batch in sly.batched(images_nps, g.batch_size):
            data = [dict(img=img) for img in images_batch]

            data = [test_pipeline(row) for row in data]
            data = collate(data, samples_per_gpu=1)

            if next(model.parameters()).is_cuda:
                # scatter to specified GPU
                data = scatter(data, [device])[0]

            batch_scores = np.asarray(model(return_loss=False, **data))
            batch_top_indexes = batch_scores.argsort(axis=1)[:, -topn:][:, ::-1]

            for scores, top_indexes in zip(batch_scores, batch_top_indexes):
                inference_results.append({
                    'label': top_indexes.astype(int).tolist(),
                    'score': scores[top_indexes].astype(float).tolist(),
                    'class': np.asarray(model.CLASSES)[top_indexes].tolist()
                })

    return inference_results
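
A usage sketch under the same assumptions as above; images are pre-loaded as ndarrays (here via `mmcv.imread`), since this variant strips `LoadImageFromFile` from the pipeline, and the Supervisely helpers (`sly`, `g.batch_size`) are assumed to be importable.

# Hypothetical usage; `model` is an initialized classifier, paths are placeholders.
import mmcv

images_nps = [mmcv.imread(p) for p in ['a.jpg', 'b.jpg']]
results = inference_model_batch(model, images_nps, topn=3)
for res in results:
    print(res['class'][0], res['score'][0])  # top-1 class and score per image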
Example #5
def build_dataset_pipeline(cfg, phase):
    """build dataset and pipeline from config.

    Separate the pipeline except 'LoadImageFromFile' step if
    'LoadImageFromFile' in the pipeline.
    """
    data_cfg = cfg.data[phase]
    loadimage_pipeline = []
    if (len(data_cfg.pipeline) != 0
            and data_cfg.pipeline[0]['type'] == 'LoadImageFromFile'):
        loadimage_pipeline.append(data_cfg.pipeline.pop(0))
    origin_pipeline = data_cfg.pipeline
    data_cfg.pipeline = loadimage_pipeline
    dataset = build_dataset(data_cfg)
    pipeline = Compose(origin_pipeline)

    return dataset, pipeline
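
A minimal sketch of how this helper might be called, assuming an mmcls-style config file (the path is a placeholder).

# Hypothetical usage; the config path is a placeholder.
from mmcv import Config

cfg = Config.fromfile('configs/resnet/resnet50_8xb32_in1k.py')
dataset, pipeline = build_dataset_pipeline(cfg, 'test')
# The dataset now only loads raw images; the remaining transforms can be
# applied separately, e.g. for visualizing intermediate results.
sample = pipeline(dataset[0])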
Example #6
def make_img_transform(img_size,
                       norm_cfg=imagenet_norm,
                       to_rgb=True,
                       divisor=32):
    img_size = tuple(img_size)
    imw, imh = img_size
    return Compose([
        AlbuAug([
            A.LongestMaxSize(max_size=imw),
            A.PadIfNeeded(min_height=imh,
                          min_width=imw,
                          border_mode=cv2.BORDER_REPLICATE),
        ]),
        Normalize(**norm_cfg, to_rgb=to_rgb),
        ImageToTensor(['img']),
        Collect(['img']),
    ])
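
A usage sketch, assuming `imagenet_norm` is the usual mean/std dict and `AlbuAug` wraps a list of albumentations transforms; the input follows the mmcv HWC BGR convention, and depending on the `Collect` implementation additional metadata keys may be required.

# Hypothetical usage; 'demo.jpg' is a placeholder path.
import mmcv

transform = make_img_transform((512, 512))
data = transform(dict(img=mmcv.imread('demo.jpg')))
img_tensor = data['img']  # CHW tensor, resized and padded to the target size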
Example #7
def inference(config_file, checkpoint, classes, args):
    cfg = Config.fromfile(config_file)

    model = init_model(cfg, checkpoint, device=args.device)
    model.CLASSES = classes

    # build the data pipeline
    if cfg.data.test.pipeline[0]['type'] != 'LoadImageFromFile':
        cfg.data.test.pipeline.insert(0, dict(type='LoadImageFromFile'))
    if cfg.data.test.type in ['CIFAR10', 'CIFAR100']:
        # The image shape of CIFAR is (32, 32, 3)
        cfg.data.test.pipeline.insert(1, dict(type='Resize', size=32))

    data = dict(img_info=dict(filename=args.img), img_prefix=None)

    test_pipeline = Compose(cfg.data.test.pipeline)
    data = test_pipeline(data)
    resolution = tuple(data['img'].shape[1:])
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [args.device])[0]

    # forward the model
    result = {'resolution': resolution}
    with torch.no_grad():
        if args.inference_time:
            time_record = []
            for _ in range(10):
                start = time()
                scores = model(return_loss=False, **data)
                time_record.append((time() - start) * 1000)
            # drop the first and last runs to reduce warm-up/jitter noise
            result['time_mean'] = np.mean(time_record[1:-1])
            result['time_std'] = np.std(time_record[1:-1])
        else:
            scores = model(return_loss=False, **data)

        pred_score = np.max(scores, axis=1)[0]
        pred_label = np.argmax(scores, axis=1)[0]
        result['pred_label'] = pred_label
        result['pred_score'] = float(pred_score)
    result['pred_class'] = model.CLASSES[result['pred_label']]

    result['model'] = config_file.stem

    return result
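
A sketch of calling this benchmark helper; `args` only needs the attributes the function reads (`device`, `img`, `inference_time`), so a plain namespace works, and `config_file` must be a Path since the function uses `.stem`. All paths and the class list are placeholders.

# Hypothetical usage; config/checkpoint/image paths are placeholders.
from argparse import Namespace
from pathlib import Path

args = Namespace(device='cuda:0', img='demo/demo.JPEG', inference_time=True)
result = inference(Path('configs/resnet/resnet50_8xb32_in1k.py'),
                   'checkpoints/resnet50.pth', ['cat', 'dog'], args)
print(result['pred_class'], result['time_mean'], 'ms')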
def inference_model(model, img, topn=5):
    """Inference an image with the classifier.

    Args:
        model (nn.Module): The loaded classifier.
        img (str/ndarray): The image filename or loaded image.
        topn (int): The number of top predictions to return. Defaults to 5.

    Returns:
        result (list[dict]): The top-n classification results, each
            containing `label`, `score` and `class`.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    if isinstance(img, str):
        if cfg.data.test.pipeline[0]['type'] != 'LoadImageFromFile':
            cfg.data.test.pipeline.insert(0, dict(type='LoadImageFromFile'))
        data = dict(img_info=dict(filename=img), img_prefix=None)
    else:
        if cfg.data.test.pipeline[0]['type'] == 'LoadImageFromFile':
            cfg.data.test.pipeline.pop(0)
        data = dict(img=img)
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]

    # forward the model
    with torch.no_grad():
        scores = model(return_loss=False, **data)
        model_out = scores[0]
        top_labels = model_out.argsort()[-topn:][::-1]
        top_scores = model_out[top_labels]
        result = []
        for label, score in zip(top_labels, top_scores):
            result.append({
                'label': int(label),
                'score': float(score),
                'class': model.CLASSES[label]
            })
    return result
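
A usage sketch for the top-n variant; note it accepts either a filename or a pre-loaded ndarray. `model` is an initialized classifier as above, and the path is a placeholder.

# Hypothetical usage.
for res in inference_model(model, 'demo/demo.JPEG', topn=3):
    print(f"{res['class']}: {res['score']:.3f}")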
def test_randomresizedcrop():
    ori_img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                          'color')
    ori_img_pil = Image.open(
        osp.join(osp.dirname(__file__), '../data/color.jpg'))

    seed = random.randint(0, 100)

    # test when scale is not of kind (min, max)
    with pytest.raises(ValueError):
        kwargs = dict(size=(200, 300),
                      scale=(1.0, 0.08),
                      ratio=(3. / 4., 4. / 3.))
        aug = []
        aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
        composed_transform = Compose(aug)
        results = dict()
        results['img'] = ori_img
        composed_transform(results)['img']

    # test when ratio is not of kind (min, max)
    with pytest.raises(ValueError):
        kwargs = dict(size=(200, 300),
                      scale=(0.08, 1.0),
                      ratio=(4. / 3., 3. / 4.))
        aug = []
        aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
        composed_transform = Compose(aug)
        results = dict()
        results['img'] = ori_img
        composed_transform(results)['img']

    # test crop size is int
    kwargs = dict(size=200, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    # test __repr__()
    print(composed_transform)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (200, 200, 3)
    assert np.array(baseline).shape == (200, 200, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size < image size
    kwargs = dict(size=(200, 300), scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (200, 300, 3)
    assert np.array(baseline).shape == (200, 300, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size > image size
    kwargs = dict(size=(600, 700), scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (600, 700, 3)
    assert np.array(baseline).shape == (600, 700, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test cropping the whole image
    kwargs = dict(size=(ori_img.shape[0], ori_img.shape[1]),
                  scale=(1.0, 2.0),
                  ratio=(1.0, 2.0))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    assert np.array(baseline).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform
    # assert_array_equal(ori_img, img)
    # assert_array_equal(np.array(ori_img_pil), np.array(baseline))

    # test central crop when in_ratio < min(ratio)
    kwargs = dict(size=(ori_img.shape[0], ori_img.shape[1]),
                  scale=(1.0, 2.0),
                  ratio=(2, 3))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    assert np.array(baseline).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test central crop when in_ratio > max(ratio)
    kwargs = dict(size=(ori_img.shape[0], ori_img.shape[1]),
                  scale=(1.0, 2.0),
                  ratio=(3. / 4, 1))
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    assert np.array(baseline).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test different interpolation types
    for mode in ['nearest', 'bilinear', 'bicubic', 'area', 'lanczos']:
        kwargs = dict(size=(600, 700),
                      scale=(0.08, 1.0),
                      ratio=(3. / 4., 4. / 3.),
                      interpolation=mode)
        aug = []
        aug.extend([mmcls_transforms.RandomResizedCrop(**kwargs)])
        composed_transform = Compose(aug)
        results = dict()
        results['img'] = ori_img
        img = composed_transform(results)['img']
        assert img.shape == (600, 700, 3)
def test_randomcrop():
    ori_img = mmcv.imread(osp.join(osp.dirname(__file__), '../data/color.jpg'),
                          'color')
    ori_img_pil = Image.open(
        osp.join(osp.dirname(__file__), '../data/color.jpg'))
    seed = random.randint(0, 100)

    # test crop size is int
    kwargs = dict(size=200, padding=0, pad_if_needed=True, fill=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    kwargs = dict(size=200, padding=0, pad_if_needed=True, pad_val=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)

    # test __repr__()
    print(composed_transform)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (200, 200, 3)
    assert np.array(baseline).shape == (200, 200, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size < image size
    kwargs = dict(size=(200, 300), padding=0, pad_if_needed=True, fill=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    kwargs = dict(size=(200, 300), padding=0, pad_if_needed=True, pad_val=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (200, 300, 3)
    assert np.array(baseline).shape == (200, 300, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size > image size
    kwargs = dict(size=(600, 700), padding=0, pad_if_needed=True, fill=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    kwargs = dict(size=(600, 700), padding=0, pad_if_needed=True, pad_val=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']
    assert np.array(img).shape == (600, 700, 3)
    assert np.array(baseline).shape == (600, 700, 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform

    # test crop size == image size
    kwargs = dict(size=(ori_img.shape[0], ori_img.shape[1]),
                  padding=0,
                  pad_if_needed=True,
                  fill=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    baseline = composed_transform(ori_img_pil)

    kwargs = dict(size=(ori_img.shape[0], ori_img.shape[1]),
                  padding=0,
                  pad_if_needed=True,
                  pad_val=0)
    random.seed(seed)
    np.random.seed(seed)
    aug = []
    aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
    composed_transform = Compose(aug)
    results = dict()
    results['img'] = ori_img
    img = composed_transform(results)['img']

    assert np.array(img).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    assert np.array(baseline).shape == (ori_img.shape[0], ori_img.shape[1], 3)
    nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
    nonzero_transform = len((img - np.array(baseline)[:, :, ::-1]).nonzero())
    assert nonzero == nonzero_transform
    assert_array_equal(ori_img, img)
    assert_array_equal(np.array(baseline), np.array(ori_img_pil))

    # test different padding mode
    for mode in ['constant', 'edge', 'reflect', 'symmetric']:
        kwargs = dict(size=(500, 600), padding=0, pad_if_needed=True, fill=0)
        kwargs['padding_mode'] = mode
        random.seed(seed)
        np.random.seed(seed)
        aug = []
        aug.extend([torchvision.transforms.RandomCrop(**kwargs)])
        composed_transform = Compose(aug)
        baseline = composed_transform(ori_img_pil)

        kwargs = dict(size=(500, 600),
                      padding=0,
                      pad_if_needed=True,
                      pad_val=0)
        random.seed(seed)
        np.random.seed(seed)
        aug = []
        aug.extend([mmcls_transforms.RandomCrop(**kwargs)])
        composed_transform = Compose(aug)
        results = dict()
        results['img'] = ori_img
        img = composed_transform(results)['img']
        assert np.array(img).shape == (500, 600, 3)
        assert np.array(baseline).shape == (500, 600, 3)
        nonzero = len((ori_img - np.array(ori_img_pil)[:, :, ::-1]).nonzero())
        nonzero_transform = len(
            (img - np.array(baseline)[:, :, ::-1]).nonzero())
        assert nonzero == nonzero_transform