def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(
            250.0 / iheight
        ),  # this is for computational efficiency, since rotation is very slow
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)

    # random color jittering
    rgb_np = color_jitter(rgb_np)

    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)

    return rgb_np, depth_np
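For context, a minimal sketch of how a train_transform like the one above is usually wired into a dataset's __getitem__ (the DepthDataset wrapper below is hypothetical, not part of the examples):

import numpy as np
import torch
from torch.utils.data import Dataset

class DepthDataset(Dataset):  # hypothetical wrapper, for illustration only
    def __init__(self, pairs, transform):
        self.pairs = pairs          # list of (rgb, depth) numpy arrays
        self.transform = transform  # e.g. train_transform above

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        rgb, depth = self.pairs[idx]
        rgb_np, depth_np = self.transform(rgb, depth)
        rgb_t = torch.from_numpy(rgb_np.transpose(2, 0, 1).copy()).float()  # HWC -> CHW
        depth_t = torch.from_numpy(depth_np.copy()).unsqueeze(0).float()    # add channel dim
        return rgb_t, depth_t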
Example #2
def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # set zeros in depth as NaN
    depth_np[depth_np == 0] = np.nan

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(
            float(image_size) / iheight
        ),  # this is for computational efficiency, since rotation is very slow
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip),
    ])
    rgb_np = transform(rgb)

    # random color jittering
    rgb_np = color_jitter(rgb_np)

    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    rgb_np = normalize(rgb_np)  # from [0,1] to [-1,1]

    depth_np = transform(depth_np)
    depth_np[np.isnan(depth_np)] = 0

    depth_np = depth_np / 10.0

    return rgb_np, depth_np
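The zero-to-NaN round trip in Example #2 matters because resampling in Resize/Rotate would otherwise blend invalid zero-depth pixels into their valid neighbours; NaN stays "poisonous" through interpolation and is mapped back to the zero sentinel afterwards. A minimal numpy illustration, with plain two-tap averaging standing in for the real transform:

import numpy as np

row = np.array([0.0, 4.0, 4.0, 4.0])   # 0 marks a missing depth reading
naive = (row[:-1] + row[1:]) / 2        # [2. 4. 4.] -- the invalid 0 leaks in as 2.0
row[row == 0] = np.nan
masked = (row[:-1] + row[1:]) / 2       # [nan 4. 4.] -- invalid stays invalid
masked[np.isnan(masked)] = 0            # restore the 0 sentinel after resampling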
Example #3
    def train_transform(self, rgb: np.ndarray, depth_raw: np.ndarray,
                        depth_fix: np.ndarray) -> TNpData:
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_raw = depth_raw / s
        depth_fix = depth_fix / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        # perform 1st part of data augmentation
        transform = transforms.Compose([
            transforms.Resize(
                250.0 / self.iheight
            ),  # this is for computational efficiency, since rotation is very slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop((self.oheight, self.owidth)),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb = transform(rgb)

        # random color jittering
        rgb = color_jitter(rgb)

        rgb = np.asfarray(rgb, dtype='float') / 255
        depth_raw = transform(depth_raw)
        depth_fix = transform(depth_fix)

        return rgb, depth_raw, depth_fix
Example #4
def train(args):
    train_transforms = transforms.Compose([
        transforms.Resize(args.image_shape),
        transforms.RandomHorizontalFlip(),
        transforms.Normalize()
    ])

    eval_transforms = transforms.Compose(
        [transforms.Resize(args.image_shape),
         transforms.Normalize()])

    train_dataset = Dataset(
        data_dir=args.data_dir,
        file_list=args.train_list,
        transforms=train_transforms,
        num_workers='auto',
        buffer_size=100,
        parallel_method='thread',
        shuffle=True)

    eval_dataset = None
    if args.val_list is not None:
        eval_dataset = Dataset(
            data_dir=args.data_dir,
            file_list=args.val_list,
            transforms=eval_transforms,
            num_workers='auto',
            buffer_size=100,
            parallel_method='thread',
            shuffle=False)

    if args.model_type == 'HumanSegMobile':
        model = HumanSegMobile(num_classes=2)
    elif args.model_type == 'HumanSegLite':
        model = HumanSegLite(num_classes=2)
    elif args.model_type == 'HumanSegServer':
        model = HumanSegServer(num_classes=2)
    else:
        raise ValueError(
            "--model_type: {} is invalid; it should be one of ('HumanSegMobile', "
            "'HumanSegLite', 'HumanSegServer')".format(args.model_type))
    model.train(
        num_epochs=args.num_epochs,
        train_dataset=train_dataset,
        train_batch_size=args.batch_size,
        eval_dataset=eval_dataset,
        save_interval_epochs=args.save_interval_epochs,
        save_dir=args.save_dir,
        pretrained_weights=args.pretrained_weights,
        resume_weights=args.resume_weights,
        learning_rate=args.learning_rate,
        use_vdl=args.use_vdl)
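A hedged sketch of the argparse wiring this train() entry point assumes; the flag names simply mirror the attributes read above, and the defaults are illustrative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model_type', default='HumanSegMobile')
parser.add_argument('--data_dir', required=True)
parser.add_argument('--train_list', required=True)
parser.add_argument('--val_list', default=None)
parser.add_argument('--image_shape', nargs=2, type=int, default=[192, 192])
parser.add_argument('--num_epochs', type=int, default=100)
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--save_interval_epochs', type=int, default=5)
parser.add_argument('--save_dir', default='output')
parser.add_argument('--pretrained_weights', default=None)
parser.add_argument('--resume_weights', default=None)
parser.add_argument('--learning_rate', type=float, default=0.05)
parser.add_argument('--use_vdl', action='store_true')
train(parser.parse_args())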
Example #5
 def build_model(self):
     """ DataLoader """
     train_transform = transforms.Compose([
         transforms.RandomHorizontalFlip(),
         transforms.Resize((self.img_size + 30, self.img_size + 30)),
         transforms.RandomCrop(self.img_size),
         transforms.ToTensor(),
         transforms.Normalize(mean=0.5, std=0.5)
     ])
     test_transform = transforms.Compose([
         transforms.Resize((self.img_size, self.img_size)),
         transforms.ToTensor(),
         transforms.Normalize(mean=0.5, std=0.5)
     ])
     self.trainA_loader = paddle.batch(
         a_reader(shuffle=True, transforms=train_transform),
         self.batch_size)()
     self.trainB_loader = paddle.batch(
         b_reader(shuffle=True, transforms=train_transform),
         self.batch_size)()
     self.testA_loader = a_test_reader(transforms=test_transform)
     self.testB_loader = b_test_reader(transforms=test_transform)
     """ Define Generator, Discriminator """
     self.genA2B = ResnetGenerator(input_nc=3,
                                   output_nc=3,
                                   ngf=self.ch,
                                   n_blocks=self.n_res,
                                   img_size=self.img_size,
                                   light=self.light)
     self.genB2A = ResnetGenerator(input_nc=3,
                                   output_nc=3,
                                   ngf=self.ch,
                                   n_blocks=self.n_res,
                                   img_size=self.img_size,
                                   light=self.light)
     self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
     self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
     self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
     self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
     """ Define Loss """
     self.L1_loss = L1Loss()
     self.MSE_loss = MSELoss()
     self.BCE_loss = BCEWithLogitsLoss()
     """ Trainer """
     self.G_optim = self.optimizer_setting(self.genA2B.parameters() +
                                           self.genB2A.parameters())
     self.D_optim = self.optimizer_setting(self.disGA.parameters() +
                                           self.disGB.parameters() +
                                           self.disLA.parameters() +
                                           self.disLB.parameters())
     """ Define Rho clipper to constrain the value of rho in AdaILN and ILN """
     self.Rho_clipper = RhoClipper(0, 1)
Example #6
def build_dataset(path_to_imgs, path_to_json_train, path_to_json_test):
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize(256),
            transforms.CustomCrop(),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val':
        transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }
    TID_train = TIDLoader(path_to_imgs, path_to_json_train,
                          data_transforms['train'])
    TID_test = TIDLoader(path_to_imgs, path_to_json_test,
                         data_transforms['val'])
    """
    Since the ground-truth distributions came as a train-test split (no val
    data), the train data is split in two parts: only 76% of the original
    train data is used for training, and the remainder is added to the
    original test data. This combined test set is then split into validation
    and test data. In short, the ~3k images in TID2013 end up split roughly
    70-20-10 (train-val-test).
    """
    train_len = len(TID_train)
    idx = list(range(train_len))
    random.shuffle(idx)
    split_idx = idx[:int(0.76 * train_len)]
    train_split = Subset(TID_train, split_idx)

    split_idx = idx[int(0.76 * train_len):]
    train_val_split = Subset(TID_train, split_idx)
    val_split = ConcatDataset([train_val_split, TID_test])

    val_len = len(val_split)
    val_idx = list(range(val_len))
    random.shuffle(val_idx)
    val_split_idx = val_idx[:int(0.75 * val_len)]
    final_val_split = Subset(val_split, val_split_idx)

    test_split_idx = val_idx[int(0.75 * val_len):]
    test_split = Subset(val_split, test_split_idx)
    return train_split, final_val_split, test_split
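A quick sanity check of the roughly 70-20-10 claim in the docstring, assuming (an assumption, not stated above) that the original TID2013 train/test split is about 90/10:

total = 3000                       # TID2013 images, per the docstring
train0, test0 = 2700, 300          # assumed ~90/10 original split

train = 0.76 * train0              # 2052 -> ~68% of total
pool = 0.24 * train0 + test0       # 948 images left for val/test
val = 0.75 * pool                  # 711 -> ~24% of total
test = 0.25 * pool                 # 237 -> ~8% of total
print(train / total, val / total, test / total)   # ~0.68, ~0.24, ~0.08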
Example #7
    def __init__(self,
                 img_size: int,
                 original_size: int,
                 mean: float = 0,
                 std: float = 1,
                 brightness: float = 0.3,
                 contrast: float = 0.5,
                 saturation: float = 0.5,
                 hue: float = 0.3,
                 rotation_degree: int = 10,
                 hflip: float = 0.5,
                 debug: bool = False):

        self.original_size = original_size
        self.target_size = img_size
        self.to_pil = transforms.ToPILImage()
        self.color_jitter = transforms.ColorJitter(brightness=brightness,
                                                   contrast=contrast,
                                                   saturation=saturation,
                                                   hue=hue)
        self.resize = transforms.Resize(img_size)
        self.to_tensor = transforms.ToTensor()
        self.normalize = transforms.Normalize(mean, std)
        self.r_horizontal_flip = RandomHorizontalFlip(p=hflip)
        self.r_rotation = RandomRotation(rotation_degree)
        self.debug = debug
Example #8
def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)

    # random color jittering
    rgb_np = color_jitter(rgb_np)

    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    # Scipy affine_transform produced RuntimeError when the depth map was
    # given as a 'numpy.ndarray'
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
Example #9
def get_transform(train):
    transforms = [T.Resize((800, 800)), T.ToTensor()]
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
Example #10
def get(args):
    """ Entry point. Call this function to get all Charades dataloaders """
    normalize = arraytransforms.Normalize(mean=[0.502], std=[1.0])
    train_file = args.train_file
    val_file = args.val_file
    train_dataset = Charadesflow(args.data,
                                 'train',
                                 train_file,
                                 args.cache,
                                 transform=transforms.Compose([
                                     arraytransforms.RandomResizedCrop(224),
                                     arraytransforms.ToTensor(),
                                     normalize,
                                     transforms.Lambda(lambda x: torch.cat(x)),
                                 ]))
    val_transforms = transforms.Compose([
        arraytransforms.Resize(256),
        arraytransforms.CenterCrop(224),
        arraytransforms.ToTensor(),
        normalize,
        transforms.Lambda(lambda x: torch.cat(x)),
    ])
    val_dataset = Charadesflow(args.data,
                               'val',
                               val_file,
                               args.cache,
                               transform=val_transforms)
    valvideo_dataset = Charadesflow(args.data,
                                    'val_video',
                                    val_file,
                                    args.cache,
                                    transform=val_transforms)
    return train_dataset, val_dataset, valvideo_dataset
Example #11
    def create_transforms(self):

        transforms_list = []

        if self.mode == 'pretrain_tnet':
            transforms_list.extend([
                transforms.RandomCrop(400),
                transforms.RandomRotation(180),
                transforms.RandomHorizontalFlip()
            ])
        if self.mode == 'pretrain_mnet':
            transforms_list.extend([
                transforms.RandomCrop(320),
            ])
        if self.mode == 'end_to_end':
            transforms_list.extend([
                transforms.RandomCrop(800),
            ])

        transforms_list.extend([
            transforms.Resize((self.patch_size, self.patch_size)),
            transforms.ToTensor()
        ])

        self.transforms = transforms.Compose(transforms_list)
Example #12
def get_data(data_dir,
             source,
             target,
             height,
             width,
             batch_size,
             re=0,
             workers=8):

    dataset = IU_X_RAY('./dataset/')

    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])

    train_transformer = T.Compose([
        T.Resize((height, width), interpolation=3),
        T.ToTensor(),
        normalizer,
    ])

    train_sampler = MRG_sampler(dataset.source_train,
                                root=dataset.train_images_dir,
                                transform=train_transformer)
    val_sampler = MRG_sampler(dataset.source_valid,
                              root=dataset.valid_images_dir,
                              transform=train_transformer)
    test_sampler = MRG_sampler(dataset.source_test,
                               root=dataset.test_images_dir,
                               transform=train_transformer)

    mesh_term_list = list(
        set(train_sampler.mesh_term_list) & set(test_sampler.mesh_term_list))
    train_sampler.mesh_term_list = mesh_term_list
    test_sampler.mesh_term_list = mesh_term_list

    print('mesh_term:' + str(len(mesh_term_list)))

    train_dataloader = DataLoader(train_sampler,
                                  batch_size=batch_size,
                                  num_workers=workers,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)

    val_dataloader = DataLoader(val_sampler,
                                batch_size=batch_size,
                                num_workers=workers,
                                shuffle=True,
                                pin_memory=True,
                                drop_last=True)

    test_dataloader = DataLoader(test_sampler,
                                 batch_size=batch_size,
                                 num_workers=workers,
                                 shuffle=True,
                                 pin_memory=True,
                                 drop_last=True)

    return dataset, mesh_term_list, train_dataloader, val_dataloader, test_dataloader
Example #13
def get_transform(train, resolution):
    transforms = []

    # if square resolution, perform some aspect cropping
    # otherwise, resize to the resolution as specified
    if resolution[0] == resolution[1]:
        base_size = resolution[0] + 32  # 520
        crop_size = resolution[0]       # 480

        min_size = int((0.5 if train else 1.0) * base_size)
        max_size = int((2.0 if train else 1.0) * base_size)

        transforms.append(T.RandomResize(min_size, max_size))

        # during training mode, perform some data randomization
        if train:
            transforms.append(T.RandomHorizontalFlip(0.5))
            transforms.append(T.RandomCrop(crop_size))
    else:
        transforms.append(T.Resize(resolution))

        if train:
            transforms.append(T.RandomHorizontalFlip(0.5))

    transforms.append(T.ToTensor())
    transforms.append(T.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]))

    return T.Compose(transforms)
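A brief usage sketch for the helper above (the concrete resolutions are illustrative only):

train_tf = get_transform(train=True, resolution=(480, 480))   # square: random resize, flip, crop
val_tf = get_transform(train=False, resolution=(540, 960))    # non-square: plain resize, tensor, normalize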
Example #14
def get_transform(train):
    transforms = []
    transforms.append(T.Resize((imsize, imsize)))
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
Example #15
def _setup_dataloaders(root_dir, return_dataset=False):
    """
    Setup dataloaders.
    """
    preprocessing = [
        aug.NormalizeBboxes(cfg.grid_size),
        aug.Bboxes2Matrices(cfg.grid_size, cfg.num_classes),
        aug.Resize(cfg.target_size),
        aug.Normalize(cfg.mean, cfg.std, 1. / 255),
        aug.ToTensor()
    ]
    # train and val share the same pipeline (no train-time-only augmentation)
    transforms_train = preprocessing
    transforms_val = preprocessing

    ds_train = VOCDataset(root_dir, image_set="train")
    dl_train = get_dataloader(ds_train,
                              transforms_train,
                              cfg.batch_size,
                              num_workers=4)
    ds_val = VOCDataset(root_dir, image_set="val")
    dl_val = get_dataloader(ds_val, transforms_val, cfg.batch_size)

    if return_dataset:
        return dl_train, dl_val, ds_train, ds_val

    return dl_train, dl_val
Example #16
def get_iterator(mode):
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    kwargs = {'num_workers': 4, 'pin_memory': True}
    transform_augment = transforms.Compose([
        # transforms.RandomResizedCrop(args.size, scale=(0.8, 1.2)),  # random scale 0.8-1 of original image area, crop to args.size
        transforms.RandomResizedCrop(size),
        transforms.RandomRotation(15),  # random rotation -15 to +15 degrees
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    transform = transforms.Compose([
        transforms.Resize((size, size)),
        transforms.ToTensor(),
        normalize,
    ])
    if mode:
        dataset = Dataset.MURA(split="train", transform=(transform_augment if augment else transform), type=type)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             **kwargs)
    else:
        dataset = Dataset.MURA(split="test", transform=transform, type=type)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             **kwargs)
    return loader
Example #17
def get_data_loaders(train_batch_size, val_batch_size):
    normalize = transforms.Normalize(mean=torch.Tensor([0.5]),
                                     std=torch.Tensor([0.2]))

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(256),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.MultiplicativeGaussianNoise(1, 0.01),
        normalize,
    ])

    val_transform = transforms.Compose(
        [transforms.Resize((512, 512)),
         transforms.ToTensor(), normalize])

    train_loader = DataLoader(DWTDataset('dataset',
                                         split='train',
                                         transform=train_transform),
                              batch_size=train_batch_size,
                              shuffle=True)

    val_loader = DataLoader(DWTDataset('dataset',
                                       split='valid',
                                       transform=val_transform),
                            batch_size=val_batch_size,
                            shuffle=False)

    return train_loader, val_loader
Example #18
def test(valdir, bs, sz, rect_val=False):
    if rect_val:
        idx_ar_sorted = sort_ar(valdir)
        idx_sorted, _ = zip(*idx_ar_sorted)
        idx2ar = map_idx2ar(idx_ar_sorted, bs)

        ar_tfms = [transforms.Resize(int(sz * 1.14)), CropArTfm(idx2ar, sz)]
        val_dataset = ValDataset(valdir, transform=ar_tfms)
        return PaddleDataLoader(val_dataset,
                                concurrent=1,
                                indices=idx_sorted,
                                shuffle=False).reader()

    val_tfms = [transforms.Resize(int(sz * 1.14)), transforms.CenterCrop(sz)]
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose(val_tfms))

    return PaddleDataLoader(val_dataset).reader()
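The aspect-ratio helpers (sort_ar, map_idx2ar, CropArTfm) are not shown here; purely as an assumption, a sorter like sort_ar might look like the sketch below, pairing each image index with its aspect ratio so that similarly shaped images land in the same batch and can share one crop rectangle:

import os
from PIL import Image

def sort_ar_sketch(valdir):   # hypothetical stand-in for sort_ar
    paths = sorted(
        os.path.join(root, name)
        for root, _, files in os.walk(valdir) for name in files)
    pairs = []
    for idx, path in enumerate(paths):
        with Image.open(path) as im:
            pairs.append((idx, im.width / im.height))
    return sorted(pairs, key=lambda p: p[1])   # sorted by aspect ratio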
Example #19
 def __init__(self, masked_paths: list, unmasked_paths: list,
              tgt_size: tuple):
     self.masked_paths = masked_paths
     self.unmasked_paths = unmasked_paths
     self.transform = T.Compose([
         T.ToPILImage(),
         T.ToTensor(),
         T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
         T.Resize(tgt_size)
     ])
Example #20
def get(args):
    """ Entry point. Call this function to get all Charades dataloaders """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_file = args.train_file
    val_file = args.val_file
    train_dataset = Charades(
        args.data,
        'train',
        train_file,
        args.cache,
        transform=transforms.Compose([
            transforms.RandomResizedCrop(args.inputsize),
            transforms.ColorJitter(brightness=0.4,
                                   contrast=0.4,
                                   saturation=0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),  # missing PCA lighting jitter
            normalize,
        ]))
    val_dataset = Charades(args.data,
                           'val',
                           val_file,
                           args.cache,
                           transform=transforms.Compose([
                               transforms.Resize(
                                   int(256. / 224 * args.inputsize)),
                               transforms.CenterCrop(args.inputsize),
                               transforms.ToTensor(),
                               normalize,
                           ]))
    valvideo_dataset = Charades(args.data,
                                'val_video',
                                val_file,
                                args.cache,
                                transform=transforms.Compose([
                                    transforms.Resize(
                                        int(256. / 224 * args.inputsize)),
                                    transforms.CenterCrop(args.inputsize),
                                    transforms.ToTensor(),
                                    normalize,
                                ]))
    return train_dataset, val_dataset, valvideo_dataset
Example #21
 def _to_torch(image: np.ndarray) -> _t.Tuple[_torch.Tensor, tuple]:
     shape = image.shape
     transform = _torchvision.transforms.Compose(
         [_transforms.Resize(320), _transforms.ToTensor()]
     )
     image = transform(image)
     image.unsqueeze_(0)
     image = image.type(_torch.FloatTensor)
     image = Variable(image.cuda())
     return image, shape
Example #22
def get_transform(train):
    mean = [0.3297]
    std = [0.2566]
    transform = []

    transform.append(T.Resize(IMAGE_SIZE))
    if train:
        transform.append(T.RandomHorizontalFlip(0.5))
    transform.append(T.ToTensor())
    return T.Compose(transform)
Example #23
 def val_transform(self, rgb: np.ndarray, depth_raw: np.ndarray,
                   depth_fix: np.ndarray) -> TNpData:
     # perform 1st part of data augmentation
     transform = transforms.Compose([
         transforms.Resize(240.0 / self.iheight),
         transforms.CenterCrop((self.oheight, self.owidth)),
     ])
     rgb = transform(rgb)
     rgb = np.asfarray(rgb, dtype='float') / 255
     depth_raw = transform(depth_raw)
     depth_fix = transform(depth_fix)
     return rgb, depth_raw, depth_fix
Example #24
def get_imgs(img_path, imsize, transform=None, normalize_img=None):
    if cfg.IMAGE_CHANNEL == 3:
        img = Image.open(img_path).convert('RGB')
    elif cfg.IMAGE_CHANNEL == 1:
        img = Image.open(img_path).convert('L')
    if transform is not None:
        img = transform(img)
    ret = []
    for i in range(cfg.TREE.BRANCH_NUM):
        re_img = transforms.Resize(imsize[i])(img)
        ret.append(normalize_img(re_img))
    return ret
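The imsize argument above holds one target size per branch of the tree; a common construction (an assumption mirroring StackGAN-style code, with base_size hypothetical) doubles the resolution per branch:

base_size = 64
imsize = [base_size * (2 ** i) for i in range(cfg.TREE.BRANCH_NUM)]
# BRANCH_NUM = 3 -> [64, 128, 256]: get_imgs returns one normalized copy per size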
Example #25
def load_data_transformers(resize_reso=512, crop_reso=448, swap_num=(7, 7)):
    center_resize = 600
    Normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    data_transforms = {
        'swap':
        transforms.Compose([
            transforms.Randomswap((swap_num[0], swap_num[1])),
        ]),
        'common_aug':
        transforms.Compose([
            transforms.Resize((resize_reso, resize_reso)),
            transforms.RandomRotation(degrees=15),
            transforms.RandomCrop((crop_reso, crop_reso)),
            transforms.RandomHorizontalFlip(),
        ]),
        'train_totensor':
        transforms.Compose([
            transforms.Resize((crop_reso, crop_reso)),
            # ImageNetPolicy(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'val_totensor':
        transforms.Compose([
            transforms.Resize((crop_reso, crop_reso)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'test_totensor':
        transforms.Compose([
            transforms.Resize((resize_reso, resize_reso)),
            transforms.CenterCrop((crop_reso, crop_reso)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'None':
        None,
    }
    return data_transforms
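A short usage sketch for the dictionary returned above (the input file name is hypothetical):

from PIL import Image

tfms = load_data_transformers(resize_reso=512, crop_reso=448)
img = Image.open('sample.jpg').convert('RGB')
img = tfms['common_aug'](img)            # shared geometric augmentation
patchwork = tfms['swap'](img)            # DCL-style region shuffling
x = tfms['train_totensor'](img)          # normalized CHW tensor for training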
Example #26
def val_transform(rgb, depth):
    depth_np = depth

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop((oheight, owidth)),
    ])
    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)

    return rgb_np, depth_np
Example #27
def get_transform(train):
    mean = [0.3297]
    std = [0.2566]
    transform = []

    transform.append(T.Resize(IMAGE_SIZE))
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transform.append(T.RandomHorizontalFlip(0.5))
    transform.append(T.ToTensor())
    # transform.append(T.Normalize(mean, std))  # Faster R-CNN applies its own normalization, so normalizing here too would distort the image
    return T.Compose(transform)
Example #28
def val_transform(rgb, depth):
    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(float(image_size) / iheight),
        transforms.CenterCrop((oheight, owidth)),
    ])
    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    rgb_np = normalize(rgb_np)  # from [0,1] to [-1,1]

    depth_np = transform(depth)
    depth_np = depth_np / 10.0

    return rgb_np, depth_np
Example #29
def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # transforms.Resize(530 / iheight),  # this is for computational efficiency, since rotation is very slow
        transforms.Resize(250 / iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])

    rgb_np = transform(rgb)
    # custom addition: force a fixed 512x512 RGB output
    # rgb_np = Transform.resize(rgb_np, [512, 512])
    rgb_np = cv2.resize(rgb_np, (512, 512), interpolation=cv2.INTER_NEAREST)

    # random color jittering
    rgb_np = color_jitter(rgb_np)

    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)

    # custom addition: nearest-neighbour resize keeps depth values unblended
    depth_np = cv2.resize(depth_np, (512, 512),
                          interpolation=cv2.INTER_NEAREST)
    # depth_np = Transform.resize(depth_np, [512, 512])

    return rgb_np, depth_np
Example #30
def get_imgs_test(img_path, imsize, transform=None, normalize_img=None):
    if cfg.IMAGE_CHANNEL == 3:
        img = Image.open(img_path).convert('RGB')
    elif cfg.IMAGE_CHANNEL == 1:
        img = Image.open(img_path).convert('L')
    width, height = img.size  # original size; unused below

    if transform is not None:
        img = transform(img)

    ret = []
    re_img = transforms.Resize(imsize[-1])(img)
    ret.append(normalize_img(re_img))

    return ret