def __init__(self, root, output_size, imset='2017/train.txt', clip_n=3, max_obj_n=11):
    self.root = root
    self.clip_n = clip_n
    self.output_size = output_size
    self.max_obj_n = max_obj_n

    # Collect video names from the DAVIS-style image-set list, skipping blank lines.
    dataset_path = os.path.join(root, 'ImageSets', imset)
    self.dataset_list = list()
    with open(dataset_path, 'r') as lines:
        for line in lines:
            dataset_name = line.strip()
            if len(dataset_name) > 0:
                self.dataset_list.append(dataset_name)

    # Data augmentation pipeline.
    self.random_horizontal_flip = mytrans.RandomHorizontalFlip(0.3)
    self.color_jitter = TF.ColorJitter(0.1, 0.1, 0.1, 0.02)
    self.random_affine = mytrans.RandomAffine(degrees=15, translate=(0.1, 0.1),
                                              scale=(0.95, 1.05), shear=10)
    self.random_resize_crop = mytrans.RandomResizedCrop(output_size, (0.8, 1), (0.95, 1.05))
    self.to_tensor = TF.ToTensor()
    self.to_onehot = mytrans.ToOnehot(max_obj_n, shuffle=True)
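A minimal usage sketch, assuming this `__init__` belongs to a PyTorch `Dataset` subclass for DAVIS-style training; the class name `TrainDataset`, the root path, and the loader settings are illustrative assumptions, not from the source:

# Hypothetical usage: class name, root path, and loader settings are assumptions.
from torch.utils.data import DataLoader

dataset = TrainDataset('/path/to/DAVIS', output_size=(400, 400), imset='2017/train.txt',
                       clip_n=3, max_obj_n=11)
loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2, pin_memory=True)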
def __init__(self, root, output_size, dataset_file='meta.json', clip_n=3, max_obj_n=11):
    self.root = root
    self.clip_n = clip_n
    self.output_size = output_size
    self.max_obj_n = max_obj_n

    # Video ids are the keys of the top-level 'videos' dict in meta.json.
    dataset_path = os.path.join(root, dataset_file)
    with open(dataset_path, 'r') as json_file:
        meta_data = json.load(json_file)
    self.dataset_list = list(meta_data['videos'])
    self.dataset_size = len(self.dataset_list)

    # Data augmentation pipeline; note the more aggressive crop scale (0.3, 0.5).
    self.random_horizontal_flip = mytrans.RandomHorizontalFlip(0.3)
    self.color_jitter = TF.ColorJitter(0.1, 0.1, 0.1, 0.02)
    self.random_affine = mytrans.RandomAffine(degrees=15, translate=(0.1, 0.1),
                                              scale=(0.95, 1.05), shear=10)
    self.random_resize_crop = mytrans.RandomResizedCrop(output_size, (0.3, 0.5), (0.95, 1.05))
    self.to_tensor = TF.ToTensor()
    self.to_onehot = mytrans.ToOnehot(max_obj_n, shuffle=True)
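For reference, the `meta.json` read above is assumed to follow the YouTube-VOS layout. Only the top-level 'videos' key is actually consumed by this `__init__`, so the inner fields sketched here are assumptions:

# Assumed meta.json shape (only the 'videos' keys are used above):
# {
#   "videos": {
#     "<video_id>": {
#       "objects": {
#         "<obj_id>": {"category": "...", "frames": ["00000", "00005", ...]}
#       }
#     }
#   }
# }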
def trans(is_training=True):
    transforms = []
    transforms.append(T.ToTensor())
    if is_training:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
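A usage sketch for `trans`, assuming `T` is `torchvision.transforms` and a recent torchvision (0.8+, so the flip can act on tensors); the CIFAR-10 dataset is purely illustrative:

# Illustrative usage; the dataset choice is an assumption, not from the source.
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

train_set = CIFAR10('./data', train=True, download=True, transform=trans(is_training=True))
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)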
def make_coco_transforms(image_set):
    normalize = T.Compose([T.ToTensor()])

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(0.5),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
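A quick usage sketch, again assuming `T` is `torchvision.transforms` (so the composed transform takes a single image, not an (image, target) pair); the image path is a placeholder:

# Illustrative only: the image path is a placeholder.
from PIL import Image

train_tf = make_coco_transforms('train')
img = Image.open('example.jpg').convert('RGB')
tensor = train_tf(img)  # flipped with p=0.5, then converted to a CHW float tensor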
def __init__(self, root, output_size, dataset_file='./assets/pretrain.txt', clip_n=3, max_obj_n=11):
    self.root = root
    self.clip_n = clip_n
    self.output_size = output_size
    self.max_obj_n = max_obj_n

    self.img_list = list()
    self.mask_list = list()
    dataset_list = list()

    # Gather image/mask pairs for every dataset named in the list file;
    # skip datasets that are missing or whose image/mask counts differ.
    with open(dataset_file, 'r') as lines:
        for line in lines:
            dataset_name = line.strip()

            img_dir = os.path.join(root, 'JPEGImages', dataset_name)
            mask_dir = os.path.join(root, 'Annotations', dataset_name)

            img_list = sorted(glob(os.path.join(img_dir, '*.jpg'))) + \
                sorted(glob(os.path.join(img_dir, '*.png')))
            mask_list = sorted(glob(os.path.join(mask_dir, '*.png')))

            if len(img_list) > 0:
                if len(img_list) == len(mask_list):
                    dataset_list.append(dataset_name)
                    self.img_list += img_list
                    self.mask_list += mask_list
                    print(f'\t{dataset_name}: {len(img_list)} imgs.')
                else:
                    print(f'\tPreTrain dataset {dataset_name} has {len(img_list)} imgs '
                          f'but {len(mask_list)} annots. Mismatch! Skip.')
            else:
                print(f"\tPreTrain dataset {dataset_name} doesn't exist. Skip.")

    print(myutils.gct(), f'{len(self.img_list)} imgs are used for PreTrain. They are from {dataset_list}.')

    # Data augmentation pipeline.
    self.random_horizontal_flip = mytrans.RandomHorizontalFlip(0.3)
    self.color_jitter = TF.ColorJitter(0.1, 0.1, 0.1, 0.03)
    self.random_affine = mytrans.RandomAffine(degrees=20, translate=(0.1, 0.1),
                                              scale=(0.9, 1.1), shear=10)
    self.random_resize_crop = mytrans.RandomResizedCrop(output_size, (0.8, 1))
    self.to_tensor = TF.ToTensor()
    self.to_onehot = mytrans.ToOnehot(max_obj_n, shuffle=True)
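The glob calls above imply a fixed directory layout and a plain-text list file; a sketch of both, with placeholder names:

# Layout implied by the globs above (one mask per image; counts must match):
# root/
#   JPEGImages/<dataset_name>/*.jpg | *.png
#   Annotations/<dataset_name>/*.png
#
# ./assets/pretrain.txt: one <dataset_name> per line, e.g.
#   <dataset_name_1>
#   <dataset_name_2>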
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
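One caveat applying to both helpers above (`trans` and `get_transform`): if `T` is plain `torchvision.transforms`, placing `RandomHorizontalFlip` after `ToTensor` relies on torchvision's tensor-transform support (added in v0.8); on older versions the flip must precede `ToTensor` while the input is still a PIL image. If `T` is instead a detection-tutorial-style transforms module that operates on (image, target) pairs, that module handles the conversion itself.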
# trainloader = torch.utils.data.DataLoader(
#     CSDataSet(args.data_dir, './dataset/list/cityscapes/train.lst',
#               max_iters=args.num_steps*args.batch_size, crop_size=(h, w),
#               scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN),
#     batch_size=args.batch_size, shuffle=True, num_workers=4, pin_memory=True)
# valloader = torch.utils.data.DataLoader(
#     CSDataSet(args.data_dir, './dataset/list/cityscapes/val.lst',
#               crop_size=(1024, 2048), mean=IMG_MEAN, scale=False, mirror=False),
#     batch_size=2, shuffle=False, pin_memory=True)

# ImageNet mean/std, scaled to [0, 255] because ToTensor here does not divide by 255.
value_scale = 255
mean = [0.485, 0.456, 0.406]
mean = [item * value_scale for item in mean]
std = [0.229, 0.224, 0.225]
std = [item * value_scale for item in std]

train_transform = my_trans.Compose([
    # my_trans.Resize((args.height, args.width)),
    # my_trans.RandScale([0.5, 2.0]),
    # my_trans.RandomGaussianBlur(),
    my_trans.RandomHorizontalFlip(),
    # my_trans.Crop([args.height, args.width], crop_type='rand', padding=mean, ignore_label=255),
    my_trans.ToTensor(),  # without div 255
    my_trans.Normalize(mean=mean, std=std)
])
val_transform = my_trans.Compose([
    # my_trans.Resize((args.height, args.width)),
    my_trans.ToTensor(),  # without div 255
    my_trans.Normalize(mean=mean, std=std)
])

data_dir = '/data/zzg/CamVid/'
train_dataset = CamVid(data_dir, mode='train', p=None, transform=train_transform)
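A sketch of the loader step that would naturally follow, mirroring the settings in the commented-out Cityscapes loaders above; the batch size and worker count are assumptions:

# Assumed loader settings, echoing the commented-out Cityscapes loaders.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=args.batch_size, shuffle=True,
    num_workers=4, pin_memory=True)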
                        default='cuda:0', help='cpu or cuda:0 or cuda:1')
    args = parser.parse_args() if string is None else parser.parse_args(string)
    return args


if __name__ == '__main__':
    args = parse_args()
    wandb.init(config=args, project='dlcv_gan_face')

    transform = transforms.Compose([
        transforms.Resize(args.img_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.5] * 3, [0.5] * 3)
    ])
    train_dataset = Face_Dataset('../hw3_data/face/train', transform)
    valid_dataset = Face_Dataset('../hw3_data/face/test', transform)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch,
                                  shuffle=True, num_workers=args.num_workers)
    valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch,
                                  num_workers=args.num_workers)
    train(args, train_dataloader, valid_dataloader)
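A note on the transform: Normalize([0.5] * 3, [0.5] * 3) maps inputs from [0, 1] to [-1, 1], the conventional range for DCGAN-style training where the generator ends in a tanh layer; the same transform is reused for the validation split here so real and generated images share one scale.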