Example #1
import torch
import torch.utils.data as data
from torchvision import transforms

# Project-local modules; the import paths below are assumptions, adjust to your layout.
# LFW / CFP_FP / AgeDB30 / CASIAWebFace are pair-list dataset wrappers,
# ImageFolderLMDB reads the training set from an LMDB file, and Cutout is the
# random-erasing augmentation of DeVries & Taylor (2017).
from dataset import LFW, CFP_FP, AgeDB30, CASIAWebFace, ImageFolderLMDB
from utils import Cutout


def load_data_train_fix_from_lmdb(batch_size, dataset='Faces_emore'):
    transform = transforms.Compose([
        #transforms.RandomHorizontalFlip(),
        # transforms.Resize((120, 120), interpolation=3),
        # transforms.RandomCrop(112),
        transforms.ToTensor(),  # range [0, 255] -> [0.0,1.0]
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])  # range [0.0, 1.0] -> [-1.0,1.0]
    
    # Augmented pipeline for the training set: random horizontal flip plus Cutout.
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        Cutout(n_holes=1, length=16),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    
    root = '/root/faces_emore/LFW/lfw_align_112/'
    file_list = '/root/faces_emore/LFW/pairs.txt'
    dataset_LFW = LFW(root, file_list, transform=transform)
    
    root = '/root/faces_emore/cfp_fp/'
    file_list = '/root/faces_emore/cfp_fp/cfp_fp_pair.txt'
    dataset_CFP_FP = CFP_FP(root, file_list, transform=transform)
        
    root = '/root/faces_emore/agedb_30/'
    file_list = '/root/faces_emore/agedb_30/agedb_30_pair.txt'
    dataset_AgeDB30 = AgeDB30(root, file_list, transform=transform)  
    
    # A DistributedSampler is only created on the LMDB branch; default to None so
    # the CASIA branch does not hit a NameError when the DataLoader is built below.
    train_sampler = None

    if dataset == 'CASIA':
        root = 'data_set/CASIA_Webface_Image'
        file_list = 'data_set/CASIA_Webface_Image/webface_align_112.txt'
        dataset_train = CASIAWebFace(root, file_list, transform=transform)

    elif dataset == 'Faces_emore':
        path = "/data/face_dataset/ms1m.lmdb"
        # Use the augmented train_transform here; in the original it was defined but unused.
        dataset_train = ImageFolderLMDB(path, train_transform)
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
    else:
        raise ValueError('no such training dataset: ' + dataset)

    # Shuffle only when no DistributedSampler is in charge of the ordering.
    dataloaders = {'train_dataset': data.DataLoader(dataset_train, batch_size=batch_size, pin_memory=True,
                                                    sampler=train_sampler, shuffle=(train_sampler is None)),
                   'LFW': data.DataLoader(dataset_LFW, batch_size=batch_size, pin_memory=True, shuffle=False),
                   'CFP_FP': data.DataLoader(dataset_CFP_FP, batch_size=batch_size, pin_memory=True, shuffle=False),
                   'AgeDB30': data.DataLoader(dataset_AgeDB30, batch_size=batch_size, pin_memory=True, shuffle=False)}
    
    dataset = {'train_dataset': dataset_train, 'LFW': dataset_LFW,
               'CFP_FP': dataset_CFP_FP, 'AgeDB30': dataset_AgeDB30}

    dataset_sizes = {'train': len(dataset_train), 'LFW': len(dataset_LFW),
                     'CFP_FP': len(dataset_CFP_FP), 'AgeDB30': len(dataset_AgeDB30)}
    
    print('training and validation data loaded')
    
    return dataloaders, dataset_sizes, dataset
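
A minimal sketch of how this loader might be driven under torch.distributed; the process-group setup, batch size, and epoch count are assumptions for illustration, not part of the original code:

import torch.distributed as dist

dist.init_process_group(backend='nccl')  # assumes launch via torchrun / torch.distributed.launch
dataloaders, dataset_sizes, datasets = load_data_train_fix_from_lmdb(batch_size=128)
num_epochs = 20  # placeholder
for epoch in range(num_epochs):
    # On the Faces_emore/LMDB branch the loader carries a DistributedSampler;
    # set_epoch() re-seeds it so each epoch draws a fresh shard permutation per rank.
    dataloaders['train_dataset'].sampler.set_epoch(epoch)
    for images, labels in dataloaders['train_dataset']:
        pass  # forward/backward pass goes here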
Example #2
import torch.utils.data as data
from torchvision import transforms
# DALIGenericIterator ships with NVIDIA DALI; the positional (pipelines, output_map,
# size) call below matches the legacy DALI API this code was written against.
from nvidia.dali.plugin.pytorch import DALIGenericIterator

# Project-local modules; import paths are assumptions, adjust to your layout.
from dataset import LFW, CFP_FP, AgeDB30, CASIAWebFace, MS1M
from dali_pipeline import MXNetReaderPipeline


def load_data_with_MXNet_dali(batch_size, args, dataset='Faces_emore'):
    transform = transforms.Compose([
        transforms.ToTensor(),  
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])  
    
    root = '/data/face_dataset/LFW/lfw_align_112'
    file_list = '/data/face_dataset/LFW/pairs.txt'
    dataset_LFW = LFW(root, file_list, transform=transform)
    
    root = '/data/face_dataset/CFP-FP/CFP_FP_aligned_112'
    file_list = '/data/face_dataset/CFP-FP/cfp_fp_pair.txt'
    dataset_CFP_FP = CFP_FP(root, file_list, transform=transform)
        
    root = '/data/face_dataset/AgeDB-30/agedb30_align_112'
    file_list = '/data/face_dataset/AgeDB-30/agedb_30_pair.txt'
    dataset_AgeDB30 = AgeDB30(root, file_list, transform=transform)  
    
    if dataset == 'CASIA':
        root = 'data_set/CASIA_Webface_Image'
        file_list = 'data_set/CASIA_Webface_Image/webface_align_112.txt'
        dataset_train = CASIAWebFace(root, file_list, transform=transform)
        # No DALI pipeline for CASIA: fall back to a plain DataLoader so that
        # train_loader is defined on every branch.
        train_loader = data.DataLoader(dataset_train, batch_size=batch_size,
                                       pin_memory=True, shuffle=True)
    elif dataset == 'Faces_emore':
        path = "/data/face_dataset/"
        pipes = MXNetReaderPipeline(path, batch_size=batch_size, num_threads=4,
                                    device_id=args.local_rank, num_gpus=2)
        pipes.build()
        train_loader = DALIGenericIterator(pipes, ['data', 'label'], pipes.epoch_size("Reader"))

        # The MS1M wrapper is only instantiated so len(dataset_train) can be
        # reported below; the training batches come from the DALI iterator above.
        root = '/data/face_dataset/imgs/'
        file_list = '/data/face_dataset/imgs/faces_emore_align_112.txt'
        dataset_train = MS1M(root, file_list, transform=transform)
    else:
        raise ValueError('no such training dataset: ' + dataset)

    dataloaders = {'train_dataset': train_loader,
                   'LFW': data.DataLoader(dataset_LFW, batch_size=batch_size, pin_memory=True, shuffle=False),
                   'CFP_FP': data.DataLoader(dataset_CFP_FP, batch_size=batch_size, pin_memory=True, shuffle=False),
                   'AgeDB30': data.DataLoader(dataset_AgeDB30, batch_size=batch_size, pin_memory=True, shuffle=False)}
    
    dataset = {'train_dataset': dataset_train, 'LFW': dataset_LFW,
               'CFP_FP': dataset_CFP_FP, 'AgeDB30': dataset_AgeDB30}

    dataset_sizes = {'train': len(dataset_train), 'LFW': len(dataset_LFW),
                     'CFP_FP': len(dataset_CFP_FP), 'AgeDB30': len(dataset_AgeDB30)}
    
    print('training and validation data loaded')
    
    return dataloaders, dataset_sizes, dataset
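
A sketch of consuming the DALI iterator on the Faces_emore branch. The batch layout (a list with one dict per pipeline, keyed by the output_map names) and reset() are standard DALI iterator behavior; args, the batch size, and the epoch count are placeholders:

dataloaders, dataset_sizes, datasets = load_data_with_MXNet_dali(batch_size=128, args=args)
train_loader = dataloaders['train_dataset']
num_epochs = 20  # placeholder
for epoch in range(num_epochs):
    for batch in train_loader:
        images = batch[0]['data']                    # tensors live on the pipeline's GPU
        labels = batch[0]['label'].squeeze().long()  # DALI labels arrive shaped [N, 1]
        pass  # forward/backward pass goes here
    train_loader.reset()  # rewind the DALI reader for the next epoch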
Example #3
import torch
import torch.utils.data as data
from torchvision import transforms

# Project-local modules; import paths are assumptions, adjust to your layout.
from dataset import LFW, CFP_FP, AgeDB30, CASIAWebFace, MS1M
from utils import Cutout


def load_data_dataparallel(batch_size, dataset='Faces_emore'):
    transform = transforms.Compose([
        #transforms.RandomHorizontalFlip(),
        # transforms.Resize((120, 120), interpolation=3),
        # transforms.RandomCrop(112),
        transforms.ToTensor(),  # range [0, 255] -> [0.0,1.0]
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])  # range [0.0, 1.0] -> [-1.0,1.0]
    
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.Resize((120, 120), interpolation=3),  # 3 == PIL.Image.BICUBIC
        transforms.RandomCrop(112),
        transforms.ToTensor(),
        Cutout(n_holes=1, length=16),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    
    root = '/data/face_dataset/LFW/lfw_align_112'
    file_list = '/data/face_dataset/LFW/pairs.txt'
    dataset_LFW = LFW(root, file_list, transform=transform)
    
    root = '/data/face_dataset/CFP-FP/CFP_FP_aligned_112'
    file_list = '/data/face_dataset/CFP-FP/cfp_fp_pair.txt'
    dataset_CFP_FP = CFP_FP(root, file_list, transform=transform)
        
    root = '/data/face_dataset/AgeDB-30/agedb30_align_112'
    file_list = '/data/face_dataset/AgeDB-30/agedb_30_pair.txt'
    dataset_AgeDB30 = AgeDB30(root, file_list, transform=transform)  
    
    if dataset == 'CASIA':
        
        root = 'data_set/CASIA_Webface_Image'
        file_list = 'data_set/CASIA_Webface_Image/webface_align_112.txt'
        dataset_train = CASIAWebFace(root, file_list, transform=transform)
        
    elif dataset == 'Faces_emore':

        root = '/data/face_dataset/imgs'
        file_list = '/data/face_dataset/imgs/faces_emore_align_112.txt'
        dataset_train = MS1M(root, file_list, transform=train_transform) 
        # train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
    
    else:
        raise ValueError('no such training dataset: ' + dataset)

    # 80/20 train/validation split. Note that both subsets share dataset_train's
    # train_transform, so the validation subset is also augmented.
    dataset_size = len(dataset_train)
    train_size = int(0.8 * dataset_size)
    valid_size = dataset_size - train_size
    train_subset, valid_subset = torch.utils.data.random_split(dataset_train, [train_size, valid_size])
    
    # 'train_dataset': data.DataLoader(dataset_train, batch_size=batch_size, sampler=train_sampler),
    dataloaders = {'train_dataset': data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True),
                   'train_subset': data.DataLoader(train_subset, batch_size=batch_size, shuffle=True),
                   'valid_subset': data.DataLoader(valid_subset, batch_size=batch_size, shuffle=True),
                   'LFW': data.DataLoader(dataset_LFW, batch_size=batch_size, shuffle=False),
                   'CFP_FP': data.DataLoader(dataset_CFP_FP, batch_size=batch_size, shuffle=False),
                   'AgeDB30': data.DataLoader(dataset_AgeDB30, batch_size=batch_size, shuffle=False)}
    
    dataset = {'train_dataset': dataset_train, 'train_subset': train_subset,
               'valid_subset': valid_subset, 'LFW': dataset_LFW,
               'CFP_FP': dataset_CFP_FP, 'AgeDB30': dataset_AgeDB30}

    dataset_sizes = {'train': len(dataset_train), 'train_subset': len(train_subset),
                     'valid_subset': len(valid_subset), 'LFW': len(dataset_LFW),
                     'CFP_FP': len(dataset_CFP_FP), 'AgeDB30': len(dataset_AgeDB30)}
    
    print('training and validation data loaded')
    
    return dataloaders, dataset_sizes, dataset
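
A minimal sketch of pairing this loader with nn.DataParallel; the model class and batch size are hypothetical placeholders, only the loader keys come from the code above:

import torch.nn as nn

dataloaders, dataset_sizes, datasets = load_data_dataparallel(batch_size=256)
model = nn.DataParallel(MyFaceModel().cuda())  # MyFaceModel is a hypothetical stand-in
for images, labels in dataloaders['train_subset']:
    images, labels = images.cuda(), labels.cuda()
    embeddings = model(images)  # DataParallel splits the batch across all visible GPUs
    pass  # loss/backward step goes here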