Example #1
def create_numpy_dataset(num_images, train_loader, take_count=-1):
    datasets = []
    labels = []
    if num_images is None:
        num_images = len(train_loader)
    for i, data in enumerate(train_loader):
        data_numpy = data[0].numpy()
        label_numpy = data[1].numpy()
        label_numpy = np.squeeze(label_numpy)
        data_numpy = np.squeeze(data_numpy)
        datasets.append(data_numpy)
        labels.append(label_numpy)
        if i == (num_images - 1):
            break
    datasets = np.array(datasets)
    labels = np.array(labels)

    if len(datasets.shape) == 3:  # the input image is grayscale image
        datasets = np.expand_dims(datasets, axis=1)

    print('Numpy dataset shape is {}'.format(datasets.shape))
    if take_count != -1:
        return datasets[:take_count], labels[:take_count]
    else:
        return datasets, labels
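A minimal driver for create_numpy_dataset above, assuming a plain torchvision MNIST loader with batch_size=1; every name below is illustrative and not taken from the original project.

import numpy as np
import torch.utils.data as data_utils
from torchvision import datasets as tv_datasets, transforms

mnist = tv_datasets.MNIST(root='./data', train=True, download=True,
                          transform=transforms.ToTensor())
train_loader = data_utils.DataLoader(mnist, batch_size=1, shuffle=True)

# each batch is squeezed to (28, 28), then the channel axis is restored
images, targets = create_numpy_dataset(num_images=100, train_loader=train_loader)
print(images.shape, targets.shape)  # expected: (100, 1, 28, 28) (100,)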
Example #2
    def load_dataset(self, split, **kwargs):
        """Load a given dataset split.
        Args:
            split (str): name of the split (e.g., train, valid, test)
        """
        path = self.args.data
        if not os.path.exists(path):
            raise FileNotFoundError("Dataset not found: ({})".format(path))

        files = [os.path.join(path, f)
                 for f in os.listdir(path)] if os.path.isdir(path) else [path]
        files = sorted([f for f in files if split in f])

        # debug
        if self.args.num_file > 0:
            files = files[0:self.args.num_file]

        assert len(files) > 0, "no suitable file in split ***{}***".format(
            split)

        datasets = []
        for i, f in enumerate(files):
            datasets.append(BertH5pyData(f))

        dataset = ConBertH5pyData(datasets)

        print('| loaded {} sentences from: {}'.format(len(dataset), path),
              flush=True)

        self.datasets[split] = dataset
        print('| loading finished')
Example #3
    def generate_curves(
            self,
            kernel_name,
            noise,
            num_instances_train=None,
            num_instances_vali=None,
            num_instances_test=None,
            purpose=None, **kwargs):

        x_values = self._create_shuffled_linspace()
        kernel = torch.tensor(
            pairwise.pairwise_kernels(
                x_values, x_values,
                metric=kernel_name,
                # forward extra kernel parameters instead of overwriting filter_params
                filter_params=True, **kwargs),
            dtype=torch.float64)

        kernel = kernel + torch.eye(self._steps) * noise
        datasets = []
        if purpose == 'train':
            num_instances = num_instances_train
        elif purpose == 'vali':
            num_instances = num_instances_vali
        elif purpose == 'test':
            num_instances = num_instances_test
        else:
            raise ValueError('unknown purpose: {}'.format(purpose))

        for _ in range(num_instances):
            # draw as many standard-normal samples as there are steps (one curve)
            standard_normals = torch.normal(0, 1, (self._steps, 1))
            func_x = kernel.float() @ standard_normals.float()
            datasets.append(func_x)

        datasets = Helper.list_np_to_tensor(datasets)
        x_values = x_values.repeat(datasets.shape[0], 1, 1)

        return x_values, datasets
Example #4
    def generate_curves(self, noise=1e-4, length_scale=0.4, gamma=1,
                        num_instances_train=None, num_instances_vali=None,
                        num_instances_test=None, purpose=None):

        datasets = []
        if purpose == 'train':
            num_instances = num_instances_train
            x_values = Helper.scale_shift_uniform(
                self._xmin, self._xmax, *(self._steps, self._xdim)).float()

        elif purpose == 'vali':
            num_instances = num_instances_vali
            x_values = self._create_shuffled_linspace()

        elif purpose == 'test':
            num_instances = num_instances_test
            x_values = self._create_shuffled_linspace()

        else:
            raise ValueError('unknown purpose: {}'.format(purpose))

        kernel = self._rbf_kernel(length_scale, gamma, x_values)
        kernel = kernel + torch.eye(self._steps) * noise
        cholesky_decomp = torch.cholesky(kernel)

        for _ in range(num_instances):
            # draw as many standard-normal samples as there are steps (one curve)
            standard_normals = torch.normal(0, 1, (self._steps, 1))
            func_x = cholesky_decomp @ standard_normals
            datasets.append(func_x)

        datasets = Helper.list_np_to_tensor(datasets)
        x_values = x_values.repeat(datasets.shape[0], 1, 1)

        return x_values, datasets
Example #5
def split_dataset_by_labels(X,
                            y,
                            task_labels,
                            nb_classes=None,
                            multihead=False):
    """Split dataset by labels.

    Args:
        X: data
        y: labels
        task_labels: list of list of labels, one for each dataset
        nb_classes: number of classes (used to convert to one-hot)
    Returns:
        List of (X, y) tuples representing each dataset
    """
    if nb_classes is None:
        nb_classes = len(np.unique(y))
    datasets = []
    for labels in task_labels:
        idx = np.in1d(y, labels)
        if multihead:
            label_map = np.arange(nb_classes)
            label_map[labels] = np.arange(len(labels))
            data = X[idx], np_utils.to_categorical(label_map[y[idx]],
                                                   len(labels))
        else:
            data = X[idx], np_utils.to_categorical(y[idx], nb_classes)
        datasets.append(data)
    return datasets
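A quick check of split_dataset_by_labels on synthetic data; it assumes np_utils is the Keras one-hot helper (from keras.utils import np_utils in older Keras releases).

import numpy as np
from keras.utils import np_utils

X = np.random.rand(60, 8)                      # 60 samples, 8 features
y = np.random.randint(0, 4, size=60)           # 4 classes in total
task_labels = [[0, 1], [2, 3]]                 # two tasks of two classes each

tasks = split_dataset_by_labels(X, y, task_labels, nb_classes=4)
for X_t, y_t in tasks:
    print(X_t.shape, y_t.shape)                # labels stay one-hot over all 4 classes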
Example #6
def build_set(queue, triplet_builder, log):
    while 1:
        datasets = []
        for i in range(15):
            dataset = triplet_builder.build_set()
            datasets.append(dataset)
        dataset = ConcatDataset(datasets)
        # log.info('Created {0} triplets'.format(len(dataset)))
        queue.put(dataset)
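One way the loop above could be driven from the training process; triplet_builder and logger stand in for whatever builder and logger objects the caller already owns.

import multiprocessing

queue = multiprocessing.Queue(maxsize=1)
worker = multiprocessing.Process(target=build_set,
                                 args=(queue, triplet_builder, logger))
worker.daemon = True   # the infinite loop dies together with the main process
worker.start()

dataset = queue.get()  # blocks until the first 15 sets are concatenated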
Example #7
 def build_set(self, queue, triplet_builder, log):
     while 1:
         datasets = []
         for i in range(TRAIN_SEQS_PER_EPOCH):
             dataset = triplet_builder.build_set()
             datasets.append(dataset)
         dataset = ConcatDataset(datasets)
         # log.info('Created {0} triplets'.format(len(dataset)))
         queue.put(dataset)
Example #8
def create_numpy_dataset():
    datasets = []
    for data in train_loader:
        data_numpy = data[0].numpy()
        data_numpy = np.squeeze(data_numpy)
        datasets.append(data_numpy)
    datasets = np.array(datasets)
    datasets = np.expand_dims(datasets, axis=1)
    print('Numpy dataset shape is {}'.format(datasets.shape))
    return datasets[:1000]
Example #9
def split_dataset_by_labels(dataset, task_labels):
    datasets = []
    for labels in task_labels:
        idx = np.in1d(dataset.targets, labels)
        split_dataset = copy.deepcopy(dataset)
        split_dataset.targets = split_dataset.targets[idx]
        split_dataset.data = split_dataset.data[idx]
        datasets.append(split_dataset)
    return datasets
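A sketch of applying this split to torchvision MNIST; it assumes the dataset's .targets and .data tensors accept a boolean mask, as recent torchvision versions do.

from torchvision import datasets as tv_datasets, transforms

mnist = tv_datasets.MNIST(root='./data', train=True, download=True,
                          transform=transforms.ToTensor())
task_labels = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]

tasks = split_dataset_by_labels(mnist, task_labels)
print([len(t) for t in tasks])  # roughly 30000 training samples per task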
Example #10
def load_data(path='data/pan19-author-profiling-20200229/training/en/',
              num_classes=3):
    '''
    Reads tweets and truth labels from the given path. Returns a list of tweet
    lists (one per user) and a list of integer class labels for the
    corresponding users.

    Parameters:
    path (str): path of the dataset
    num_classes (int): number of output classes (only the 3-class
        bot/female/male mapping is currently active)

    Returns:
    (list, list): tuple of tweet lists and integer class labels
    '''
    datasets = []
    for file in os.listdir(path):
        if file.endswith(".xml"):
            datasets.append(file)

    tweets = {}
    for dataset in datasets:
        root = ET.parse(os.path.join(path, dataset)).getroot()
        tweet_texts = []
        # get text from tweets
        for type_tag in root.findall('documents/document'):
            text = type_tag.text
            tweet_texts.append(text)
        user_id = re.findall(r"(.*)\.xml", dataset)[0]
        tweets[user_id] = tweet_texts

    labels = {}
    # get truth
    with open(os.path.join(path, 'truth.txt')) as f:
        for line in f:
            user_id, _, gender = re.findall(
                r'([A-Za-z0-9]*):::(human|bot):::([a-z]*)', line)[0]
            labels[user_id] = gender

    # create lists for input and output
    x, y = ([] for i in range(2))

    # torch needs an integer as the output class
    # if num_classes == 3:
    #     class_labels = {"bot": 0, "female": 1, "male": 2}
    # else:
    #     class_labels = {"bot": 0, "female": 1, "male": 1}
    class_labels = {"bot": 0, "female": 1, "male": 2}

    for key, value in tweets.items():
        x.append(value)
        y.append(class_labels[labels[key]])

    return x, y
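A hypothetical call; the path mirrors the default argument and must point to an extracted PAN19 author-profiling split containing the per-user XML files and truth.txt.

texts, genders = load_data('data/pan19-author-profiling-20200229/training/en/')
print(len(texts), len(genders))  # one tweet list and one integer label per user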
Example #11
def append_adversarial_samples(args, data_loader, adv_data, adv_labels):
    datasets = data_loader.dataset.datasets

    dataset_adv = AdvDataset(adv_data, adv_labels)
    datasets.append(dataset_adv)

    dataset = ConcatDataset(datasets)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=4,
                                         pin_memory=True,
                                         drop_last=True)
    return loader
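A hedged usage sketch: data_loader must already wrap a ConcatDataset (the function reads data_loader.dataset.datasets), and adv_data / adv_labels are tensors produced by some attack step elsewhere.

# adv_data: (N, C, H, W) float tensor, adv_labels: (N,) long tensor
mixed_loader = append_adversarial_samples(args, train_loader, adv_data, adv_labels)
for x, y in mixed_loader:
    break  # batches now interleave clean and adversarial samples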
Example #12
def create_numpy_dataset(num_images, train_loader):
    datasets = []
    if num_images is None:
        num_images = len(train_loader)
    for i, data in enumerate(train_loader):
        data_numpy = data[0].numpy()
        data_numpy = np.squeeze(data_numpy)
        datasets.append(data_numpy)
        if i == (num_images - 1):
            break
    datasets = np.array(datasets)
    if len(datasets.shape) == 3:  # the input image is grayscale image
        datasets = np.expand_dims(datasets, axis=1)
    return datasets
Example #13
def create_numpy_dataset(opt):
    '''
    @ Original 28x28 images are rescaled to 32x32 to meet the 2^P size requirement
    @ batch_size and workers can be increased for faster loading
    '''
    print(torch.__version__)
    train_batch_size = opt.train_batch_size
    test_batch_size = opt.test_batch_size
    kwargs = {}
    train_loader = data_utils.DataLoader(MNIST(root='./data',
                                               train=True,
                                               process=False,
                                               transform=transforms.Compose([
                                                   transforms.Scale((32, 32)),
                                                   transforms.ToTensor(),
                                               ])),
                                         batch_size=train_batch_size,
                                         shuffle=True,
                                         **kwargs)

    test_loader = data_utils.DataLoader(MNIST(root='./data',
                                              train=False,
                                              process=False,
                                              transform=transforms.Compose([
                                                  transforms.Scale((32, 32)),
                                                  transforms.ToTensor(),
                                              ])),
                                        batch_size=test_batch_size,
                                        shuffle=True,
                                        **kwargs)

    # create numpy dataset
    datasets = []
    labels = []
    for data, label in train_loader:
        data_numpy = data.numpy()
        label_numpy = label.numpy()
        datasets.append(data_numpy)
        labels.append(label_numpy)

    datasets = np.concatenate(datasets, axis=0)
    labels = np.concatenate(labels, axis=0)
    print('Create numpy dataset done, size: {}'.format(datasets.shape))
    return datasets[:opt.loadSize], labels[:opt.loadSize]
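A hypothetical driver: opt only needs the attributes the function reads, and MNIST here must be the snippet's own loader class (torchvision's MNIST has no process argument).

from argparse import Namespace

opt = Namespace(train_batch_size=64, test_batch_size=64, loadSize=1000)
images, labels = create_numpy_dataset(opt)
print(images.shape, labels.shape)  # about (1000, 1, 32, 32) and (1000,)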
Example #14
def get_train_dataloader(args):
    dataset_list = args.source
    assert isinstance(dataset_list, list)
    datasets = []
    val_datasets = []
    img_transformer = get_train_transformers(args)
    limit = args.limit_source

    for dname in dataset_list:
        if dname in digits_datasets:
            return get_digital_train_dataloader(args, dname)
        name_train, name_val, labels_train, labels_val = get_split_dataset_info(
            join(dirname(__file__), 'txt_lists', '%s_train.txt' % dname),
            args.val_size)
        train_dataset = JigsawDataset(name_train,
                                      labels_train,
                                      img_transformer=img_transformer)
        if limit:
            train_dataset = Subset(train_dataset, limit)
        datasets.append(train_dataset)
        val_datasets.append(
            JigsawDataset(name_val,
                          labels_val,
                          img_transformer=get_val_transformer(args)))
    dataset = ConcatDataset(datasets)
    val_dataset = ConcatDataset(val_datasets)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=4,
                                         pin_memory=True,
                                         drop_last=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True,
                                             drop_last=False)
    return loader, val_loader
Example #15
    def _split_dataset(self, dataset, class_split_order):
        task_length = len(class_split_order) // args.num_tasks

        # Used to map from random task_length classes in {0...1000} -> {0,1...task_length}
        tiled_class_map = np.tile(np.arange(task_length), args.num_tasks)
        inv_class_split_order = np.argsort(class_split_order)
        class_map = tiled_class_map[inv_class_split_order]

        # Constructing class splits
        paths, targets = zip(*dataset.samples)

        paths = np.array(paths)
        targets = np.array(targets)

        print("==> Extracting per class paths")
        class_samples = [
            list(zip(paths[targets == c], class_map[targets[targets == c]]))
            for c in range(1000)
        ]

        datasets = []

        print(f"==> Splitting dataset into {args.num_tasks} tasks")
        for i in range(0, 1000, task_length):
            task_classes = class_split_order[i : i + task_length]

            samples = []

            for c in task_classes:
                samples.append(class_samples[c])

            redataset = copy(dataset)
            redataset.samples = list(chain.from_iterable(samples))

            datasets.append(redataset)

        return datasets
Example #16
def load_dataset(path):
    """Load dataset in local.

    Load test image in local.
    Folder Tree

           Path
            ├─ Label 1
            |   ├─ Image 1
            |   ├─ Image 2
            |   └─ Image 3
            |
            └─ Label 2
                └─ Image 1

    Args:
        path (str): Directory path of dataset in Local

    Returns:
        List[(tensor.torch, int)]: dataset

    Example::
        get_dataset(./hoge)

    """
    datasets = []
    labels = os.listdir(path)
    for label in tqdm(labels, desc=" Label ", ascii=True):
        files = os.listdir(path + "/" + label)
        for file in tqdm(files, desc=" Data  ", ascii=True):
            img = Image.open(path + "/" + label + "/" + file).convert("L")
            torch_img = transforms.ToTensor()(img)
            data = (torch_img, int(label))
            datasets.append(data)
    print()
    return datasets
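The returned list of (tensor, label) pairs can be handed straight to a DataLoader, assuming every image shares one size so the default collate can stack them; the directory layout must follow the docstring, one sub-folder per integer label.

import torch

data = load_dataset("./hoge")
loader = torch.utils.data.DataLoader(data, batch_size=32, shuffle=True)
images, labels = next(iter(loader))
print(images.shape, labels.shape)  # single-channel images, integer labels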
Example #17
def separate_datasets(loader,
                      dataset_type,
                      Ktasks,
                      folder,
                      use_task_ids=False):
    fts = []
    labels = []
    task_labels = []

    if use_task_ids:
        for i, (ft, tar, task) in enumerate(loader):
            fts.append(ft)
            labels.append(tar)
            task_labels.append(task)
    else:
        for i, (ft, tar) in enumerate(loader):
            fts.append(ft)
            labels.append(tar)
            task_labels.append(tar)

    all_fts = torch.cat(fts, dim=0)
    all_labels = torch.cat(labels, dim=0)
    all_task_labels = torch.cat(task_labels, dim=0)

    datasets = []
    for task_lb in range(Ktasks):
        mask = torch.eq(all_task_labels, task_lb)
        inds = torch.nonzero(mask).squeeze()
        dt = torch.index_select(all_fts, dim=0, index=inds)
        lbls = torch.index_select(all_labels, dim=0, index=inds)

        datasets.append(data_utils.TensorDataset(dt, lbls))

    if not os.path.exists(folder):
        os.mkdir(folder)
    torch.save(datasets, folder + dataset_type + '.t')
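A sketch of writing and reloading the per-task datasets; train_loader is assumed to yield (features, target) batches, and the folder / dataset_type strings are purely illustrative.

import torch
import torch.utils.data as data_utils

separate_datasets(train_loader, 'train', Ktasks=5, folder='./tasks/')

task_datasets = torch.load('./tasks/train.t')
task_loaders = [data_utils.DataLoader(d, batch_size=64, shuffle=True)
                for d in task_datasets]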
Example #18
models = []  # collected below alongside opts and datasets
opts = []
datasets = []
datasets_plain = []

for i in range(num_models):
    model = ResNet(kernel_size=filter_size, config=config, batch_size=batch, device=device)
    model.to(device)
    # model = torch.nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    dataloader = torch.utils.data.DataLoader(subsets[i], batch_size=batch, shuffle=True, num_workers=2)

    models.append(model)
    opts.append(optimizer)
    datasets.append(dataloader)

central_model = ResNet(kernel_size=filter_size, config=config, batch_size=batch, device=device)
central_model.to(device)
central_opt = torch.optim.Adam(central_model.parameters(), lr=1e-4)

shadow_models_set_A = []
shadow_opts_set_A = []

for i in range(num_models):
    model = ResNet(kernel_size=filter_size, config=config, batch_size=batch, device=device)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    shadow_models_set_A.append(model)
Example #19
def main_worker(gpu, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    print("=> creating model '{}'".format(args.arch))
    if args.arch == 'vgg16':
        model = vgg16(pretrained=args.pretrained,
                      use_deepaugment_realtime=True)
        model.classifier[-1] = torch.nn.Linear(4096, len(classes_chosen))
        print(model)
    elif args.arch == 'vgg11':
        model = vgg11(pretrained=args.pretrained,
                      use_deepaugment_realtime=True)
        model.classifier[-1] = torch.nn.Linear(4096, len(classes_chosen))
        print(model)
    elif args.arch == 'resnet18':
        model = resnet18(pretrained=args.pretrained)
        model.fc = torch.nn.Linear(512, len(classes_chosen))
        print(model)
    else:
        raise NotImplementedError()

    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    # optionally resume from a checkpoint
    args.start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            print('Start epoch:', args.start_epoch)
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if args.data_standard == None:
        print("No Standard Data! Only using --data-distorted datasets")

    if args.data_distorted != None:
        if args.symlink_distorted_data_dirs:
            print("Mixing together data directories: ", args.data_distorted)

            train_dataset = torch.utils.data.ConcatDataset([
                CombinedDistortedDatasetFolder(
                    args.data_distorted,
                    transform=transforms.Compose([
                        transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        normalize,
                    ])),
                ImageNetSubsetDataset(args.data_standard,
                                      transform=transforms.Compose([
                                          transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize,
                                      ]))
                if args.data_standard != None else []
            ])
        else:
            print(
                f"Concatenating Datasets {args.data_standard} and {args.data_distorted}"
            )

            datasets = [
                # args.data_standard
                ImageNetSubsetDataset(args.data_standard,
                                      transform=transforms.Compose([
                                          transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize,
                                      ]))
                if args.data_standard != None else []
            ]

            for distorted_data_dir in args.data_distorted:
                datasets.append(
                    ImageNetSubsetDataset(
                        distorted_data_dir,
                        transform=transforms.Compose([
                            transforms.RandomResizedCrop(224),
                            transforms.RandomHorizontalFlip(),
                            transforms.ToTensor(),
                            normalize,
                        ])))

            train_dataset = torch.utils.data.ConcatDataset(datasets)
    else:
        print(f"Only using Dataset {args.data_standard}")
        train_dataset = ImageNetSubsetDataset(
            args.data_standard,
            transform=transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)

    val_loader = torch.utils.data.DataLoader(ImageNetSubsetDataset(
        args.data_val,
        transform=transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size_val,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    def cosine_annealing(step, total_steps, lr_max, lr_min):
        return lr_min + (lr_max - lr_min) * 0.5 * (
            1 + np.cos(step / total_steps * np.pi))

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: cosine_annealing(
            step,
            args.epochs * len(train_loader),
            1,  # since lr_lambda computes multiplicative factor
            1e-6 / (args.lr * args.batch_size / 256.)))

    if args.start_epoch != 0:
        scheduler.step(args.start_epoch * len(train_loader))

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    ###########################################################################
    ##### Main Training Loop
    ###########################################################################

    if not args.resume:
        with open(os.path.join(args.save, 'training_log.csv'), 'w') as f:
            f.write(
                'epoch,train_loss,train_acc1,train_acc5,val_loss,val_acc1,val_acc5\n'
            )

    for epoch in range(args.start_epoch, args.epochs):

        # train for one epoch
        train_losses_avg, train_top1_avg, train_top5_avg = train(
            train_loader, model, criterion, optimizer, scheduler, epoch, args)

        print("Evaluating on validation set")

        # evaluate on validation set
        val_losses_avg, val_top1_avg, val_top5_avg = validate(
            val_loader, model, criterion, args)

        print("Finished Evaluating on validation set")

        # Save results in log file
        with open(os.path.join(args.save, 'training_log.csv'), 'a') as f:
            f.write(
                '%03d,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f\n' %
                ((epoch + 1), train_losses_avg, train_top1_avg, train_top5_avg,
                 val_losses_avg, val_top1_avg, val_top5_avg))

        # remember best acc@1 and save checkpoint
        is_best = val_top1_avg > best_acc1
        best_acc1 = max(val_top1_avg, best_acc1)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
Example #20
# Make Syft federated dataset
client_datapair_dict = {}
datasets = []

logging.info("Load federated dataset")
for client_id in client_ids:
    tmp_path = federated_path + '/hospital_' + str(client_id) + '.csv'
    x, y = eICU_data.get_train_data_from_hopital(client_id)
    client_datapair_dict["hospital_{}".format(client_id)] = (x, y)
#     client_data_list.append((pd.read_csv(federated_path + '/hospital_' + str(client_id) + '.csv')[predictive_attributes], )

for client_id in client_ids:
    tmp_tuple = client_datapair_dict["hospital_{}".format(client_id)]
    datasets.append(
        fl.BaseDataset(
            torch.tensor(tmp_tuple[0], dtype=torch.float32),
            torch.tensor(tmp_tuple[1].squeeze(), dtype=torch.long)).send(
                virtual_workers["hospital_{}".format(client_id)]))

fed_dataset = sy.FederatedDataset(datasets)
fdataloader = sy.FederatedDataLoader(fed_dataset,
                                     batch_size=args["batch_size"])

# Load test data
if args['split_strategy'] == 'trainN_testN':
    x, y = eICU_data.get_full_test_data()
if args['split_strategy'] == 'trainNminus1_test1':
    x, y = eICU_data.get_test_data_from_hopital(args['test_hospital_id'])
x_pt = torch.tensor(x, dtype=torch.float32)  # transform to torch tensor
y_pt = torch.tensor(y.squeeze(), dtype=torch.long)
my_dataset = TensorDataset(x_pt, y_pt)  # create your datset
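A minimal look at the federated loader built above, assuming PySyft 0.2.x-style pointer tensors: each batch lives on one virtual worker until .get() is called or the model is sent to that worker.

for batch_x, batch_y in fdataloader:
    print(batch_x.location)  # the virtual worker holding this batch
    break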
Example #21
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)
    model.fc = torch.nn.Linear(512, 100)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # Define advnet resnet
    # advnet = ResNet(
    #     epsilon=args.advnet_epsilon,
    #     advnet_norm_factor=args.advnet_norm_factor
    # ).cuda()
    # advnet = torch.nn.DataParallel(advnet).cuda()
    advnet = ParallelResNet(epsilon=args.advnet_epsilon,
                            advnet_norm_factor=args.advnet_norm_factor)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    # Not all parameters are trainable
    advnet_trainable_params = dict(list(advnet.named_parameters()))
    # del advnet_trainable_params['module.block1.0.weight']
    # del advnet_trainable_params['module.block1.0.bias']
    # del advnet_trainable_params['module.block1.9.weight']
    # del advnet_trainable_params['module.block1.9.bias']
    # del advnet_trainable_params['module.block2.0.weight']
    # del advnet_trainable_params['module.block2.0.bias']
    # del advnet_trainable_params['module.block2.9.weight']
    # del advnet_trainable_params['module.block2.9.bias']
    optimizer_advnet = [
        torch.optim.SGD(advnet.blocks[i].parameters(),
                        args.lr_advnet,
                        momentum=args.momentum_advnet,
                        weight_decay=args.weight_decay_advnet,
                        nesterov=True) for i in range(len(classes_chosen))
    ]

    # optionally resume from a checkpoint
    args.start_epoch = 0
    if False:  #args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            print('Start epoch:', args.start_epoch)
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if args.data_standard == None:
        print("No Standard Data! Only using --data-distorted datasets")

    if args.data_distorted != None:
        if args.symlink_distorted_data_dirs:
            print("Mixing together data directories: ", args.data_distorted)

            train_dataset = torch.utils.data.ConcatDataset([
                CombinedDistortedDatasetFolder(
                    args.data_distorted,
                    transform=transforms.Compose([
                        transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        normalize,
                    ])),
                ImageNetSubsetDataset(args.data_standard,
                                      transform=transforms.Compose([
                                          transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize,
                                      ]))
                if args.data_standard != None else []
            ])
        else:
            print(
                f"Concatenating Datasets {args.data_standard} and {args.data_distorted}"
            )

            datasets = [
                # args.data_standard
                ImageNetSubsetDataset(args.data_standard,
                                      transform=transforms.Compose([
                                          transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize,
                                      ]))
                if args.data_standard != None else []
            ]

            for distorted_data_dir in args.data_distorted:
                datasets.append(
                    ImageNetSubsetDataset(
                        distorted_data_dir,
                        transform=transforms.Compose([
                            transforms.RandomResizedCrop(224),
                            transforms.RandomHorizontalFlip(),
                            transforms.ToTensor(),
                            normalize,
                        ])))

            train_dataset = torch.utils.data.ConcatDataset(datasets)
    else:
        print(f"Only using Dataset {args.data_standard}")
        train_dataset = ImageNetSubsetDataset(
            args.data_standard,
            transform=transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(ImageNetSubsetDataset(
        args.data_val,
        transform=transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size_val,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    def cosine_annealing(step, total_steps, lr_max, lr_min):
        return lr_min + (lr_max - lr_min) * 0.5 * (
            1 + np.cos(step / total_steps * np.pi))

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: cosine_annealing(
            step,
            args.epochs * len(train_loader),
            1,  # since lr_lambda computes multiplicative factor
            1e-6 / (args.lr * args.batch_size / 256.)))

    scheduler_advnet = None

    if args.start_epoch != 0:
        scheduler.step(args.start_epoch * len(train_loader))
        # scheduler_advnet.step(args.start_epoch * len(train_loader))

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    ###########################################################################
    ##### Main Training Loop
    ###########################################################################

    with open(os.path.join(args.save, 'training_log.csv'), 'w') as f:
        f.write(
            'epoch,train_loss,train_acc1,train_acc5,val_loss,val_acc1,val_acc5\n'
        )

    with open(os.path.join(args.save, 'command.txt'), 'w') as f:
        import pprint
        pprint.pprint(vars(args), stream=f)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train_losses_avg, train_top1_avg, train_top5_avg = train(
            train_loader, model, advnet, criterion, optimizer, scheduler,
            optimizer_advnet, scheduler_advnet, epoch, args)

        print("Evaluating on validation set")

        # evaluate on validation set
        val_losses_avg, val_top1_avg, val_top5_avg = validate(
            val_loader, model, criterion, args)

        print("Finished Evaluating on validation set")

        # Save results in log file
        with open(os.path.join(args.save, 'training_log.csv'), 'a') as f:
            f.write(
                '%03d,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f\n' %
                ((epoch + 1), train_losses_avg, train_top1_avg, train_top5_avg,
                 val_losses_avg, val_top1_avg, val_top5_avg))

        # remember best acc@1 and save checkpoint
        is_best = val_top1_avg > best_acc1
        best_acc1 = max(val_top1_avg, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                    'advnet_state_dict': advnet.state_dict(),
                }, is_best)
Example #22
        #         print("omg")
        image = self.color_jitter(image)
        img_yuv = image.convert('YCbCr')
        img_yuv = transforms.functional.resize(img_yuv, (224, 224))
        img_yuv = transforms.functional.to_tensor(img_yuv)
        img_yuv = img_yuv.numpy()[::-1].copy()
        img_yuv = torch.from_numpy(img_yuv)
        img_yuv = transforms.functional.normalize(img_yuv,
                                                  [0.485, 0.456, 0.406],
                                                  [0.229, 0.224, 0.225])

        return img_yuv, torch.tensor([x, y]).float()


datasets = []
datasets.append(XYDataset("Final dataset/test/dataset_xy",
                          random_hflips=False))
# dataset = XYDataset("Final dataset/dataset_xy", random_hflips=False)

for folder in os.listdir("Final dataset/test/Augmentation"):
    # print(folder)
    datasets.append(
        XYDataset("Final dataset/test/Augmentation/" + folder,
                  random_hflips=False))

# read in our file

dataset = D.ConcatDataset(datasets)
# dataset = XYDataset("Final dataset/test/dataset_xy", random_hflips=False)

if __name__ == '__main__':
Example #23
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)
    model.fc = torch.nn.Linear(2048, len(classes_chosen))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            raise NotImplementedError()
            # torch.cuda.set_device(args.gpu)
            # model.cuda(args.gpu)
            # # When using a single GPU per process and per
            # # DistributedDataParallel, we need to divide the batch size
            # # ourselves based on the total number of GPUs we have
            # args.batch_size = int(args.batch_size / ngpus_per_node)
            # args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model.cuda()
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            raise NotImplementedError()
            # model.features = torch.nn.DataParallel(model.features)
            # model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    args.start_epoch = 0
    if args.classifier_pretrained_path != None:
        if os.path.isfile(args.classifier_pretrained_path):
            print("=> loading checkpoint '{}'".format(args.classifier_pretrained_path))
            sd = torch.load(args.classifier_pretrained_path)['state_dict']
            del sd['module.conv1.weight']
            del sd['module.fc.weight']
            del sd['module.fc.bias']
            model.load_state_dict(sd, strict=False)
            print("=> loaded checkpoint '{}'".format(args.classifier_pretrained_path))
        else:
            print("=> no checkpoint found at '{}'".format(args.classifier_pretrained_path))
            raise Exception()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=True)

    cudnn.benchmark = True

    # Data loading code

    if args.data_standard == None:
        print("No Standard Data! Only using --data-distorted datasets")

    transform_train_standard = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    if args.distorted_data_simple_transform:
        print("Using SIMPLE train transform for distorted data")
        transform_train_distorted = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
        ])
    else:
        print("Using STANDARD train transform for distorted data")
        transform_train_distorted = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])

    if args.data_distorted != None:
        if args.symlink_distorted_data_dirs:
            print("Mixing together data directories: ", args.data_distorted)

            train_dataset = torch.utils.data.ConcatDataset([
                CombinedDistortedDatasetFolder(
                    args.data_distorted,
                    transform=transform_train_distorted
                ),
                ImageNetSubsetDataset(
                    args.data_standard,
                    transform=transform_train_standard
                ) if args.data_standard != None else []
            ])
        else:
            print(f"Concatenating Datasets {args.data_standard} and {args.data_distorted}")

            datasets = [
                # args.data_standard
                ImageNetSubsetDataset(
                    args.data_standard,
                    transform=transform_train_standard
                ) if args.data_standard != None else []
            ]

            for distorted_data_dir in args.data_distorted:
                datasets.append(
                    ImageNetSubsetDataset(
                        distorted_data_dir,
                        transform=transform_train_distorted
                    )
                )

            train_dataset = torch.utils.data.ConcatDataset(datasets)
    else:
        print(f"Only using Dataset {args.data_standard}")
        train_dataset = ImageNetSubsetDataset(
            args.data_standard,
            transform=transform_train_standard
        )


    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        ImageNetSubsetDataset(
            args.data_val, 
            transform=transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
            ])
        ),
        batch_size=args.batch_size_val, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    def cosine_annealing(step, total_steps, lr_max, lr_min):
        return lr_min + (lr_max - lr_min) * 0.5 * (
                1 + np.cos(step / total_steps * np.pi))

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: cosine_annealing(
            step,
            args.epochs * len(train_loader),
            1,  # since lr_lambda computes multiplicative factor
            1e-6 / (args.lr * args.batch_size / 256.)))

    if args.start_epoch != 0:
        scheduler.step(args.start_epoch * len(train_loader))

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    ###########################################################################
    ##### Main Training Loop
    ###########################################################################

    with open(os.path.join(args.save, 'training_log.csv'), 'w') as f:
        f.write('epoch,train_loss,train_acc1,train_acc5,val_loss,val_acc1,val_acc5\n')

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train_losses_avg, train_top1_avg, train_top5_avg = train(train_loader, model, criterion, optimizer, scheduler, epoch, args)

        print("Evaluating on validation set")

        # evaluate on validation set
        val_losses_avg, val_top1_avg, val_top5_avg = validate(val_loader, model, criterion, args)

        print("Finished Evaluating on validation set")

        # Save results in log file
        with open(os.path.join(args.save, 'training_log.csv'), 'a') as f:
            f.write('%03d,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f\n' % (
                (epoch + 1),
                train_losses_avg, train_top1_avg, train_top5_avg,
                val_losses_avg, val_top1_avg, val_top5_avg
            ))

        # remember best acc@1 and save checkpoint
        is_best = val_top1_avg > best_acc1
        best_acc1 = max(val_top1_avg, best_acc1)

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer' : optimizer.state_dict(),
            }, is_best)
Example #24
def test_all_datasets(args):
    #for each dataset, we have a dictionary, that contains
    #   - the name
    #   - the Parameters directory
    #   - name of data (***_names.txt file in Param directory)
    #   - the batch size for testing
    test_dir = join(args.root_dir, 'Doc/Test_all/')
    if not exists(test_dir):
        os.makedirs(test_dir)

    datasets = []

    MSRAB = ECSSD = DUT = SED2 = THUR = False

    MSRAB = True
    ECSSD = True
    DUT = True
    SED2 = True
    # THUR = True
    if not args.resume:
        args.resume = join(args.root_dir,
                           'Doc/Phase_II_Fusion/checkpoint_latest.pth.tar')

    #01_MSRAB
    if MSRAB:
        datasets.append({ \
                'name' : '01_MSRAB', \
                'param_dir' : '/media/bigData/_80_User/Dax/UnsupSD/SD_beta/Data/01_MSRAB/Parameters/', \
                'data_prefix' : 'test', \
                'batch_size' : 1 \
                })
    #02_ECSSD
    if ECSSD:
        datasets.append({ \
                'name' : '02_ECSSD', \
                'param_dir' : '/media/bigData/_80_User/Dax/UnsupSD/SD_beta/Data/02_ECSSD/Parameters/', \
                'data_prefix' : 'all', \
                'batch_size' : 1 \
                })

    #03_DUT
    if DUT:
        datasets.append({ \
                'name' : '03_DUT', \
                'param_dir' : '/media/bigData/_80_User/Dax/UnsupSD/SD_beta/Data/03_DUT/Parameters/', \
                'data_prefix' : 'all', \
                'batch_size' : 1 \
                })

    #04_SED2
    if SED2:
        datasets.append({ \
                'name' : '04_SED2', \
                'param_dir' : '/media/bigData/_80_User/Dax/UnsupSD/SD_beta/Data/04_SED2/Parameters/', \
                'data_prefix' : 'all', \
                'batch_size' : 1 \
                })

    #06_THUR
    if THUR:
        datasets.append({ \
                'name' : '06_THUR', \
                'param_dir' : '/media/bigData/_80_User/Dax/UnsupSD/SD_beta/Data/06_THUR/Parameters/', \
                'data_prefix' : 'GT', \
                'batch_size' : 1 \
                })

    #Iterate through the dictionaries and test each dataset
    for dataset in datasets:
        #set correct arguments
        args.dataset_name = dataset['name']
        args.data_dir = dataset['param_dir']
        args.test_data = dataset['data_prefix']
        args.batch_size = dataset['batch_size']
        DOC = test_saliency(args)
        dataset['Result'] = DOC

    print("\n\n\t\t\tMAE\t\tF\t\tprecision\trecall")
    for dataset in datasets:
        print("{name}: \t\t{DOC.L1_GT.avg:.3f}\t\t{DOC.F_GT.avg:.3f}\t\t{DOC.prec_GT.avg:.3f}\t\t{DOC.recall_GT.avg:.3f}"\
            .format(name=dataset['name'], DOC=dataset['Result']) )

    result_file = join(test_dir, 'Test_Results.txt')
    with open(result_file, 'a') as f:
        f.write("\t\t\tMAE\t\tF\t\tprecision\trecall\n")
        for dataset in datasets:
            f.write("{name}: \t\t{DOC.L1_GT.avg:.5f}\t\t{DOC.F_GT.avg:.5f}\t\t{DOC.prec_GT.avg:.5f}\t\t{DOC.recall_GT.avg:.5f}\n"
                    .format(name=dataset['name'], DOC=dataset['Result']))