Example #1
def load_dataset(config):
    """
    Loads and returns the test dataset's dataloader and class count
    :param config: configs for training/testing
    :return test_loader: dataloader for the test data
    :return num_of_classes: total number of classes in the test set
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    data_transforms = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(), normalize])

    test_dataset = ImageDataset(root_dir=config["TEST_DATA_PATH"],
                                model_save_path=config["MODEL_SAVE_PATH"],
                                testing=True,
                                transform=data_transforms)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config["TEST_BATCH_SIZE"],
        shuffle=False,
        num_workers=config["TEST_WORKERS"],
        pin_memory=True)

    return test_loader, test_dataset.get_total_classes()
Example #2
    def __init__(self, network, learning_rate):
        train_set = ImageDataset('data/2018-01-01-2019-01-01-20-False')

        self.train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                                        batch_size=10,
                                                        shuffle=False,
                                                        drop_last=True)

        test_set = ImageDataset('data/2019-01-01-2020-01-01-20-False')

        self.test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                                       batch_size=10,
                                                       shuffle=False,
                                                       drop_last=True)

        self.learning_rate = learning_rate

        self.model = network

        self.lossfn = nn.BCELoss()

        # Optimizer choice is tentative; SGD with momentum is used as a common default here.
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=learning_rate,
                                   momentum=0.9)
Example #3
def get_data(train_df, val_df, config):
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.ToTensor(),
            transforms.CenterCrop(config['input_size'])
        ]),
        'val':
        transforms.Compose([
            transforms.ToTensor(),
            transforms.CenterCrop(config['input_size'])
        ])
    }

    image_datasets = {
        'train': ImageDataset(train_df, transforms=data_transforms['train']),
        'val': ImageDataset(val_df, transforms=data_transforms['val'])
    }

    dataloaders = {
        'train':
        DataLoader(image_datasets['train'],
                   batch_size=config['batch_size'],
                   shuffle=True,
                   num_workers=config['workers']),
        'val':
        DataLoader(image_datasets['val'],
                   batch_size=2 * config['batch_size'],
                   shuffle=False,
                   num_workers=config['workers'])
    }

    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    return dataloaders, dataset_sizes
Example #4
    def prepare_data(self):
        train_dataset = ImageDataset(
            dataset_cfg=self.cfg.dataset,
            transform=self.transforms["train"]
        )
        val_dataset = ImageDataset(
            dataset_cfg=self.cfg.dataset,
            transform=self.transforms["val"]
        )
        # cross validation
        k_fold = self.cfg.dataset.k_fold
        val_k = self.cfg.dataset.val_k
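        # When k_fold > 0, the data is split into k_fold stratified folds and fold val_k is held out for validation.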
        if k_fold > 0:
            X, y = train_dataset.get_all_data()
            skf = StratifiedKFold(n_splits=k_fold, shuffle=False)
            fold_set = [(train_idx, val_idx)
                        for train_idx, val_idx in skf.split(X, y)]
            train_dataset = Subset(
                train_dataset, indices=fold_set[val_k][0])
            val_dataset = Subset(
                val_dataset, indices=fold_set[val_k][1])

        self.datasets = {
            "train": train_dataset,
            "val": val_dataset,
        }
Example #5
def setup_dataset(config, label_a, label_b):
    train_dir_a = os.path.join(config['dataset']['train']['dirname'], label_a)
    train_dir_b = os.path.join(config['dataset']['train']['dirname'], label_b)
    test_dir_a = os.path.join(config['dataset']['test']['dirname'], label_a)
    test_dir_b = os.path.join(config['dataset']['test']['dirname'], label_b)
    assert os.path.exists(train_dir_a) and os.path.exists(train_dir_b)
    assert os.path.exists(test_dir_a) and os.path.exists(test_dir_b)

    img_size = config['img_size']
    train_transform = setup_image_transform(img_size=img_size,
                                            **config['trainsform'])
    test_transform = setup_image_transform(img_size=img_size)
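    # The training transform takes extra augmentation options from the config; the test transform only resizes.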

    train_dataset_a = ImageDataset(train_dir_a, train_transform,
                                   config['dataset']['train']['ext'])
    train_dataset_b = ImageDataset(train_dir_b, train_transform,
                                   config['dataset']['train']['ext'])
    test_dataset_a = ImageDataset(test_dir_a, test_transform,
                                  config['dataset']['test']['ext'])
    test_dataset_b = ImageDataset(test_dir_b, test_transform,
                                  config['dataset']['test']['ext'])

    train_loader_a = DataLoader(train_dataset_a, **config['loader'],
                                shuffle=True)
    train_loader_b = DataLoader(train_dataset_b, **config['loader'],
                                shuffle=True)
    test_loader_a = DataLoader(test_dataset_a, **config['loader'])
    test_loader_b = DataLoader(test_dataset_b, **config['loader'])

    return {
        'train_loader_a': train_loader_a,
        'train_loader_b': train_loader_b,
        'test_loader_a': test_loader_a,
        'test_loader_b': test_loader_b,
    }
Example #6
def get_object_confusion(class1, class2, similarity_model, config):

    model_config = config['model']
    benchmark_config = config['benchmark']
    model_path = model_config['model_filename']
    dataset_path = benchmark_config['dataset_path']

    params = {
        'dim': model_config['input_shape'],
        'batch_size': benchmark_config['batch_size'],
        'shuffle': False
    }

    test_dataset = ImageDataset(dataset_path, 'validation')
    test_dataset.prepare_specific(benchmark_config['test_cases'] // 2, class1,
                                  class2)
    test_generator = DataGenerator(test_dataset, **params)
    preds = np.array([])
    gts = np.array([])

    for i in tqdm(range(len(test_generator))):
        batch = test_generator[i]
        pred = similarity_model.predict_on_batch(batch[0])
        preds = np.append(preds, pred.flatten())
        gts = np.append(gts, batch[1])
        if benchmark_config['vis_output'] and not i % benchmark_config[
                'test_cases'] // (5 * benchmark_config['batch_size']):
            show_output(batch[0][0], batch[0][1], pred, batch[1])
    te_acc = compute_accuracy(preds, gts)
    print("Class 1: " + class1 + ", Class2: " + class2 +
          ", Distinguishability Score: " + str(te_acc))

    return te_acc
Example #7
def train(epoch, args):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    batch_idx = 0
    ds = ImageDataset(args.dataset,dataset_load,'data/casia_landmark.txt',name=args.net+':train',
        bs=args.bs,shuffle=True,nthread=6,imagesize=128)
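    # The dataset streams mini-batches via ds.get() until it returns None.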
    while True:
        img,label = ds.get()
        if img is None: break
        inputs = torch.from_numpy(img).float()
        targets = torch.from_numpy(label[:,0]).long()
        if use_cuda: inputs, targets = inputs.cuda(), targets.cuda()

        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs_1, outputs_2 = net(inputs)
        outputs = (outputs_1, outputs_2)  # (cos_theta, phi_theta) pair expected by the criterion
        loss = criterion(outputs, targets)
        lossd = loss.data[0]
        loss.backward()
        optimizer.step()

        train_loss += loss.data[0]
        outputs = outputs[0] # 0=cos_theta 1=phi_theta
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        printoneline(dt(),'Te=%d Loss=%.4f | AccT=%.4f%% (%d/%d) %.4f %.2f %d'
            % (epoch,train_loss/(batch_idx+1), 100.0*correct/total, correct, total, 
            lossd, criterion.lamb, criterion.it))
        batch_idx += 1
    print('')
Example #8
 def __init__(self, server, port, scenario=0, dataset='MNIST'):
     self.local_model = ClassNet()
     # self.local_model = CIFARNet()
     self.host = server
     self.port = port
     self.attack = scenario
     self.evaluator = ImageDataset(dataset)
     self.train_loader, self.test_loader = self.evaluator.load_data()
Example #9
def train(epoch,args):
    featureNet.train()
    maskNet.train()
    fcNet.train()
    train_loss = 0
    classification_loss = 0
    correct = 0
    total = 0
    batch_idx = 0
    ds = ImageDataset(args.dataset,dataset_load,'data/casia_landmark.txt',name=args.net+':train',
        bs=args.bs,shuffle=True,nthread=6,imagesize=128)
    while True:
        img,label = ds.get()
        if img is None: break
        inputs = torch.from_numpy(img).float()
        targets = torch.from_numpy(label[:,0]).long()
        if use_cuda: inputs, targets = inputs.cuda(), targets.cuda()

        optimizerMask.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        features = featureNet(inputs)
        mask = maskNet(features)
        maskedFeatures = torch.mul(mask, features)
        outputs = fcNet(maskedFeatures)
        outputs1 = outputs[0] # 0=cos_theta 1=phi_theta
        _, predicted = torch.max(outputs1.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        
        # training the advNet:
        lossAdv = criterion(outputs, targets)
        lossCompact = torch.sum(conv2d(mask, laplacianKernel, stride=1, groups=512))
        # lossSize   #L1 norm of the mask to make the mask sparse.
        lossSize = F.l1_loss(mask, target=torch.ones(mask.size()).cuda(), size_average = False)
        print("advnet:", - criterion2(outputs1, targets).data/10, lossCompact.data/1000000, lossSize.data/10000)
        loss = - criterion2(outputs1, targets)/10 + lossCompact/1000000 + lossSize/10000
        lossd = loss.data
        loss.backward(retain_graph=True)
        optimizerMask.step()
        
        optimizerFC.zero_grad()
        lossC = criterion(outputs, targets)
        lossClassification = lossC.data
        lossC.backward()
        optimizerFC.step()
        classification_loss += lossClassification
        train_loss += loss.data

        print("classification loss:", classification_loss / (batch_idx + 1))
        printoneline(dt(),'Te=%d Loss=%.4f | AccT=%.4f%% (%d/%d) %.4f %.2f %d\n'
            % (epoch,train_loss/(batch_idx+1), 100.0*correct/total, correct, total, 
            lossd, criterion.lamb, criterion.it))
        batch_idx += 1
        # break
    print('')
Example #10
    def __init__(self, host, port, scenario=None, dataset='MNIST', delta=None):

        self.global_model = ClassNet() if dataset == 'MNIST' else CIFARNet()
        self.host = host
        self.port = port
        self.evaluator = ImageDataset(dataset)
        self.test_loader = None
        self.scenario = scenario
        self.isStart = False
        self.delta = delta
        self.wait_time = 30
        self._lock = threading.Lock()
        self.startup()
Example #11
def image_output(model, images):
    """Get predicted output on a single image"""

    with torch.no_grad():
        model.eval()
        dataset = ImageDataset()
        for image in images:
            dataset.append([image, 0, ''])
        loader = utils.DataLoader(dataset, batch_size=1, num_workers=1, pin_memory=True)
        for item in loader:
            image = item[0]
            output = model(image, output='argmax')
            print(f'Output: {output}')
Example #12
def dataloader(train_dir, test_dir, crop_size, batch_size):
    input_transforms = transforms.Compose([
        transforms.Resize((crop_size, crop_size)),
        transforms.ToTensor(),
    ])

    train_dataset = ImageDataset(train_dir, input_transforms)
    test_dataset = ImageDataset(test_dir, input_transforms)

    training_data_loader = DataLoader(dataset=train_dataset,
                                      batch_size=batch_size)
    testing_data_loader = DataLoader(dataset=test_dataset,
                                     batch_size=batch_size)

    return training_data_loader, testing_data_loader
Example #13
def train_model(model, train):
    print('>>> Train model ...')
    dataset = ImageDataset(train['features'], train['labels'])
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)
    n_epoch = 10
    lr = 0.0005

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(n_epoch):
        model.train()
        running_loss = 0.0

        for i, (features, labels) in enumerate(dataloader):
            optimizer.zero_grad()

            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 2000 == 1999:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    return model
Example #14
def evaluate():
    args = arg_parser()
    save_loc = os.path.join(args.save_loc, args.run_id)
    if not os.path.exists(save_loc):
        raise AssertionError(
            'No directory named {} found'.format(save_loc))
    models = list()
    # Ensemble of models (a single model can also be passed for testing)
    for model in args.models:
        # Build a fresh network per checkpoint so the ensemble members do not share weights
        network = resnet50(args.num_classes)
        network.load_state_dict(torch.load(os.path.join(save_loc, model)))
        models.append(network)

    device = None
    if args.device == 'cuda' and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    img_list = parse_data(args.test_data)
    # NOTE: You may sort the data according to your usecase
    img_list.sort(key=lambda x: int(x[39:].strip('.jpg')))
    # Dataset and Dataloader
    test_dataset = ImageDataset(img_list)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers)
    assigned_labels = test_cls(models, test_loader, args.num_classes, device)
    # NOTE: You may amend this function for your usecase
    get_csv(assigned_labels, save_loc)
Example #15
def main(image_dir, checkpoint_path, coloured_images_dir):
    test_data = ImageDataset(image_dir)
    num_images = len(os.listdir(f"{image_dir}/test"))
    test_dataloader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=num_images)

    model = Network()
    model = model.to(device)

    if device == torch.device("cpu"):
        checkpoint = torch.load(checkpoint_path,
                                map_location=torch.device("cpu"))
    elif device == torch.device("cuda"):
        checkpoint = torch.load(checkpoint_path)

    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    img_gray, img_ab, img_inception = next(iter(test_dataloader))
    img_gray, img_ab, img_inception = img_gray.to(device), img_ab.to(
        device), img_inception.to(device)

    with torch.no_grad():
        output = model(img_gray, img_inception)

    for idx in range(num_images):
        try:
            _, predicted_image, _ = convert_to_rgb(img_gray[idx].cpu(),
                                                   output[idx].cpu(),
                                                   img_ab[idx].cpu())
            plt.imsave(arr=predicted_image,
                       fname=f"{coloured_images_dir}/colourized_{idx}.jpg")
        except IndexError:
            break
Example #16
def main(cfg):

    # data loader
    print("start DataLoader")
    print(f"dataset dir: {cfg['datasets']['dir_path']}")
    dataset = ImageDataset(cfg)
    data_loader = DataLoader(dataset,
                             batch_size=cfg["dataloader"]["batch_size"],
                             shuffle=False,
                             num_workers=cfg["dataloader"]["num_workers"],
                             collate_fn=dataloader_collate_fn)
    print(f"There are {len(dataset)} images to be processed")

    # feature extractor model
    print("start initialize model")
    model = build_model(cfg)
    model.load_param(cfg["model"]["weight"])
    model = model.cuda(0) if torch.cuda.device_count() >= 1 else model
    model.eval()

    feature_bank = process_featerbank(data_loader, dataset, model, cfg)
    img_paths = list(feature_bank.keys())
    distance_mat = process_distancing(feature_bank, cfg)
    del feature_bank
    upload_to_storage(img_paths, cfg)
    argsorted_distance = process_argsort(distance_mat, cfg)
    del distance_mat
    upload_to_database(argsorted_distance, img_paths, cfg)
    del argsorted_distance
    print("processing has been completed")
Example #17
def test_web_imgs(model, features):
    print('>>> Test web images ...')
    dataset = ImageDataset(features, np.array([13, 22, 15, 4, 38]))
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1)
    model.eval()

    for i, (features, labels) in enumerate(dataloader):
        outputs = model(features)
        print('label', labels)
        print('outputs', outputs)
        _, predicted = torch.max(outputs, 1)
        m = torch.nn.Softmax(dim=1)
        softmax = m(outputs)
        print('softmax', softmax)
        print('predicted', predicted)
        sorts = []
        for class_idx, output in enumerate(softmax[0]):
            sorts.append((output.item(), class_idx))
        sorts = sorted(sorts, key=lambda x: x[0], reverse=True)
        print(sorts)
        for k in range(5):
            print('%.4f' % sorts[k][0])
Example #18
def main():
    syn_path = './data/Synthetic'
    real_path = './data/Real'

    n_batch = 4
    num_epochs = 20
    learning_rate = 0.005
    train_dataset = ImageDataset(syn_path, real_path)
    train_loader = DataLoader(train_dataset,
                              batch_size=n_batch,
                              shuffle=True,
                              num_workers=12)
    valid_loader = DataLoader(train_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=12)

    classifier = ImageClassifier()
    classifier = classifier.cuda()
    loss_fn = nn.CrossEntropyLoss()
    loss_fn = loss_fn.cuda()
    optimizer = torch.optim.Adam([{
        'params': classifier.classifier.parameters()
    }],
                                 lr=learning_rate,
                                 weight_decay=1e-4)

    best_acc = 0.0  # track the best validation accuracy across epochs
    for epoch in range(num_epochs):
        classifier.train()
        for i, (images, label) in enumerate(train_loader):
            label = torch.LongTensor(label)
            images, label = images.cuda(), label.cuda()
            pred = classifier(images)
            # print(pred.size(),label.size())
            loss = loss_fn(pred, label)
            print('epoch:', epoch, i + 1, loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            classifier.eval()
            correct = 0
            total = 0
            for i, (images, label) in enumerate(valid_loader):
                label = torch.LongTensor(label)
                images, label = images.cuda(), label.cuda()
                pred = classifier(images)
                pred = pred.argmax(dim=1, keepdim=True).cpu()
                print(pred, label)
                correct += pred.eq(label.view_as(pred).cpu()).cpu().sum()
                total += pred.size(0)
            accuracy = float(correct) / total
            print(accuracy)
            if accuracy >= best_acc:
                best_acc = accuracy
                best_epoch = epoch
                torch.save(classifier.state_dict(), './classifier.pth')
Example #19
    def __create_data(self):
        if self.use_transform:
            im_transform = transforms.Compose([
                transforms.Resize(self.input_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
            ])
        else:
            im_transform = None

        im_dataset = {}
        for i in ['test', 'train', 'validation']:
            return_all = (i == 'test')
            im_dataset[i] = ImageDataset(image_path_names=self.data_dict[i],
                                         captions_int=self.captions_int,
                                         im_addr=self.image_addr,
                                         transformer=im_transform,
                                         return_all=return_all)

        im_loader = {}
        for i in ['test', 'train', 'validation']:
            im_loader[i] = DataLoader(im_dataset[i],
                                      batch_size=self.batch_size,
                                      shuffle=self.shuffle,
                                      num_workers=self.num_works,
                                      drop_last=True)
        return im_dataset, im_loader
Example #20
def inference(cfg):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # define model
    pl_module = (LightningModuleInference.load_from_checkpoint(
        CHECKPOINT, cfg=cfg).eval().to(device))

    # define transform
    transform = get_transform(cfg.transform.val)

    # define Dataset and Dataloader
    dataset_cfg = Dict({"root": TEST_ROOT_PATH})
    dataset = ImageDataset(dataset_cfg, transform)
    dataloader = DataLoader(
        dataset=dataset,
        batch_size=cfg.data.dataloader.batch_size // 2,
        num_workers=cfg.data.dataloader.num_workers,
        shuffle=cfg.data.dataloader.shuffle,
    )

    output_path = Path(OUTPUT_PATH)
    output_path.mkdir(parents=True, exist_ok=True)

    labels, predicts = [], []
    for imgs, targets in tqdm(dataloader):
        input = imgs.to(device, non_blocking=True)
        outputs = pl_module(input)
        _, predicted_indexes = torch.max(outputs.data, 1)
        targets = targets.cpu().numpy().tolist()
        predict_idx = predicted_indexes.cpu().numpy().tolist()
        labels.extend(targets)
        predicts.extend(predict_idx)
    calc_eval(labels, predicts)
Example #21
    def __init__(self, config):
        self.rank, self.world_size = 0, 1
        if config['dist']:
            self.rank = dist.get_rank()
            self.world_size = dist.get_world_size()

        self.mode = config['dgp_mode']
        assert self.mode in [
            'reconstruct', 'colorization', 'SR', 'hybrid', 'inpainting',
            'morphing', 'defence', 'jitter'
        ]

        if self.rank == 0:
            # mkdir path
            if not os.path.exists('{}/images'.format(config['exp_path'])):
                os.makedirs('{}/images'.format(config['exp_path']))
            if not os.path.exists('{}/images_sheet'.format(
                    config['exp_path'])):
                os.makedirs('{}/images_sheet'.format(config['exp_path']))
            if not os.path.exists('{}/logs'.format(config['exp_path'])):
                os.makedirs('{}/logs'.format(config['exp_path']))

            # prepare logger
            if not config['no_tb']:
                try:
                    from tensorboardX import SummaryWriter
                except ImportError:
                    raise Exception("Please switch off \"tensorboard\" "
                                    "in your config file if you do not "
                                    "want to use it, otherwise install it.")
                self.tb_logger = SummaryWriter('{}'.format(config['exp_path']))
            else:
                self.tb_logger = None

            self.logger = utils.create_logger(
                'global_logger',
                '{}/logs/log_train.txt'.format(config['exp_path']))

        self.model = models.DGP(config)
        if self.mode == 'morphing':
            self.model2 = models.DGP(config)
            self.model_interp = models.DGP(config)

        # Data loader
        train_dataset = ImageDataset(
            config['root_dir'],
            config['list_file'],
            image_size=config['resolution'],
            normalize=True)
        sampler = utils.DistributedSampler(
            train_dataset) if config['dist'] else None
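        # In distributed mode a DistributedSampler shards the dataset across ranks; otherwise the full dataset is used.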
        self.train_loader = DataLoader(
            train_dataset,
            batch_size=1,
            shuffle=False,
            sampler=sampler,
            num_workers=1,
            pin_memory=False)
        self.config = config
Example #22
def load_dataset() -> [DataLoader, DataLoader]:
    train_datasets = ImageDataset(config.train_image_dir, config.image_size, config.upscale_factor, "train")
    valid_datasets = ImageDataset(config.valid_image_dir, config.image_size, config.upscale_factor, "valid")
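    # Both loaders pin host memory and keep worker processes alive between epochs (persistent_workers=True).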
    train_dataloader = DataLoader(train_datasets,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers,
                                  pin_memory=True,
                                  persistent_workers=True)
    valid_dataloader = DataLoader(valid_datasets,
                                  batch_size=config.batch_size,
                                  shuffle=False,
                                  num_workers=config.num_workers,
                                  pin_memory=True,
                                  persistent_workers=True)

    return train_dataloader, valid_dataloader
Example #23
def train(epoch, args):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    batch_idx = 0
    ds = ImageDataset(imageroot=args.dataset,
                      callback=dataset_load,
                      imagelistfile=args.data_list,
                      name=args.net + ':train',
                      batchsize=args.batchsize,
                      shuffle=True,
                      nthread=args.nthread,
                      imagesize=128)

    batch_num = ds.imagenum // args.batchsize
    while True:
        img, label = ds.get()
        if img is None:
            break
        inputs = torch.from_numpy(img).float()
        targets = torch.from_numpy(label[:, 0]).long()
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        outputs = outputs[0]  # 0=cos_theta 1=phi_theta
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).sum().item()

        if batch_idx % 10 == 0:
            print(
                dt(), 'Epoch=%d batch: %d/%d Loss=%.4f | Acc=%.4f%%' %
                (epoch, batch_idx, batch_num, train_loss /
                 (batch_idx + 1), correct * 100.0 / total))
        batch_idx += 1
Example #24
def get_dataloader(data_dir, subdir, train=None):
    full_directory = os.path.join(data_dir, subdir)
    if os.path.exists(full_directory):
        dataset = ImageDataset(full_directory, train)
        return data.DataLoader(dataset,
                               batch_size=16,
                               shuffle=True,
                               num_workers=4)
        # return data.DataLoader(dataset, batch_size=2, shuffle=False, num_workers=0)
    return None
Example #25
def main():
    """
    Compute MAV for all the training examples
    """
    parser = argparse.ArgumentParser(
        description='BC learning for image classification')
    parser.add_argument('--dataset',
                        required=True,
                        choices=['cifar10', 'cifar100'])
    parser.add_argument('--netType', required=True, choices=['convnet'])
    parser.add_argument('--data', required=True, help='Path to dataset')
    parser.add_argument('--save',
                        default='None',
                        help='Directory to save the results')
    parser.add_argument('--resume', required=True)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--BC', action='store_true', help='BC learning')
    parser.add_argument('--plus', action='store_true', help='Use BC+')
    parser.add_argument('--batchSize', type=int, default=128)
    parser.add_argument('--seed', type=int, default=1701)
    parser.add_argument('--val', action='store_true')
    parser.add_argument('--nb_vals', type=int, default=10000)
    opt = parser.parse_args()
    opt.nClasses = 10

    model = getattr(models, opt.netType)(opt.nClasses)
    serializers.load_npz(opt.resume, model)
    model.to_gpu(opt.gpu)

    train_images, train_labels = load_dataset(opt)
    train_data = ImageDataset(train_images, train_labels, opt, train=False)
    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  opt.batchSize,
                                                  repeat=False,
                                                  shuffle=False)

    chainer.config.train = False
    chainer.config.enable_backprop = False

    scores = [[] for _ in range(opt.nClasses)]
    for i, batch in enumerate(train_iter):
        x_array, t_array = chainer.dataset.concat_examples(batch)
        x = chainer.Variable(cuda.to_gpu(x_array, opt.gpu))
        fc6 = cuda.to_cpu(model(x).data)  # (B, 10)
        for score, (x, t) in zip(fc6, batch):
            if np.argmax(score) == t:
                scores[t].append(score)

    # Add channel axis (needed at multi-crop evaluation)
    scores = [np.array(x)[:, np.newaxis, :] for x in scores]  # (N_c, 1, C) * C
    mavs = np.array([np.mean(x, axis=0) for x in scores])  # (C, 1, C)
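    # Each MAV (mean activation vector) is the per-class mean score of the correctly classified training samples.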

    joblib.dump(scores, os.path.join(opt.save, "train_scores.joblib"))
    joblib.dump(mavs, os.path.join(opt.save, "mavs.joblib"))
Example #26
def predict(config):
    since = time.time()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    clf_model_path = trained_models_path + "clf.pth"
    reg_model_path = trained_models_path + "reg.pth"

    test_transforms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.CenterCrop(config['input_size'])])

    sub_df = pd.read_csv(sub_df_path)

    test_dataset = ImageDataset(df=sub_df,
                                transforms=test_transforms,
                                is_train=False)

    test_loader = DataLoader(test_dataset,
                             batch_size=config['batch_size'] * 2,
                             shuffle=False,
                             num_workers=config['workers'])
    reg_model = TransferNet(models.resnet18(pretrained=False))
    clf_model = TransferNet(models.resnet18(pretrained=False))

    reg_model.load_state_dict(torch.load(reg_model_path))
    clf_model.load_state_dict(torch.load(clf_model_path))
    reg_model.to(device)
    clf_model.to(device)

    reg_preds_accum = []
    clf_preds_accum = []

    for batch in test_loader:
        image = batch['image'].to(device)
        with torch.no_grad():
            reg_preds = reg_model(image)
            clf_preds = clf_model(image)

            reg_preds = reg_preds.cpu().detach().numpy()
            clf_preds = clf_preds.cpu().detach().numpy()

            reg_preds_accum.extend(reg_preds)
            clf_preds_accum.extend(clf_preds)

    time_elapsed = time.time() - since
    print('Inference complete {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                      time_elapsed % 60))

    reg_preds_accum = np.array(reg_preds_accum)
    clf_preds_accum = torch.tensor(clf_preds_accum)
    clf_preds_accum = torch.round(torch.sigmoid(clf_preds_accum)).numpy()
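    # Classifier logits are mapped to hard 0/1 labels via sigmoid + rounding; regression outputs are returned unchanged.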

    return clf_preds_accum, reg_preds_accum
Example #27
def train_loaders():
    """Get the train and validation loaders"""

    train_augment = transforms.Compose([
        transforms.ColorJitter(0.5, 0.5, 0.5),
        transforms.RandomAffine(degrees=10),
        transforms.GaussianBlur(5),
        transforms.RandomPerspective(distortion_scale=0.2)
    ])
    train_dataset = ImageDataset(img_dir=dir_train, augment=train_augment)
    val_dataset = ImageDataset(img_dir=dir_val)
    train_loader = utils.DataLoader(train_dataset,
                                    batch_size=batch_size,
                                    shuffle=shuffle_dataset,
                                    num_workers=1,
                                    pin_memory=True)
    validation_loader = utils.DataLoader(val_dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle_dataset,
                                         num_workers=1,
                                         pin_memory=True)
    return train_loader, validation_loader
Example #28
def run(file,
        sim_matrix,
        data_path,
        annoy_distance='angular',
        annoy_num_tree=50,
        output_path="./results"):
    lot_dict = dict()

    import csv
    reader = list(csv.reader(open(file, 'r')))
    cols = reader[0]
    for row in reader[1:]:
        lot = dict(zip(cols, row))
        lot_dict[lot['id']] = lot

    ds = ImageDataset(data_path)
    ann_indices = list(range(len(ds.image_paths)))
    data_indices = list(
        map(lambda x: x.split('/')[-1].split('.')[0], ds.image_paths))

    ann_to_data = dict(zip(ann_indices, data_indices))
    data_to_ann = dict(zip(data_indices, ann_indices))

    features = np.load(sim_matrix)
    ann = construct_ann_index(annoy_distance, annoy_num_tree, features)

    arr_results = []
    for k in list(data_indices):
        if k in data_indices:
            ann_idx = data_to_ann[k]

            similar_images, distances = ann.get_nns_by_item(
                ann_idx, 11, include_distances=True)
            similar_images, distances = similar_images[1:], distances[1:]
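            # The nearest neighbour is the query image itself, so it is dropped, leaving the top 10 matches.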
            try:
                item = dict(artwork=dict(lot_dict[k]))

                sims = list(
                    map(lambda s: lot_dict[ann_to_data[s]].copy(),
                        similar_images))
                for idx in range(len(sims)):
                    sims[idx]['score'] = 1 - float(distances[idx])
                item['sims'] = list(
                    sorted(sims, key=lambda x: x['score'], reverse=True))
                arr_results.append(item)
            except Exception:
                print('this one fails %d' % ann_idx)
    sim_name = sim_matrix.split('/')[-1]

    with open('%s/nn-from-%s.json' % (output_path, sim_name), 'w') as outfile:
        json.dump(arr_results, outfile)
Example #29
def get_dataset_list(path):
    '''
    Scan the given directory for dataset subdirectories and build dataset objects from them.

    :param path: root directory containing image_* and text_* dataset subdirectories
    :return: list of dataset objects (ImageDataset / TextDataset) found under path
    '''
    datasets = []
    try:
        dataset_id = 0
        for subdir in os.listdir(path):
            if not subdir.startswith('.'):
                dataset_path = os.path.join(path, subdir)
                if subdir.split('_')[0] == 'image':
                    file_list = []
                    nr_images = 0
                    for file in os.listdir(dataset_path):
                        if file.endswith(('.JPEG', '.jpg', '.png')):
                            im = Image.open(dataset_path + "/" + file)
                            nr_images += 1
                            width, height = im.size
                            file_list.append({
                                "src": file,
                                "width": width,
                                "height": height
                            })
                            file_list = sorted(file_list,
                                               key=itemgetter('src'))
                    datasets.append(
                        ImageDataset(dataset_id, dataset_path,
                                     subdir.split('_')[1], file_list,
                                     nr_images))
                    dataset_id = dataset_id + 1
                elif subdir.split('_')[0] == 'text':
                    datasets.append(
                        TextDataset(dataset_id, dataset_path,
                                    subdir.split('_')[1]))
                    dataset_id = dataset_id + 1
                elif subdir == "tcav_concepts":
                    print("found concept directory for tcav")
                elif subdir == 'current_explanations':
                    print(
                        "found existing directory for explanation images. Images in the directory may be overwritten."
                    )
                else:
                    print(
                        "{0} is not a valid dataset directory".format(subdir))
    except Exception as e:
        print(e)
    finally:
        return datasets
Example #30
def get_data_loaders(input_dir,
                     batch_size=10,
                     num_workers=6,
                     val_size=100,
                     test_size=100):
    """Load data from the given folder and split into train/validation/test"""
    data_transforms = get_transformation()

    train_dataset = ImageDataset(input_dir=input_dir,
                                 test_size=test_size,
                                 val_size=val_size,
                                 transform=data_transforms)
    val_dataset = ImageDataset(input_dir=input_dir,
                               test_size=test_size,
                               val_size=val_size,
                               transform=data_transforms,
                               val=True)
    test_dataset = ImageDataset(input_dir=input_dir,
                                test_size=test_size,
                                val_size=val_size,
                                transform=data_transforms,
                                train=False)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=num_workers)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=num_workers)

    return train_loader, val_loader, test_loader