Example #1
File: base.py  Project: imirzadeh/CL-Gym
    def load_joint(
            self,
            task: int,
            batch_size: int,
            shuffle: Optional[bool] = True,
            num_workers: Optional[int] = 0,
            pin_memory: Optional[bool] = True
    ) -> Tuple[DataLoader, DataLoader]:
        """
        Makes dataloaders for joint/multitask settings,
        i.e., for task `t` returns loaders over the data of tasks `1, 2, ..., t-1, t`.
        
        Args:
            task: The task number.
            batch_size: The batch_size for dataloaders.
            shuffle: Should loaders be shuffled? Default: True.
            num_workers: corresponds to PyTorch's `num_workers` argument. Default: 0.
            pin_memory: corresponds to PyTorch's `pin_memory` argument. Default: True.

        Returns:
            a Tuple of dataloaders, i.e., (train_loader, validation_loader).
            
        Examples::
            >>> benchmark = Benchmark(num_tasks=2, per_task_joint_examples=128)
            >>> # task 1 loaders (single): returns 4 batches (i.e., 128 examples)
            >>> train_loader_1, val_loader_1 = benchmark.load(1, batch_size=32)
            >>> # task 1 loaders (joint): returns 4 batches (i.e., 128 examples)
            >>> joint_train_loader_1, joint_val_loader_1 = benchmark.load_joint(1, batch_size=32)
            >>> # task 2 loaders (single): returns 4 batches (i.e., 128 examples)
            >>> train_loader_2, val_loader_2 = benchmark.load(2, batch_size=32)
            >>> # task 2 loaders (joint): returns 8 batches (i.e., 256 examples)
            >>> joint_train_loader_2, joint_val_loader_2 = benchmark.load_joint(2, batch_size=32)
        
        .. warning::
            The method will throw an error if `Benchmark` is instantiated without `per_task_joint_examples`.
            The reason is that, behind the scenes, we compute the indices for joint examples in the
            `precompute_joint_indices()` method, and this method relies on those computations.
        """
        if not self.per_task_joint_examples:
            raise ValueError(
                "Called load_joint() but per_task_joint_examples is not set")

        if task > self.num_tasks:
            raise ValueError(
                f"Asked to load task {task} but the benchmark has {self.num_tasks} tasks"
            )

        trains, tests = [], []
        for prev_task in range(1, task + 1):
            prev_train = Subset(self.trains[prev_task],
                                self.joint_indices_train[prev_task])
            prev_test = Subset(self.tests[prev_task],
                               self.joint_indices_test[prev_task])
            trains.append(prev_train)
            tests.append(prev_test)

        trains, tests = ConcatDataset(trains), ConcatDataset(tests)
        train_loader = DataLoader(trains,
                                  batch_size,
                                  shuffle,
                                  num_workers=num_workers,
                                  pin_memory=pin_memory)
        test_loader = DataLoader(tests,
                                 batch_size,
                                 shuffle,
                                 num_workers=num_workers,
                                 pin_memory=pin_memory)
        return train_loader, test_loader
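A minimal, self-contained sketch of the Subset/ConcatDataset pattern that load_joint uses above; the TensorDatasets stand in for the per-task datasets and the hand-made index lists stand in for the indices precomputed by precompute_joint_indices() (the Benchmark class itself is not needed to show the pattern):

import torch
from torch.utils.data import TensorDataset, Subset, ConcatDataset, DataLoader

# stand-ins for self.trains / self.joint_indices_train
per_task_datasets = {
    1: TensorDataset(torch.randn(200, 8), torch.zeros(200, dtype=torch.long)),
    2: TensorDataset(torch.randn(200, 8), torch.ones(200, dtype=torch.long)),
}
joint_indices = {1: list(range(128)), 2: list(range(128))}  # 128 joint examples per task

task = 2
parts = [Subset(per_task_datasets[t], joint_indices[t]) for t in range(1, task + 1)]
joint_train_loader = DataLoader(ConcatDataset(parts), batch_size=32, shuffle=True)
print(len(joint_train_loader))  # 8 batches, i.e. 256 examples for task 2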
Example #2
def train_val_dataset(dataset, val_split=0.25):
    train_idx, val_idx = train_test_split(list(range(len(dataset))),
                                          test_size=val_split)
    return Subset(dataset, train_idx), Subset(dataset, val_idx)
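A hypothetical usage of train_val_dataset with a torchvision dataset (the CIFAR-10 path and batch sizes are assumptions). Note that the split above is not stratified; passing stratify=dataset.targets to train_test_split would make it class-balanced:

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

cifar = datasets.CIFAR10('./data', train=True, download=True,
                         transform=transforms.ToTensor())
train_ds, val_ds = train_val_dataset(cifar, val_split=0.25)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=64, shuffle=False)
print(len(train_ds), len(val_ds))  # 37500 12500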
Example #3
def getData(name='cifar10', train_bs=128, test_bs=1000):

    if name == 'mnist':

        train_loader = datasets.MNIST('./data',
                                      train=True,
                                      download=True,
                                      transform=transforms.Compose([
                                          transforms.ToTensor(),
                                      ]))

        val_loader = datasets.MNIST('./data',
                                    train=True,
                                    download=True,
                                    transform=transforms.Compose([
                                        transforms.ToTensor(),
                                    ]))

        offset = 3000
        rng = np.random.RandomState(1234)
        R = rng.permutation(len(train_loader))
        lengths = (len(train_loader) - offset, offset)
        train_loader, val_loader = [
            Subset(train_loader, R[offset - length:offset])
            for offset, length in zip(_accumulate(lengths), lengths)
        ]

        train_loader = torch.utils.data.DataLoader(train_loader,
                                                   batch_size=train_bs,
                                                   shuffle=True)
        val_loader = torch.utils.data.DataLoader(val_loader,
                                                 batch_size=test_bs,
                                                 shuffle=False)

        test_loader = torch.utils.data.DataLoader(datasets.MNIST(
            './data',
            train=False,
            download=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
            ])),
                                                  batch_size=test_bs,
                                                  shuffle=False)

    if name == 'pmnist':

        trainset = datasets.MNIST(root='./data',
                                  train=True,
                                  download=True,
                                  transform=transforms.Compose([
                                      transforms.ToTensor(),
                                  ]))

        testset = datasets.MNIST(root='./data',
                                 train=False,
                                 download=False,
                                 transform=transforms.Compose([
                                     transforms.ToTensor(),
                                 ]))

        x_train = trainset.train_data
        y_train = trainset.targets

        x_test = testset.test_data
        y_test = testset.targets

        torch.manual_seed(12008)
        perm = torch.randperm(784)

        x_train_permuted = x_train.reshape(x_train.shape[0], -1)
        x_train_permuted = x_train_permuted[:, perm]
        x_train_permuted = x_train_permuted.reshape(x_train.shape[0], 28, 28)

        x_test_permuted = x_test.reshape(x_test.shape[0], -1)
        x_test_permuted = x_test_permuted[:, perm]
        x_test_permuted = x_test_permuted.reshape(x_test.shape[0], 28, 28)

        x_train_permuted = add_channels(x_train_permuted)
        x_test_permuted = add_channels(x_test_permuted)

        train_loader = torch.utils.data.TensorDataset(x_train_permuted.float(),
                                                      y_train)

        offset = 3000
        rng = np.random.RandomState(1234)
        R = rng.permutation(len(train_loader))
        lengths = (len(train_loader) - offset, offset)
        train_loader, val_loader = [
            Subset(train_loader, R[offset - length:offset])
            for offset, length in zip(_accumulate(lengths), lengths)
        ]

        train_loader = torch.utils.data.DataLoader(train_loader,
                                                   batch_size=train_bs,
                                                   shuffle=True)
        val_loader = torch.utils.data.DataLoader(val_loader,
                                                 batch_size=test_bs,
                                                 shuffle=False)

        test_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(x_test_permuted.float(), y_test),
            batch_size=test_bs,
            shuffle=False)

    if name == 'cifar10':
        transform_train = transforms.Compose([
            transforms.ToTensor(),
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
        ])

        train_loader = datasets.CIFAR10(root='./data',
                                        train=True,
                                        download=True,
                                        transform=transform_train)

        offset = 3000
        rng = np.random.RandomState(1234)
        R = rng.permutation(len(train_loader))
        lengths = (len(train_loader) - offset, offset)
        train_loader, val_loader = [
            Subset(train_loader, R[offset - length:offset])
            for offset, length in zip(_accumulate(lengths), lengths)
        ]

        train_loader = torch.utils.data.DataLoader(train_loader,
                                                   batch_size=train_bs,
                                                   shuffle=True)
        val_loader = torch.utils.data.DataLoader(val_loader,
                                                 batch_size=test_bs,
                                                 shuffle=False)

        testset = datasets.CIFAR10(root='./data',
                                   train=False,
                                   download=False,
                                   transform=transform_test)
        test_loader = torch.utils.data.DataLoader(testset,
                                                  batch_size=test_bs,
                                                  shuffle=False)

    if name == 'double_pendulum':
        # open a file, where you stored the pickled data
        file = open("./data/double_pendulum.pkl", 'rb')
        data = pickle.load(file)
        file.close()

        trainset = []
        train_target = []
        testset = []
        test_target = []

        for i in range(1, 400):
            trainset.append(data[i:i + 1000])
            train_target.append(data[i + 1000 + 1])

        for i in range(1501, 3000):
            testset.append(data[i:i + 1000])
            test_target.append(data[i + 1000 + 1])

        trainset = np.asarray(trainset)
        testset = np.asarray(testset)
        train_target = np.asarray(train_target)
        test_target = np.asarray(test_target)

        trainset = torch.tensor(trainset)
        testset = torch.tensor(testset)
        train_target = torch.tensor(train_target)
        test_target = torch.tensor(test_target)

        #trainset = add_channels(trainset)
        #testset = add_channels(testset)

        train_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(trainset.float(),
                                           train_target.float()),
            batch_size=train_bs,
            shuffle=True)
        test_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(testset.float(),
                                           test_target.float()),
            batch_size=test_bs,
            shuffle=False)
        # this branch has no separate validation split; reuse the test loader so the
        # `return` at the end of getData does not fail with an undefined val_loader
        val_loader = test_loader

    return train_loader, test_loader, val_loader
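The permutation/_accumulate split that getData repeats for each dataset can also be written with torch.utils.data.random_split; a comparable sketch with the same 3000-example holdout, using a random tensor dataset as a stand-in for MNIST:

import torch
from torch.utils.data import TensorDataset, DataLoader, random_split

full = TensorDataset(torch.randn(60000, 1, 28, 28), torch.randint(0, 10, (60000,)))
holdout = 3000
train_ds, val_ds = random_split(full, [len(full) - holdout, holdout],
                                generator=torch.Generator().manual_seed(1234))
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=1000, shuffle=False)
print(len(train_ds), len(val_ds))  # 57000 3000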
Example #4
np.random.seed(1)
torch.manual_seed(1)

class histoCancerDataset(Dataset):
    def __init__(self, data_dir, transform, data_type="train"):
        path2data = os.path.join(data_dir, data_type)
        self.filenames = os.listdir(path2data)
        self.full_filenames = [os.path.join(path2data, f) for f in self.filenames]
        csv_filename = data_type + "_labels.csv"
        path2csvLabels = os.path.join(data_dir, csv_filename)
        labels_df = pd.read_csv(path2csvLabels)
        labels_df.set_index("id", inplace=True)
        self.labels = [labels_df.loc[filename[:-4]].values[0] for filename in self.filenames]
        self.transform = transform
      
    def __len__(self):
        return len(self.full_filenames)
      
    def __getitem__(self, idx):
        image = Image.open(self.full_filenames[idx])
        image = self.transform(image)
        return image, self.labels[idx]

data_dir = "../chapter2/data/"
data_transformer = transforms.Compose([transforms.ToTensor()])
hist_ds = histoCancerDataset(data_dir, data_transformer, data_type="train")

test_index = np.random.randint(len(hist_ds), size=100)
test_ds = Subset(hist_ds, test_index)
test_dl = DataLoader(test_ds, batch_size=1, shuffle=False)  
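One thing to watch in the snippet above: np.random.randint samples with replacement, so test_index can contain duplicates. A possible variant that samples without replacement and also builds the complementary training subset (the training batch size of 32 is an assumption):

import numpy as np
from torch.utils.data import Subset, DataLoader

all_indices = np.arange(len(hist_ds))
test_index = np.random.choice(all_indices, size=100, replace=False)
train_index = np.setdiff1d(all_indices, test_index)

train_ds = Subset(hist_ds, train_index.tolist())
test_ds = Subset(hist_ds, test_index.tolist())
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=1, shuffle=False)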
    
Example #5
def _get_datasets(dataset, dataroot, load_train:bool, load_test:bool,
        transform_train, transform_test, train_max_size:int, test_max_size:int)\
            ->Tuple[DatasetLike, DatasetLike]:
    logger = get_logger()
    trainset, testset = None, None

    if dataset == 'cifar10':
        if load_train:
            # NOTE: train transforms will also be applied to validation set
            trainset = torchvision.datasets.CIFAR10(root=dataroot,
                                                    train=True,
                                                    download=True,
                                                    transform=transform_train)
        if load_test:
            testset = torchvision.datasets.CIFAR10(root=dataroot,
                                                   train=False,
                                                   download=True,
                                                   transform=transform_test)
    elif dataset == 'mnist':
        if load_train:
            trainset = torchvision.datasets.MNIST(root=dataroot,
                                                  train=True,
                                                  download=True,
                                                  transform=transform_train)
        if load_test:
            testset = torchvision.datasets.MNIST(root=dataroot,
                                                 train=False,
                                                 download=True,
                                                 transform=transform_test)
    elif dataset == 'fashionmnist':
        if load_train:
            trainset = torchvision.datasets.FashionMNIST(
                root=dataroot,
                train=True,
                download=True,
                transform=transform_train)
        if load_test:
            testset = torchvision.datasets.FashionMNIST(
                root=dataroot,
                train=False,
                download=True,
                transform=transform_test)
    elif dataset == 'reduced_cifar10':
        if load_train:
            trainset = torchvision.datasets.CIFAR10(root=dataroot,
                                                    train=True,
                                                    download=True,
                                                    transform=transform_train)
            sss = StratifiedShuffleSplit(n_splits=1, test_size=46000)  # 4000
            sss = sss.split(list(range(len(trainset))), trainset.targets)
            train_idx, valid_idx = next(sss)
            targets = [trainset.targets[idx] for idx in train_idx]
            trainset = Subset(trainset, train_idx)
            trainset.targets = targets
        if load_test:
            testset = torchvision.datasets.CIFAR10(root=dataroot,
                                                   train=False,
                                                   download=True,
                                                   transform=transform_test)
    elif dataset == 'cifar100':
        if load_train:
            trainset = torchvision.datasets.CIFAR100(root=dataroot,
                                                     train=True,
                                                     download=True,
                                                     transform=transform_train)
        if load_test:
            testset = torchvision.datasets.CIFAR100(root=dataroot,
                                                    train=False,
                                                    download=True,
                                                    transform=transform_test)
    elif dataset == 'svhn':
        if load_train:
            trainset = torchvision.datasets.SVHN(root=dataroot,
                                                 split='train',
                                                 download=True,
                                                 transform=transform_train)
            extraset = torchvision.datasets.SVHN(root=dataroot,
                                                 split='extra',
                                                 download=True,
                                                 transform=transform_train)
            trainset = ConcatDataset([trainset, extraset])
        if load_test:
            testset = torchvision.datasets.SVHN(root=dataroot,
                                                split='test',
                                                download=True,
                                                transform=transform_test)
    elif dataset == 'reduced_svhn':
        if load_train:
            trainset = torchvision.datasets.SVHN(root=dataroot,
                                                 split='train',
                                                 download=True,
                                                 transform=transform_train)
            sss = StratifiedShuffleSplit(n_splits=1,
                                         test_size=73257 - 1000)  #1000
            sss = sss.split(list(range(len(trainset))), trainset.targets)
            train_idx, valid_idx = next(sss)
            targets = [trainset.targets[idx] for idx in train_idx]
            trainset = Subset(trainset, train_idx)
            trainset.targets = targets
        if load_test:
            testset = torchvision.datasets.SVHN(root=dataroot,
                                                split='test',
                                                download=True,
                                                transform=transform_test)
    elif dataset == 'imagenet':
        if load_train:
            trainset = ImageNet(root=os.path.join(dataroot,
                                                  'imagenet-pytorch'),
                                transform=transform_train)
            # compatibility
            trainset.targets = [lb for _, lb in trainset.samples]
        if load_test:
            testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'),
                               split='val',
                               transform=transform_test)
    elif dataset == 'reduced_imagenet':
        # randomly chosen indices
        idx120 = [
            904, 385, 759, 884, 784, 844, 132, 214, 990, 786, 979, 582, 104,
            288, 697, 480, 66, 943, 308, 282, 118, 926, 882, 478, 133, 884,
            570, 964, 825, 656, 661, 289, 385, 448, 705, 609, 955, 5, 703, 713,
            695, 811, 958, 147, 6, 3, 59, 354, 315, 514, 741, 525, 685, 673,
            657, 267, 575, 501, 30, 455, 905, 860, 355, 911, 24, 708, 346, 195,
            660, 528, 330, 511, 439, 150, 988, 940, 236, 803, 741, 295, 111,
            520, 856, 248, 203, 147, 625, 589, 708, 201, 712, 630, 630, 367,
            273, 931, 960, 274, 112, 239, 463, 355, 955, 525, 404, 59, 981,
            725, 90, 782, 604, 323, 418, 35, 95, 97, 193, 690, 869, 172
        ]
        if load_train:
            trainset = ImageNet(root=os.path.join(dataroot,
                                                  'imagenet-pytorch'),
                                transform=transform_train)
            # compatibility
            trainset.targets = [lb for _, lb in trainset.samples]

            sss = StratifiedShuffleSplit(n_splits=1,
                                         test_size=len(trainset) - 500000,
                                         random_state=0)  # 4000
            sss = sss.split(list(range(len(trainset))), trainset.targets)
            train_idx, valid_idx = next(sss)

            # filter out
            train_idx = list(
                filter(lambda x: trainset.labels[x] in idx120, train_idx))
            valid_idx = list(
                filter(lambda x: trainset.labels[x] in idx120, valid_idx))

            targets = [
                idx120.index(trainset.targets[idx]) for idx in train_idx
            ]
            for idx in range(len(trainset.samples)):
                if trainset.samples[idx][1] not in idx120:
                    continue
                trainset.samples[idx] = (trainset.samples[idx][0],
                                         idx120.index(
                                             trainset.samples[idx][1]))
            trainset = Subset(trainset, train_idx)
            trainset.targets = targets
            logger.info('reduced_imagenet train={}'.format(len(trainset)))
        if load_test:
            testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'),
                               split='val',
                               transform=transform_test)
            test_idx = list(filter(lambda x: testset.samples[x][1] in \
                idx120, range(len(testset))))
            for idx in range(len(testset.samples)):
                if testset.samples[idx][1] not in idx120:
                    continue
                testset.samples[idx] = (testset.samples[idx][0],
                                        idx120.index(testset.samples[idx][1]))
            testset = Subset(testset, test_idx)
    else:
        raise ValueError('invalid dataset name=%s' % dataset)

    if train_max_size > 0:
        logger.warning(
            'Trainset trimmed to max_batches = {}'.format(train_max_size))
        trainset = LimitDataset(trainset, train_max_size)
    if test_max_size > 0:
        logger.warning(
            'Testset trimmed to max_batches = {}'.format(test_max_size))
        testset = LimitDataset(testset, test_max_size)

    return trainset, testset
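A stand-alone sketch of the "reduced" dataset pattern used above: keep a small, class-balanced subset of CIFAR-10 via StratifiedShuffleSplit, wrap it in Subset, and re-attach .targets for downstream code that expects that attribute (4000 examples, matching the comment in the reduced_cifar10 branch):

import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import StratifiedShuffleSplit
from torch.utils.data import Subset

full = torchvision.datasets.CIFAR10(root='./data', train=True, download=True,
                                    transform=transforms.ToTensor())
sss = StratifiedShuffleSplit(n_splits=1, test_size=len(full) - 4000, random_state=0)
train_idx, _ = next(sss.split(list(range(len(full))), full.targets))

reduced = Subset(full, train_idx)
reduced.targets = [full.targets[i] for i in train_idx]  # keep the label list in sync
print(len(reduced))  # 4000 examples, roughly class-balanced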
Example #6
def train(opt):
    # log stuff
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    writer = SummaryWriter(opt.log_path)

    dataset = MotBBImageSequence('dataset_utils/Mot17_test_single.txt',
                                 use_only_first_video=False,
                                 new_width=832,
                                 new_height=832)
    train_data = Subset(dataset, range(0, dataset.valid_begin))
    valid_data = Subset(dataset, range(dataset.valid_begin, len(dataset)))
    train_loader = DataLoader(train_data,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_workers,
                              drop_last=True)
    valid_loader = DataLoader(valid_data,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_workers,
                              drop_last=True)

    obs_length = 10
    pred_length = 9

    epoch_len = len(train_loader)

    for epoch in range(opt.num_epoches):
        for img_num, (gt, b, img1, img2) in enumerate(train_loader):
            opt.model.train()
            opt.decoder.train()
            opt.model.lstm_part.reinit_lstm(opt.batch_size)
            opt.decoder.reset_hidden(opt.batch_size)

            seq_loss = 0
            seq_ap = 0
            seq_loss_coord, seq_loss_conf, seq_loss_pred = (0.0, 0.0, 0.0)
            pred_sequence = []
            for i in range(obs_length):
                single_gt = b[:, i].to(opt.device())
                single_img2 = img2[:, i].to(opt.device())
                single_img2_normed = preprocess(img2[:, i]).to(opt.device())
                single_img1_normed = preprocess(img1[:, i]).to(opt.device())

                double_image = torch.cat(
                    (single_img1_normed, single_img2_normed), dim=1)

                logits = opt.model((double_image, single_img2 * 255.))

                loss, loss_coord, loss_conf = opt.yolo_loss(logits, single_gt)
                seq_ap += get_ap(logits, filter_gt_batch(single_gt),
                                 opt.image_size, opt.image_size,
                                 opt.model.anchors)
                seq_loss += loss
                seq_loss_conf += loss_conf.item()
                seq_loss_coord += loss_coord.item()

            prev_out = logits_to_box_params(logits.detach(), opt.model.anchors)
            # the box parameters are normalized to [0, 1]
            # at the moment [batch, anchors, ...., h* w]
            # rearrange tensor s.t [batch, h, w, anchors, ...]
            # ... == x, y, w, h, conf
            prev_out = prev_out.view(opt.batch_size, len(opt.model.anchors), -1, opt.encoding_size, opt.encoding_size) \
                .permute(0, 3, 4, 1, 2)
            # be careful: the position of the conf/id in the targets is 0, not 4,
            # so we change it at this point to be consistent with the labels

            prev_out = torch.Tensor(np.roll(prev_out.cpu().numpy(), 1,
                                            axis=-1)).to(opt.device())

            opt.decoder.set_hidden(opt.model.lstm_part.hidden,
                                   opt.model.lstm_part.cell)

            for i in range(pred_length):
                _, yolo_target = opt.pred_loss.to_yolo(
                    input=gt[:, obs_length].numpy(),
                    target=b[:, obs_length + i].numpy(),
                    use_iou=True)
                yolo_target = torch.Tensor(yolo_target).to(opt.device())
                # target boxes are in [0, grid_h] -> normalize to 1
                # at this point we assume the image is square
                yolo_target[:, :, :, :,
                            1:] = yolo_target[:, :, :, :,
                                              1:] / opt.encoding_size

                input_tensor = prev_out[:, :, :, :, 1:].contiguous() \
                                                       .view(opt.batch_size, opt.encoding_size, opt.encoding_size,
                                                             len(opt.model.anchors) * 4) \
                                                       .permute(0, 3, 1, 2)
                pred = opt.decoder(input_tensor)
                pred = pred.view(opt.batch_size, len(opt.model.anchors), -1, opt.encoding_size, opt.encoding_size) \
                    .permute(0, 3, 4, 1, 2)

                seq_loss_pred += opt.pred_loss.forward(pred, prev_out,
                                                       yolo_target)
                pred_sequence.append((prev_out.detach().cpu().numpy(),
                                      pred.detach().cpu().numpy(),
                                      yolo_target.detach().cpu().numpy()))

                prev_out[:, :, :, :, 1:] = yolo_target[:, :, :, :, 1:]

            seq_loss += seq_loss_pred
            loss = seq_loss
            loss.backward()

            opt.optimizer_encoder.step()
            opt.optimizer_decoder.step()

            opt.optimizer_encoder.zero_grad()
            opt.optimizer_decoder.zero_grad()

            seq_loss = seq_loss.item() - seq_loss_pred.item()

            writeLossToSummary(writer, 'Train', seq_loss / obs_length,
                               seq_loss_coord / obs_length,
                               seq_loss_conf / obs_length,
                               epoch * epoch_len + img_num)

            print(f"epoch:{epoch} it: {img_num}")
            print(f"loss_seq: {seq_loss/obs_length}, "
                  f"loss_coord: {seq_loss_coord / obs_length}, "
                  f"loss_conf: {seq_loss_conf / obs_length}, "
                  f"mAP: {seq_ap/obs_length}")

            seq_loss_pred = seq_loss_pred.item()
            box_list = prediction_to_box_list(pred_sequence)
            dis_error = displacement_error(box_list,
                                           center_distance,
                                           image_size=832.0)
            writer.add_scalar('Train/loss_pred', seq_loss_pred / pred_length,
                              epoch * epoch_len + img_num)
            writer.add_scalar('Train/dis_err', dis_error,
                              epoch * epoch_len + img_num)
            writer.add_scalar('Train/AP', seq_ap / obs_length,
                              epoch * epoch_len + img_num)
            print(
                f"loss_pred: {seq_loss_pred / pred_length}, dis_error: {dis_error}"
            )

        # draws last batch
        draw_pred_sequence(box_list,
                           img2[0],
                           pred_length,
                           obs_length,
                           name='train_img.png',
                           image_size=832)

        ###############
        # VALIDATION
        ###############
        print("###############")
        print("# VALIDATION BEGIN")
        print("###############")
        opt.model.eval()
        opt.decoder.eval()
        valid_len = len(valid_loader)
        loss_val = 0
        loss_ap = 0
        loss_coord_val = 0
        loss_conf_val = 0
        loss_pred_val = 0
        dis_error_val = 0
        for img_num, (gt, b, img1, img2) in enumerate(valid_loader):
            opt.model.lstm_part.reinit_lstm(opt.batch_size)
            opt.decoder.reset_hidden(opt.batch_size)
            opt.optimizer_encoder.zero_grad()
            opt.optimizer_decoder.zero_grad()

            seq_loss = 0
            seq_loss_coord, seq_loss_conf, seq_loss_pred = (0.0, 0.0, 0.0)
            seq_ap = 0
            pred_sequence = []
            for i in range(obs_length):
                single_gt = b[:, i].to(opt.device())
                single_img2 = img2[:, i].to(opt.device())
                single_img2_normed = preprocess(img2[:, i]).to(opt.device())
                single_img1_normed = preprocess(img1[:, i]).to(opt.device())

                double_image = torch.cat(
                    (single_img1_normed, single_img2_normed), dim=1)
                with torch.no_grad():
                    logits = opt.model((double_image, single_img2 * 255.))

                    loss, loss_coord, loss_conf = opt.yolo_loss(
                        logits, single_gt)

                seq_ap += get_ap(logits.detach(), filter_gt_batch(single_gt),
                                 opt.image_size, opt.image_size,
                                 opt.model.anchors)
                seq_loss += loss
                seq_loss_conf += loss_conf.item()
                seq_loss_coord += loss_coord.item()

            with torch.no_grad():
                prev_out = logits_to_box_params(logits.detach(),
                                                opt.model.anchors)
                # the box parameters are normalized to [0, 1]
                # at the moment [batch, anchors, ...., h* w]
                # rearrange tensor s.t [batch, h, w, anchors, ...]
                # ... == x, y, w, h, conf
                # print(f"origin mask {torch.sum(mask, dim=(0, 1))}")
                prev_out = prev_out.view(opt.batch_size, len(opt.model.anchors), -1, opt.encoding_size,
                                         opt.encoding_size) \
                    .permute(0, 3, 4, 1, 2)
                # be careful: the position of the conf/id in the targets is 0, not 4,
                # so we change it at this point to be consistent with the labels

                prev_out = torch.Tensor(
                    np.roll(prev_out.cpu().numpy(), 1,
                            axis=-1)).to(opt.device())

                opt.decoder.set_hidden(opt.model.lstm_part.hidden,
                                       opt.model.lstm_part.cell)
                for i in range(pred_length):
                    _, yolo_target = opt.pred_loss.to_yolo(
                        input=gt[:, obs_length].numpy(),
                        target=b[:, obs_length + i].numpy(),
                        use_iou=True)
                    yolo_target = torch.Tensor(yolo_target).to(opt.device())
                    # target boxes are in [0, grid_h] -> normalize to 1
                    # at this point we assume the image is square
                    yolo_target[:, :, :, :,
                                1:] = yolo_target[:, :, :, :,
                                                  1:] / opt.encoding_size
                    input_tensor = prev_out[:, :, :, :, 1:].contiguous() \
                                                           .view(opt.batch_size, opt.encoding_size, opt.encoding_size,
                                                                 len(opt.model.anchors) * 4) \
                                                           .permute(0, 3, 1, 2)
                    pred = opt.decoder(input_tensor)
                    pred = pred.view(opt.batch_size, len(opt.model.anchors), -1, opt.encoding_size, opt.encoding_size) \
                        .permute(0, 3, 4, 1, 2)

                    seq_loss_pred += opt.pred_loss.forward(
                        pred, prev_out, yolo_target)
                    pred_sequence.append((prev_out.detach().cpu().numpy(),
                                          pred.detach().cpu().numpy(),
                                          yolo_target.detach().cpu().numpy()))
                    prev_out[:, :, :, :, 1:] += pred

                seq_loss_pred = seq_loss_pred.item()
                box_list = prediction_to_box_list(pred_sequence)
                dis_error = displacement_error(box_list,
                                               center_distance,
                                               image_size=832.0)

            loss_val += seq_loss.item() / valid_len / obs_length
            loss_coord_val += seq_loss_coord / valid_len / obs_length
            loss_conf_val += seq_loss_conf / valid_len / obs_length
            loss_ap += seq_ap / obs_length / valid_len

            loss_pred_val += seq_loss_pred / valid_len / pred_length
            dis_error_val += dis_error / valid_len
        draw_pred_sequence(box_list,
                           img2[0],
                           pred_length,
                           obs_length,
                           name='val_img.png',
                           image_size=832)
        writeLossToSummary(writer, 'Val', loss_val, loss_coord_val,
                           loss_conf_val, (epoch + 1) * epoch_len)

        print(f"epoch:{epoch}")
        print(
            f"loss_seq: {loss_val}, loss_coord: {loss_coord_val}, loss_conf: {loss_conf_val}, mAP: {loss_ap}"
        )

        writer.add_scalar('Val/loss_pred', loss_pred_val,
                          (epoch + 1) * epoch_len)
        writer.add_scalar('Val/dis_err', dis_error_val,
                          (epoch + 1) * epoch_len)
        writer.add_scalar('Val/mAP', loss_ap, (epoch + 1) * epoch_len)

        print(f"loss_pred: {loss_pred_val}, dis_error: {dis_error_val}")
        print("###############")
        print("# VALIDATION END")
        print("###############")

        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': opt.model.state_dict(),
                'optimizer_state_dict': opt.optimizer_encoder.state_dict()
            }, opt.log_path + f'/snapshot_encoder{epoch}.tar')
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': opt.decoder.state_dict(),
                'optimizer_state_dict': opt.optimizer_decoder.state_dict()
            }, opt.log_path + f'/snapshot_decoder{epoch}.tar')

    writer.close()
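A shape-only sketch of the rearrangement applied to the detector output above: [batch, anchors, 5, h*w] is reshaped to [batch, h, w, anchors, 5], and np.roll then moves the confidence from the last slot to index 0 so it matches the target layout (the random tensor stands in for the output of logits_to_box_params):

import numpy as np
import torch

batch, num_anchors, enc = 2, 5, 13
box_params = torch.randn(batch, num_anchors, 5 * enc * enc)  # x, y, w, h, conf per grid cell

prev_out = box_params.view(batch, num_anchors, -1, enc, enc).permute(0, 3, 4, 1, 2)
print(prev_out.shape)  # torch.Size([2, 13, 13, 5, 5])

prev_out = torch.from_numpy(np.roll(prev_out.numpy(), 1, axis=-1))
# the last dimension is now ordered (conf, x, y, w, h)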
Example #7
from datasets.mb_speech import MBSpeech as SpeechDataset, vocab

# only 1 voice, so use a much simpler train transform
train_transform = Compose([
    LoadMagSpectrogram(),
    ComputeMelSpectrogramFromMagSpectrogram(num_features=num_features,
                                            normalize=args.normalize, eps=eps),
    ApplyAlbumentations(album.Compose([album.Cutout(num_holes=8)], p=1)),
    TimeScaleSpectrogram(max_scale=0.1, probability=0.5),
    MaskSpectrogram(frequency_mask_max_percentage=0.3,
                    time_mask_max_percentage=0.1,
                    probability=0.5),
])

train_dataset = SpeechDataset(transform=train_transform)
valid_dataset = SpeechDataset(transform=valid_transform)
indices = list(range(len(train_dataset)))
train_dataset = Subset(train_dataset, indices[:-args.valid_batch_size])
valid_dataset = Subset(valid_dataset, indices[-args.valid_batch_size:])

train_data_sampler, valid_data_sampler = None, None
if args.distributed:
    train_data_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    valid_data_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset)
train_data_loader = DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=(train_data_sampler is None),
                               collate_fn=collate_fn, num_workers=args.dataload_workers_nums,
                               sampler=train_data_sampler)
valid_data_loader = DataLoader(valid_dataset, batch_size=args.valid_batch_size, shuffle=False,
                               collate_fn=collate_fn, num_workers=args.dataload_workers_nums,
                               sampler=None)

if args.model == 'quartznet5x5':
    model = QuartzNet5x5(vocab=vocab, num_features=num_features)
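When a DistributedSampler is attached (as above), shuffling is delegated to the sampler, which is why shuffle=(train_data_sampler is None). The sampler also needs to be told the epoch number so its shuffling order changes between epochs; a sketch of the usual loop, where num_epochs is an assumed argument:

for epoch in range(num_epochs):
    if train_data_sampler is not None:
        train_data_sampler.set_epoch(epoch)  # re-seed the distributed shuffling
    for batch in train_data_loader:
        ...  # forward / backward / optimizer step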
Example #8
def decode_classes_from_layers(gpu,
                               inference,
                               generator,
                               image_size,
                               n_filters,
                               noise_dim,
                               data_path,
                               dataset,
                               nonlinear=False,
                               lr=0.001,
                               folds=10,
                               epochs=50,
                               hidden_size=1000,
                               wd=1e-4,
                               opt='adam',
                               lr_schedule=False,
                               batch_size=128,
                               workers=4,
                               verbose=True):
    """ Trains a linear or nonlinear decoder from a given layer of the cortex, all layers at a time (including inputs)

    Does k-fold CV on the test set of this dataset. A random permutation is used.

    Returns a tensor of accuracies on each of the k folds and for each of the 6 decoders:
                                                        Input ---  Layer1 .... Layer4 --- Noise


   """

    # ----- Get dataset ------ #
    # Data loading code
    valdir = os.path.join(data_path, 'val')
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    if dataset in ['imagenet', 'folder', 'lfw']:
        # folder dataset
        all_test_dataset = datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(image_size),
                transforms.CenterCrop(image_size),
                transforms.ToTensor(),
                normalize,
            ]))

        nc = 3
        n_classes = 1000
    elif dataset == 'cifar10':
        all_test_dataset = datasets.CIFAR10(root=data_path,
                                            download=True,
                                            train=False,
                                            transform=transforms.Compose([
                                                transforms.Resize(image_size),
                                                transforms.ToTensor(),
                                                transforms.Normalize(
                                                    (0.5, 0.5, 0.5),
                                                    (0.5, 0.5, 0.5)),
                                            ]))
        nc = 3
        n_classes = 10
    elif dataset == 'mnist':
        all_test_dataset = datasets.MNIST(root=data_path,
                                          download=True,
                                          train=False,
                                          transform=transforms.Compose([
                                              transforms.Resize(image_size),
                                              transforms.ToTensor(),
                                              transforms.Normalize((0.5, ),
                                                                   (0.5, )),
                                          ]))
        nc = 1
        n_classes = 10

    assert all_test_dataset

    perm = torch.randperm(len(all_test_dataset))
    n_test_examples = len(all_test_dataset) // folds

    all_accuracies = []
    all_reconstructions = []
    for f in range(folds):
        # ---- Get CV indices ----
        test_idx = perm[f * n_test_examples:(f + 1) * n_test_examples]
        if f == folds - 1:
            # last fold may be larger if len(all_test_dataset) % folds != 0
            test_idx = perm[f * n_test_examples:]
            # all earlier chunks are training data; reusing perm[(f + 1) * n_test_examples:]
            # here would put the remainder in both the train and test folds
            train_idx = perm[:f * n_test_examples]
        else:
            train_idx = torch.cat(
                (perm[:f * n_test_examples], perm[(f + 1) * n_test_examples:]))

        #  ----- Make loaders -----
        train_dataset = Subset(all_test_dataset, train_idx)
        test_dataset = Subset(all_test_dataset, test_idx)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=workers,
            pin_memory=True,
        )
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=workers,
            pin_memory=True,
        )

        # ----- Build decoder ------
        if nonlinear:
            decoder = NonlinearDecoder(image_size, noise_dim, n_classes, nc,
                                       n_filters, hidden_size)
        else:
            decoder = LinearDecoder(image_size, noise_dim, n_classes, nc,
                                    n_filters)

        # get to proper GPU
        torch.cuda.set_device(gpu)
        inference = inference.cuda(gpu)
        generator = generator.cuda(gpu)
        decoder = decoder.cuda(gpu)

        # ------ Build optimizer ------ #

        if opt == 'adam':
            optimizer = optim.Adam(decoder.parameters(),
                                   lr=lr,
                                   betas=(.9, 0.999),
                                   weight_decay=wd)
        elif opt == 'sgd':
            optimizer = optim.SGD(decoder.parameters(),
                                  lr=lr,
                                  momentum=0.9,
                                  weight_decay=wd)
        else:
            raise AssertionError("This optimizer not implemented yet.")

        for epoch in range(epochs):
            if lr_schedule:
                adjust_lr(epoch, optimizer, epochs)
            train(inference, optimizer, decoder, train_loader, gpu)
            if verbose or (epoch == epochs - 1):
                accuracies, reconstructions = test(inference, generator,
                                                   decoder,
                                                   test_loader, gpu, epoch,
                                                   len(test_idx), verbose)

        all_accuracies.append(accuracies)
        all_reconstructions.append(reconstructions)

    return torch.Tensor(all_accuracies), torch.stack(all_reconstructions)
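A stand-alone sketch of the cross-validation indexing above: one random permutation is cut into `folds` contiguous chunks, each chunk serves once as the held-out fold and the remaining indices form the training fold, with the last fold absorbing any remainder (the 103-example tensor dataset is a stand-in):

import torch
from torch.utils.data import TensorDataset, Subset

dataset = TensorDataset(torch.randn(103, 4), torch.randint(0, 10, (103,)))
folds = 10
perm = torch.randperm(len(dataset))
n_test = len(dataset) // folds

for f in range(folds):
    start, stop = f * n_test, (f + 1) * n_test
    if f == folds - 1:
        stop = len(dataset)  # last fold absorbs the remainder
    test_idx = perm[start:stop].tolist()
    train_idx = perm[:start].tolist() + perm[stop:].tolist()
    train_fold, test_fold = Subset(dataset, train_idx), Subset(dataset, test_idx)
    print(f, len(train_fold), len(test_fold))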
Example #9
def run():
    args = parser.parse_args()
    nlayer = args.nlayer
    bidirection = args.bidirection
    file_path = args.file_path  # e.g. '/content/drive/My Drive/Master_Final_Project/Genetic_attack/Code/nlp_adversarial_example_master_pytorch/glove.840B.300d.txt' or '/lustre/scratch/scratch/ucabdc3/lstm_attack'
    save_path = os.path.join(file_path, 'results')
    MAX_VOCAB_SIZE = 50000
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#    with open(os.path.join(file_path, 'dataset_%d.pkl' %MAX_VOCAB_SIZE), 'rb') as f:
#        dataset = pickle.load(f)
        
    with open('aux_files/dataset_%d.pkl' %MAX_VOCAB_SIZE, 'rb') as f:
        dataset = pickle.load(f)

        
#    skip_list = np.load('aux_files/missed_embeddings_counter_%d.npy' %MAX_VOCAB_SIZE)
    embedding_matrix = np.load('aux_files/embeddings_glove_%d.npy' %(MAX_VOCAB_SIZE))
    embedding_matrix = torch.tensor(embedding_matrix.T).to(device)
    
#    goog_lm = LM()
    
    # pytorch
    max_len = 100
#    padded_train_raw = pad_sequences(dataset.train_seqs2, maxlen = max_len, padding = 'post')
    padded_test_raw = pad_sequences(dataset.test_seqs2, maxlen = max_len, padding = 'post')
#    # TrainSet
#    data_set = Data_infor(padded_train_raw, dataset.train_y)
#    num_train = len(data_set)
#    indx = list(range(num_train))
#    train_set = Subset(data_set, indx)
    
    # TestSet
    batch_size = 1
    SAMPLE_SIZE = args.sample_size
    data_set = Data_infor(padded_test_raw, dataset.test_y)
    num_test = len(data_set)
    indx = list(range(num_test))
    
    all_test_set  = Subset(data_set, indx)
    indx = random.sample(indx, SAMPLE_SIZE)
    test_set = Subset(data_set, indx)
    test_loader = DataLoader(test_set, batch_size = batch_size, shuffle = False, pin_memory=True)
    all_test_loader  = DataLoader(all_test_set, batch_size = 128, shuffle = True)
    
    lstm_size = 128
    rnn_state_save = os.path.join(file_path,'best_lstm_0.7_0.001_test2')
    glove_len = args.glove_len
    model = SentimentAnalysis(batch_size=batch_size, embedding_matrix = embedding_matrix, hidden_size = lstm_size, kept_prob = 0.8, num_layers=nlayer, bidirection=bidirection, embedding_dim = glove_len)
    
    model.load_state_dict(torch.load(rnn_state_save))
    model = model.to(device)
    
    
    model.eval()
    test_pred = torch.tensor([])
    test_targets = torch.tensor([])

    with torch.no_grad():
      for batch_index, (seqs, length, target) in enumerate(all_test_loader):
        seqs, target, length = seqs.to(device), target.to(device), length.to(device)
        seqs = seqs.type(torch.LongTensor)
        len_order = torch.argsort(length, descending = True)
        length = length[len_order]
        seqs = seqs[len_order]
        target = target[len_order]

        output, pred_out = model.pred(seqs, length, False)
        test_pred = torch.cat((test_pred, pred_out.cpu()), dim = 0)
        test_targets = torch.cat((test_targets, target.type(torch.float).cpu()))

      accuracy = model.evaluate_accuracy(test_pred.numpy(), test_targets.numpy())
    print('Test Accuracy:{:.4f}.'.format(accuracy))

    
    
    n1 = 8
    n2 = 4
    pop_size = 60
    max_iters = 20
    n_prefix = 5
    n_suffix = 5
    batch_model = SentimentAnalysis(batch_size=pop_size, embedding_matrix = embedding_matrix, hidden_size = lstm_size, kept_prob = 0.8, num_layers=nlayer, bidirection=bidirection,embedding_dim = glove_len)
    
    batch_model.eval()
    batch_model.load_state_dict(torch.load(rnn_state_save))
    batch_model.to(device)
    
    neighbour_model = SentimentAnalysis(batch_size=n1, embedding_matrix = embedding_matrix, hidden_size = lstm_size, kept_prob = 0.8, num_layers=nlayer, bidirection=bidirection, embedding_dim = glove_len)
    
    neighbour_model.eval()
    neighbour_model.load_state_dict(torch.load(rnn_state_save))
    neighbour_model.to(device)
    lm_model = gpt_2_get_words_probs()
    use_lm = args.use_lm
    ga_attack = GeneticAttack_pytorch(model, batch_model, neighbour_model, compute_dis,
               lm_model, max_iters = max_iters, dataset = dataset,
               pop_size = pop_size, n1 = n1, n2 = n2, n_prefix = n_prefix,
               n_suffix = n_suffix, use_lm = use_lm, use_suffix = True)
    
    
    TEST_SIZE = args.test_size
    order_pre = 0
    n = 0
    seq_success = []
    seq_orig = []
    seq_orig_label = []
    word_varied = []
    orig_list = []
    adv_list =[]
    dist_list = []
    
    # if order_pre != 0:
    #   seq_success = np.load(os.path.join(save_path,'seq_success.npy'), allow_pickle = True).tolist()
    #   seq_orig = np.load(os.path.join(save_path,'seq_orig.npy')).tolist()
    #   seq_orig_label = np.load(os.path.join(save_path,'seq_orig_label.npy')).tolist()
    #   word_varied = np.load(os.path.join(save_path,'word_varied.npy'), allow_pickle = True).tolist()
    #   n = len(seq_success)
    
    for order, (seq, l, target) in enumerate(test_loader):
    
      if order>=order_pre:
#        print('Sequence number:{}'.format(order))
        seq_len = np.sum(np.sign(seq.numpy()))
        seq, l = seq.to(device), l.to(device)
        seq = seq.type(torch.LongTensor)
        model.eval()
        with torch.no_grad():
          preds = model.pred(seq, l, False)[1]
          orig_pred = np.argmax(preds.cpu().detach().numpy())
        if orig_pred != target.numpy()[0]:
#          print('Wrong original prediction')
#          print('----------------------')
          continue
        if seq_len > 100:
#          print('Sequence is too long')
#          print('----------------------')
          continue
        print('Sequence number:{}'.format(order))
        print('Length of sentence: {}, Number of samples:{}'.format(l.item(), n+1))
        print(preds)
        seq_orig.append(seq[0].numpy())
        seq_orig_label.append(target.numpy()[0])
        target = 1-target.numpy()[0]
        # seq_success.append(ga_attack.attack(seq, target, l))
        
        # if None not in np.array(seq_success[n]):
        #   w_be = [dataset.inv_dict[seq_orig[n][i]] for i in list(np.where(seq_success[n] != seq_orig[n])[0])]
        #   w_to = [dataset.inv_dict[seq_success[n][i]] for i in list(np.where(seq_success[n] != seq_orig[n])[0])]
        #   for i in range(len(w_be)):
        #     print('{} ----> {}'.format(w_be[i], w_to[i]))
        #   word_varied.append([w_be]+[w_to])
        # else:
        #   print('Fail')
        # print('----------------------')
        # n += 1
        
        # np.save(os.path.join(save_path,'seq_success_1000.npy'), np.array(seq_success))
        # np.save(os.path.join(save_path,'seq_orig_1000.npy'), np.array(seq_orig))
        # np.save(os.path.join(save_path,'seq_orig_label_1000.npy'), np.array(seq_orig_label))
        # np.save(os.path.join(save_path,'word_varied_1000.npy'), np.array(word_varied))
        
        # if n>TEST_SIZE:
        #   break 
        
        
        orig_list.append(seq[0].numpy())
        x_adv = ga_attack.attack( seq, target, l)
        adv_list.append(x_adv)
        if x_adv is None:
            print('%d failed' %(order))
            dist_list.append(100000)
        else:
            num_changes = np.sum(seq[0].numpy() != x_adv)
            print('%d - %d changed.' %(order, num_changes))
            dist_list.append(num_changes)
            # display_utils.visualize_attack(sess, model, dataset, x_orig, x_adv)
            # display_utils.visualize_attack(sess, model, dataset, x_orig, x_adv)
            w_be = [dataset.inv_dict[seq[0].numpy().tolist()[i]] for i in list(np.where(seq[0].numpy() != np.array(x_adv))[0])]
            w_to = [dataset.inv_dict[x_adv[i]] for i in list(np.where(seq[0].numpy() != np.array(x_adv))[0])]
            for i in range(len(w_be)):
              print('{} ----> {}'.format(w_be[i], w_to[i]))
        
          
        
        n += 1
        if n>TEST_SIZE:
          break
        orig_len = [np.sum(np.sign(x)) for x in orig_list]
        normalized_dist_list = [dist_list[i]/orig_len[i] for i in range(len(orig_list)) ]
        SUCCESS_THRESHOLD  = 0.25
        successful_attacks = [x <= SUCCESS_THRESHOLD for x in normalized_dist_list]
        print('Attack success rate : {:.2f}%'.format(np.mean(successful_attacks)*100))
        SUCCESS_THRESHOLD  = 0.2
        successful_attacks = [x <= SUCCESS_THRESHOLD for x in normalized_dist_list]
        print('Attack success rate : {:.2f}%'.format(np.mean(successful_attacks)*100))
        print('--------------------------')
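A small sketch of the success-rate metric computed at the end of the loop above: an attack counts as successful when the fraction of changed words (changes over the original, non-padded length) is at or below the threshold; failed attacks were recorded with the 100000 sentinel distance and therefore never pass (the numbers here are made up):

import numpy as np

dist_list = [3, 100000, 10, 2]   # number of changed words per attacked sample
orig_len = [40, 35, 30, 50]      # non-padded lengths, i.e. np.sum(np.sign(seq))
normalized_dist_list = [d / l for d, l in zip(dist_list, orig_len)]
for threshold in (0.25, 0.2):
    successful_attacks = [x <= threshold for x in normalized_dist_list]
    print('Attack success rate : {:.2f}%'.format(np.mean(successful_attacks) * 100))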
Example #10
def create_dataloader(dataset_type, root):
    if dataset_type == 'mnist':
        mean = (0.1307, )
        std = (0.3081, )
        transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean=mean, std=std)])

        # load dataset
        train_set = datasets.MNIST(root,
                                   train=True,
                                   transform=transform,
                                   download=True)
        test_set = datasets.MNIST(root,
                                  train=False,
                                  transform=transform,
                                  download=False)
        val_set = test_set

        indices = np.arange(len(train_set))
        np.random.shuffle(indices)
        labeled_set = Subset(train_set, indices=indices[:args.num_labeled])
        train_set = datasets.MNIST(root,
                                   train=True,
                                   transform=TransformFixMatch(mean, std),
                                   download=False)
        unlabeled_set = Subset(train_set, indices=indices[:args.num_unlabeled])

    elif dataset_type == 'cifar10':
        mean = [0.49139968, 0.48215827, 0.44653124]
        std = [0.24703233, 0.24348505, 0.26158768]

        transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])

        test_transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean, std)])

        # load dataset
        train_set = datasets.CIFAR10(root,
                                     train=True,
                                     transform=transform,
                                     download=True)
        test_set = datasets.CIFAR10(root,
                                    train=False,
                                    transform=test_transform,
                                    download=False)
        val_set = test_set

        labeled_set = Subset(train_set,
                             indices=np.random.permutation(
                                 len(train_set))[:args.num_labeled])
        train_set = datasets.CIFAR10(root,
                                     train=True,
                                     transform=TransformFixMatch(mean, std),
                                     download=False)
        unlabeled_set = Subset(train_set,
                               indices=np.random.permutation(
                                   len(train_set))[:args.num_unlabeled])

    elif dataset_type == 'cifar100':
        mean = [0.5071, 0.4865, 0.4409]
        std = [0.2673, 0.2564, 0.2762]

        transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(20),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])

        test_transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean, std)])

        # load dataset
        train_set = datasets.CIFAR100(root,
                                      train=True,
                                      transform=transform,
                                      download=True)
        test_set = datasets.CIFAR100(root,
                                     train=False,
                                     transform=test_transform,
                                     download=False)
        val_set = test_set

        labeled_set = Subset(train_set,
                             indices=np.random.permutation(
                                 len(train_set))[:args.num_labeled])
        train_set = datasets.CIFAR100(root,
                                      train=True,
                                      transform=TransformFixMatch(mean, std),
                                      download=False)
        unlabeled_set = Subset(train_set,
                               indices=np.random.permutation(
                                   len(train_set))[:args.num_unlabeled])

    # generate DataLoader
    # train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
    labeled_loader = DataLoader(labeled_set,
                                batch_size=args.batch_size,
                                shuffle=True)
    unlabeled_loader = DataLoader(unlabeled_set,
                                  batch_size=args.batch_size * args.mu,
                                  shuffle=True)
    val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False)
    test_loader = DataLoader(test_set,
                             batch_size=args.batch_size,
                             shuffle=False)

    print('Labeled data:', len(labeled_set), 'Unlabeled data:',
          len(unlabeled_set))
    return labeled_loader, unlabeled_loader, val_loader, test_loader
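The loaders above pair one labeled batch with an unlabeled batch that is `mu` times larger, as in FixMatch. A self-contained sketch of that pairing with dummy tensors (illustrative only, not part of the example above):

import torch
from torch.utils.data import DataLoader, TensorDataset

batch_size, mu = 4, 7
labeled = TensorDataset(torch.randn(64, 3, 32, 32), torch.randint(0, 10, (64,)))
unlabeled = TensorDataset(torch.randn(448, 3, 32, 32))
labeled_dl = DataLoader(labeled, batch_size=batch_size, shuffle=True)
unlabeled_dl = DataLoader(unlabeled, batch_size=batch_size * mu, shuffle=True)

for (x_l, y_l), (x_u,) in zip(labeled_dl, unlabeled_dl):
    # each labeled batch of size B is paired with an unlabeled batch of size B * mu
    assert x_u.size(0) == mu * x_l.size(0)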
Example #11
0
def get_dataloaders(dataset,
                    batch,
                    num_workers,
                    dataroot,
                    ops_names,
                    magnitudes,
                    cutout,
                    cutout_length,
                    split=0.5,
                    split_idx=0,
                    target_lb=-1):
    """

    Args:
        dataset: str
        batch: int
        num_workers: int
        dataroot: the dataset dir
        ops_names: list[tuple], [N=105, K=2], str
        magnitudes: tensor, shape [N, k]
        cutout: boolean,
        cutout_length: int
        split: float, default 0.5
        split_idx: int, the number of the next(StratifiedShuffleSplit.split) function is called is equal `split_idx` + 1
        target_lb: int, target label, if `target_lb` > 0, the train_label only include the `target_lb`

    Returns:

    """
    if 'cifar' in dataset or 'svhn' in dataset:
        transform_train_pre = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
        ])
        transform_train_after = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
    elif 'imagenet' in dataset:
        transform_train_pre = transforms.Compose([
            transforms.RandomResizedCrop(224,
                                         scale=(0.08, 1.0),
                                         interpolation=Image.BICUBIC),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
            ),
        ])
        transform_train_after = transforms.Compose([
            transforms.ToTensor(),
            Lighting(0.1, _IMAGENET_PCA['eigval'], _IMAGENET_PCA['eigvec']),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        transform_test = transforms.Compose([
            transforms.Resize(256, interpolation=Image.BICUBIC),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    else:
        raise ValueError('unsupported dataset=%s' % dataset)

    if cutout and cutout_length != 0:
        transform_train_after.transforms.append(CutoutDefault(cutout_length))

    if dataset == 'cifar10':
        total_trainset = torchvision.datasets.CIFAR10(root=dataroot,
                                                      train=True,
                                                      download=True,
                                                      transform=None)
        total_trainset.train_data = total_trainset.train_data[:100]
        total_trainset.train_labels = total_trainset.train_labels[:100]
        # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None)
        total_trainset.targets = total_trainset.train_labels
    elif dataset == 'reduced_cifar10':
        total_trainset = torchvision.datasets.CIFAR10(root=dataroot,
                                                      train=True,
                                                      download=True,
                                                      transform=None)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=46000,
                                     random_state=0)  # 4000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.train_labels)
        train_idx, valid_idx = next(sss)
        targets = [total_trainset.train_labels[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None)
    elif dataset == 'cifar100':
        total_trainset = torchvision.datasets.CIFAR100(root=dataroot,
                                                       train=True,
                                                       download=True,
                                                       transform=None)
        total_trainset.targets = total_trainset.train_labels
        # testset = torchvision.datasets.CIFAR100(root=dataroot, train=False, download=True, transform=transform_test)
    elif dataset == 'reduced_cifar100':
        total_trainset = torchvision.datasets.CIFAR100(root=dataroot,
                                                       train=True,
                                                       download=True,
                                                       transform=None)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=46000,
                                     random_state=0)  # 4000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        train_idx, valid_idx = next(sss)
        targets = [total_trainset.targets[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None)
    elif dataset == 'svhn':
        trainset = torchvision.datasets.SVHN(root=dataroot,
                                             split='train',
                                             download=True,
                                             transform=None)
        extraset = torchvision.datasets.SVHN(root=dataroot,
                                             split='extra',
                                             download=True,
                                             transform=None)
        total_trainset = ConcatDataset([trainset, extraset])
        # testset = torchvision.datasets.SVHN(root=dataroot, split='test', download=True, transform=transform_test)
    elif dataset == 'reduced_svhn':
        total_trainset = torchvision.datasets.SVHN(root=dataroot,
                                                   split='train',
                                                   download=True,
                                                   transform=None)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=73257 - 1000,
                                     random_state=0)  # 1000 trainset
        # sss = sss.split(list(range(len(total_trainset))), total_trainset.targets)
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.labels)
        train_idx, valid_idx = next(sss)
        # targets = [total_trainset.targets[idx] for idx in train_idx]
        targets = [total_trainset.labels[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        # total_trainset.targets = targets
        total_trainset.labels = targets
        total_trainset.targets = targets

        # testset = torchvision.datasets.SVHN(root=dataroot, split='test', download=True, transform=transform_test)
    elif dataset == 'imagenet':
        total_trainset = ImageNet(root=os.path.join(dataroot,
                                                    'imagenet-pytorch'),
                                  download=True,
                                  transform=None)
        # testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test)

        # compatibility
        total_trainset.targets = [lb for _, lb in total_trainset.samples]
    elif dataset == 'reduced_imagenet':
        # randomly chosen indices
        idx120 = [
            904, 385, 759, 884, 784, 844, 132, 214, 990, 786, 979, 582, 104,
            288, 697, 480, 66, 943, 308, 282, 118, 926, 882, 478, 133, 884,
            570, 964, 825, 656, 661, 289, 385, 448, 705, 609, 955, 5, 703, 713,
            695, 811, 958, 147, 6, 3, 59, 354, 315, 514, 741, 525, 685, 673,
            657, 267, 575, 501, 30, 455, 905, 860, 355, 911, 24, 708, 346, 195,
            660, 528, 330, 511, 439, 150, 988, 940, 236, 803, 741, 295, 111,
            520, 856, 248, 203, 147, 625, 589, 708, 201, 712, 630, 630, 367,
            273, 931, 960, 274, 112, 239, 463, 355, 955, 525, 404, 59, 981,
            725, 90, 782, 604, 323, 418, 35, 95, 97, 193, 690, 869, 172
        ]
        total_trainset = ImageNet(root=os.path.join(dataroot,
                                                    'imagenet-pytorch'),
                                  transform=None)
        # testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test)

        # compatibility
        total_trainset.targets = [lb for _, lb in total_trainset.samples]

        # sss = StratifiedShuffleSplit(n_splits=1, test_size=len(total_trainset) - 6000, random_state=0)  # 4000 trainset
        # sss = StratifiedShuffleSplit(n_splits=1, test_size=0, random_state=0)  # 4000 trainset
        # sss = sss.split(list(range(len(total_trainset))), total_trainset.targets)
        # train_idx, valid_idx = next(sss)
        # print(len(train_idx), len(valid_idx))

        # filter out
        # train_idx = list(filter(lambda x: total_trainset.labels[x] in idx120, train_idx))
        # valid_idx = list(filter(lambda x: total_trainset.labels[x] in idx120, valid_idx))
        # # test_idx = list(filter(lambda x: testset.samples[x][1] in idx120, range(len(testset))))
        train_idx = list(range(len(total_trainset)))

        filter_train_idx = list(
            filter(lambda x: total_trainset.targets[x] in idx120, train_idx))
        # valid_idx = list(filter(lambda x: total_trainset.targets[x] in idx120, valid_idx))
        # test_idx = list(filter(lambda x: testset.samples[x][1] in idx120, range(len(testset))))
        # print(len(filter_train_idx))

        targets = [
            idx120.index(total_trainset.targets[idx])
            for idx in filter_train_idx
        ]
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=len(filter_train_idx) - 6000,
                                     random_state=0)  # 4000 trainset
        sss = sss.split(list(range(len(filter_train_idx))), targets)
        train_idx, valid_idx = next(sss)
        train_idx = [filter_train_idx[x] for x in train_idx]
        valid_idx = [filter_train_idx[x] for x in valid_idx]

        targets = [
            idx120.index(total_trainset.targets[idx]) for idx in train_idx
        ]
        for idx in range(len(total_trainset.samples)):
            if total_trainset.samples[idx][1] not in idx120:
                continue
            total_trainset.samples[idx] = (total_trainset.samples[idx][0],
                                           idx120.index(
                                               total_trainset.samples[idx][1]))
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        # for idx in range(len(testset.samples)):
        #     if testset.samples[idx][1] not in idx120:
        #         continue
        #     testset.samples[idx] = (testset.samples[idx][0], idx120.index(testset.samples[idx][1]))
        # testset = Subset(testset, test_idx)
        print('reduced_imagenet train=', len(total_trainset))
    else:
        raise ValueError('invalid dataset name=%s' % dataset)

    train_sampler = None
    if split > 0.0:
        sss = StratifiedShuffleSplit(n_splits=5,
                                     test_size=split,
                                     random_state=0)
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        for _ in range(split_idx + 1):
            train_idx, valid_idx = next(sss)

        if target_lb >= 0:
            train_idx = [
                i for i in train_idx if total_trainset.targets[i] == target_lb
            ]
            valid_idx = [
                i for i in valid_idx if total_trainset.targets[i] == target_lb
            ]

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetSampler(valid_idx)

        # if horovod:
        #     import horovod.torch as hvd
        #     train_sampler = torch.utils.data.distributed.DistributedSampler(train_sampler, num_replicas=hvd.size(), rank=hvd.rank())
    else:
        valid_sampler = SubsetSampler([])

        # if horovod:
        #     import horovod.torch as hvd
        #     train_sampler = torch.utils.data.distributed.DistributedSampler(valid_sampler, num_replicas=hvd.size(), rank=hvd.rank())
    train_data = AugmentDataset(total_trainset, transform_train_pre,
                                transform_train_after, transform_test,
                                ops_names, True, magnitudes)
    valid_data = AugmentDataset(total_trainset, transform_train_pre,
                                transform_train_after, transform_test,
                                ops_names, False, magnitudes)

    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=batch,
                                              shuffle=False,
                                              sampler=train_sampler,
                                              drop_last=False,
                                              pin_memory=True,
                                              num_workers=num_workers)

    validloader = torch.utils.data.DataLoader(
        valid_data,
        batch_size=batch,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        sampler=valid_sampler,
        drop_last=False,
        pin_memory=True,
        num_workers=num_workers)

    # trainloader = torch.utils.data.DataLoader(
    #     total_trainset, batch_size=batch, shuffle=True if train_sampler is None else False, num_workers=32, pin_memory=True,
    #     sampler=train_sampler, drop_last=True)
    # validloader = torch.utils.data.DataLoader(
    #     total_trainset, batch_size=batch, shuffle=False, num_workers=16, pin_memory=True,
    #     sampler=valid_sampler, drop_last=False)

    # testloader = torch.utils.data.DataLoader(
    #     testset, batch_size=batch, shuffle=False, num_workers=32, pin_memory=True,
    #     drop_last=False
    # )
    print(len(train_data))
    return trainloader, validloader
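A hypothetical call to `get_dataloaders`; the operation names and magnitude values below are placeholders with the shapes the docstring describes, not values from the source project:

import torch

ops_names = [('op_a', 'op_b')] * 105          # placeholder [N=105, K=2] operation-name pairs
magnitudes = torch.rand(105, 2)               # placeholder magnitude tensor of shape [N, K]

trainloader, validloader = get_dataloaders(dataset='cifar10',
                                           batch=32,
                                           num_workers=4,
                                           dataroot='./data',
                                           ops_names=ops_names,
                                           magnitudes=magnitudes,
                                           cutout=True,
                                           cutout_length=16,
                                           split=0.5,
                                           split_idx=0,
                                           target_lb=-1)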
Example #12
0
    def train(self):
        data_path = self.config.get('TRAIN', 'data_path')
        batch_size = self.config.getint('TRAIN', 'batch_size')
        max_epoches = self.config.getint('TRAIN', 'max_epoches')
        checkpoint = self.config.getint('TRAIN', 'checkpoint')
        is_checkpoint = self.config.getboolean('TRAIN', 'is_checkpoint')
        gcn_path = data_path

        transformer = PreProcessing(is_train=True)

        trainval_dataset = NetworkDataset(
            data_path,
            gcn_path,
            # self.node_metrics,
            self.interface_metrics,
            self.gcn_metrics,
            # self.bgp_metrics,
            transformer,
            # is_train=True
        )

        train_indices, val_indices = train_test_split(
            list(range(len(trainval_dataset))),
            test_size=0.1,
            stratify=trainval_dataset.label,
            random_state=self.seed)
        train_dataset = Subset(trainval_dataset, train_indices)
        train_size = len(train_dataset)
        val_dataset = Subset(trainval_dataset, val_indices)
        val_size = len(val_dataset)
        print(f'train size : {train_size} val size: {val_size}')

        train_dataloader = DataLoader(train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      collate_fn=collate)
        val_dataloader = DataLoader(val_dataset,
                                    batch_size=val_size,
                                    shuffle=True,
                                    collate_fn=collate)

        input_dim = trainval_dataset.column_dim
        # input_dim2 = len(self.interface_metrics)
        # input_dim3 = 2
        # input_dim4 = len(self.bgp_metrics)
        target_dim = len(self.events.keys())
        model = GraphClassifier(input_dim, target_dim).to(self.device)
        # model = DataParallel(model)
        model.double()

        if is_checkpoint:
            print('./models/gcn_{:}.model'.format(checkpoint))
            model.load_state_dict(
                torch.load('./models/gcn_{:}.model'.format(checkpoint)))

        loss_function = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=1e-4)

        li_times = []

        for epoch in range(1 + checkpoint, max_epoches + 1):
            if self.device == "cuda":
                torch.cuda.synchronize()
            since = int(round(time.time() * 1000))
            running_loss, correct, total = (0, 0, 0)
            for train_inputs, train_labels in train_dataloader:
                optimizer.zero_grad()
                train_scores = model(train_inputs)
                train_labels = train_labels.to(self.device)
                loss = loss_function(train_scores, train_labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predict = torch.max(train_scores, 1)
                correct += (predict == train_labels).sum().item()
                total += train_labels.size(0)
            train_loss = running_loss / len(train_dataloader)
            train_acc = correct / total

            with torch.no_grad():
                val_inputs, val_labels = next(iter(val_dataloader))
                val_scores = model(val_inputs)
                val_labels = val_labels.to(self.device)
                val_loss = loss_function(val_scores, val_labels)

                bi_scores = torch.argmax(val_scores, dim=1).to('cpu')
                y_val_scores = val_labels.to('cpu').numpy()
                val_acc = accuracy_score(y_val_scores, bi_scores)

            if self.device == "cuda":
                torch.cuda.synchronize()
            time_elapsed = int(round(time.time() * 1000)) - since
            li_times.append(time_elapsed)
            print(
                'EPOCH [{}/{}] train loss: {} train acc: {} val loss: {} val acc: {}, elapsed: {}ms'
                .format(epoch, max_epoches, train_loss, train_acc, val_loss,
                        val_acc, time_elapsed))

            if epoch % 10 == 0:
                print("save model")
                torch.save(model.state_dict(),
                           "{:}/gcn_{:}.model".format(self.model_dir, epoch))
        print(np.sum(li_times))
Example #13
0
    def run(self):
        # Get Data & MetaData
        input_size, input_channels, num_classes, train_data, test_data = get_data(
            dataset_name=config.DATASET,
            data_path=config.DATAPATH,
            cutout_length=16,
            test=True)

        # Train, Test Data Loaders
        n_train = len(train_data)
        n_test = len(test_data)
        if config.PERCENTAGE_OF_DATA < 100:
            n_train = (n_train // 100) * config.PERCENTAGE_OF_DATA
            n_test = (n_test // 100) * config.PERCENTAGE_OF_DATA
            '''
            train_data = train_data[:n_train]
            test_data = test_data[:n_test]
            test_data = test_data[:n_test]
            '''
            # take a random sample of the indices
            train_data = Subset(
                train_data,
                np.random.choice(range(len(train_data)),
                                 size=n_train,
                                 replace=False))
            test_data = Subset(
                test_data,
                np.random.choice(range(len(test_data)),
                                 size=n_test,
                                 replace=False))

        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=config.BATCH_SIZE,
            num_workers=config.NUM_DOWNLOAD_WORKERS,
            pin_memory=config.PIN_MEMORY)
        test_loader = torch.utils.data.DataLoader(
            test_data,
            batch_size=config.BATCH_SIZE,
            num_workers=config.NUM_DOWNLOAD_WORKERS,
            pin_memory=config.PIN_MEMORY)

        # Create Model
        print("Alpha Normal")
        print_alpha(self.alpha_normal)
        print("Alpha Reduce")
        print_alpha(self.alpha_reduce)
        print("Creating Model from these Alpha\n\n")
        self.model = LearntModel(alpha_normal=self.alpha_normal,
                                 alpha_reduce=self.alpha_reduce,
                                 num_cells=config.NUM_CELLS,
                                 channels_in=input_channels,
                                 channels_start=config.CHANNELS_START,
                                 stem_multiplier=config.STEM_MULTIPLIER,
                                 num_classes=num_classes,
                                 primitives=OPS,
                                 auxiliary=(not config.NO_AUXILIARY))

        # Port model to gpu if availabile
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            # cuDNN optimizations if possible
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.enabled = True

        # Weights Optimizer
        w_optim = torch.optim.SGD(params=self.model.parameters(),
                                  lr=config.WEIGHTS_LR,
                                  momentum=config.WEIGHTS_MOMENTUM,
                                  weight_decay=config.WEIGHTS_WEIGHT_DECAY)

        # Learning Rate Scheduler
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            w_optim, config.EPOCHS, eta_min=config.WEIGHTS_LR_MIN)

        # Register Signal Handler for interrupts & kills
        signal.signal(signal.SIGINT, self.terminate)

        # Number of parameters
        print("# of Parameters (M)", count_parameters_in_millions(self.model))

        # Training Loop
        best_top1 = 0.
        loss_criterion = nn.CrossEntropyLoss()
        for epoch in range(config.EPOCHS):
            lr = lr_scheduler.get_lr()[0]

            # Training (One epoch)
            self.train(train_loader=train_loader,
                       model=self.model,
                       w_optim=w_optim,
                       epoch=epoch,
                       lr=lr,
                       gradient_clip=config.WEIGHTS_GRADIENT_CLIP,
                       epochs=config.EPOCHS,
                       loss_criterion=loss_criterion)

            # Learning Rate Step
            lr_scheduler.step()

            # Test (One epoch)
            cur_step = (epoch + 1) * len(train_loader)
            top1 = self.test(test_loader=test_loader,
                             model=self.model,
                             epoch=epoch,
                             cur_step=cur_step,
                             epochs=config.EPOCHS)

            # Save Checkpoint
            # Creates checkpoint directory if it doesn't exist
            if not os.path.exists(config.CHECKPOINT_PATH + "/" +
                                  config.DATASET + "/" + self.dt_string):
                os.makedirs(config.CHECKPOINT_PATH + "/" + config.DATASET +
                            "/" + self.dt_string)
            # torch.save(self.model, config.CHECKPOINT_PATH + "/" + config.DATASET + "/" + self.dt_string + "/" + str(epoch) + ".pt")
            if best_top1 < top1:
                best_top1 = top1
                torch.save(
                    self.model, config.CHECKPOINT_PATH + "/" + config.DATASET +
                    "/" + self.dt_string + "/" + "best.pt")
            # GPU Memory Allocated for Model in Weight Sharing Phase
            if epoch == 0:
                try:
                    print(
                        "Learnt Architecture Training: Max GPU Memory Used",
                        torch.cuda.max_memory_allocated() /
                        (1024 * 1024 * 1024), "GB")
                except:
                    print("Unable to retrieve memory data")

        # Log Best Accuracy so far
        print("Final best Prec@1 = {:.4%}".format(best_top1))

        self.terminate()
Example #14
0
File: base.py Project: imirzadeh/CL-Gym
    def load_memory(
            self,
            task: int,
            batch_size: int,
            shuffle: Optional[bool] = True,
            num_workers: Optional[int] = 0,
            pin_memory: Optional[bool] = True
    ) -> Tuple[DataLoader, DataLoader]:
        """
        Makes dataloaders for episodic memory/replay buffer.
        
        Args:
            task: The task number.
            batch_size: The batch_size for dataloaders.
            shuffle: Should loaders be shuffled? Default: True.
            num_workers: corresponds to Pytorch's `num_workers` argument. Default: 0
            pin_memory: corresponds to Pytorch's `pin_memory` argument. Default: True.

        Returns:
            a Tuple of dataloaders, i.e., (train_loader, validation_loader).

        Examples::
            >>> benchmark = Benchmark(num_tasks=2, per_task_memory_examples=16)
            >>> # task 1 memory loaders: returns 2 batches (i.e., 16 examples)
            >>> mem_train_loader_1, mem_val_loader_1 = benchmark.load_memory(1, batch_size=8)
            >>> # task 2 memory loaders: returns 4 batches (i.e., 16 examples)
            >>> mem_train_loader_2, mem_val_loader_2 = benchmark.load_memory(2, batch_size=4)

        .. note::
            This method uses `class_uniform` sampling, i.e., if each task has 10 classes
            and `per_task_memory_examples=20`, then the returned samples have 2 examples per class.

        .. warning::
            The method will throw an error if `Benchmark` is instantiated without :attr:`per_task_memory_examples`.
            The reason is that, behind the scenes, we compute the indices for memory examples in the
            `precompute_memory_indices()` method, and this method relies on those computations.
        """
        if not self.per_task_memory_examples:
            raise ValueError(
                "Called load_memory() but per_task_memory_examples is not set")

        if task > self.num_tasks:
            raise ValueError(
                f"Asked for memory of task={task} while the benchmark has {self.num_tasks} tasks"
            )

        train_indices = self.memory_indices_train[task]
        test_indices = self.memory_indices_test[task]
        train_dataset = Subset(self.trains[task], train_indices)
        test_dataset = Subset(self.tests[task], test_indices)

        train_loader = DataLoader(train_dataset,
                                  batch_size,
                                  shuffle,
                                  num_workers=num_workers,
                                  pin_memory=pin_memory)
        test_loader = DataLoader(test_dataset,
                                 batch_size,
                                 shuffle,
                                 num_workers=num_workers,
                                 pin_memory=pin_memory)

        return train_loader, test_loader
Example #15
0
##########################
### CIFAR-10 Dataset
##########################

# Note transforms.ToTensor() scales input images
# to 0-1 range

train_indices = torch.arange(0, 49000)
valid_indices = torch.arange(49000, 50000)

train_and_valid = datasets.CIFAR10(root='data',
                                   train=True,
                                   transform=transforms.ToTensor(),
                                   download=True)

train_dataset = Subset(train_and_valid, train_indices)
valid_dataset = Subset(train_and_valid, valid_indices)

test_dataset = datasets.CIFAR10(root='data',
                                train=False,
                                transform=transforms.ToTensor())

#####################################################
### Data Loaders
#####################################################

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=8,
                          shuffle=True)
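The listing stops after the training loader; matching loaders for the validation and test splits would plausibly look like this (assuming the same `BATCH_SIZE` constant used above):

valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=BATCH_SIZE,
                          num_workers=8,
                          shuffle=False)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         num_workers=8,
                         shuffle=False)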
Example #16
0
from torch import nn, optim
from torch.utils.data import Subset
from torchvision.datasets import CIFAR10
from torchvision.transforms import transforms
import numpy as np
from tqdm import tqdm, trange

from a2c_net import A2CNet
from rl.env_a2c import AttackEnv


if __name__ == '__main__':
    transform = transforms.Compose((transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))))
    test_dataset = CIFAR10('data', train=False, transform=transform, download=False)
    test_dataset = Subset(test_dataset, range(9000))

    image_size = 32 * 32
    n_classes = 10
    max_episodes = 20
    max_episode_len = 1000

    env = AttackEnv()

    obs_space = env.observation_space
    action_space = env.action_space

    obs_size = obs_space.low.size

    n_actions = action_space.n
Example #17
0
def get_train_eval_loaders(path, batch_size=256):
    """Setup the dataflow:
        - load CIFAR100 train and test datasets
        - setup train/test image transforms
            - horizontally flipped randomly and augmented using cutout.
            - each mini-batch contained 256 examples
        - setup train/test data loaders

    Returns:
        train_loader, test_loader, eval_train_loader
    """
    train_transform = Compose([
        Pad(4),
        RandomCrop(32),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        RandomErasing(),
    ])

    test_transform = Compose([
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = CIFAR100(root=path,
                             train=True,
                             transform=train_transform,
                             download=True)
    test_dataset = CIFAR100(root=path,
                            train=False,
                            transform=test_transform,
                            download=False)

    train_eval_indices = [
        random.randint(0,
                       len(train_dataset) - 1)
        for i in range(len(test_dataset))
    ]
    train_eval_dataset = Subset(train_dataset, train_eval_indices)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              num_workers=12,
                              shuffle=True,
                              drop_last=True,
                              pin_memory=True)

    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             num_workers=12,
                             shuffle=False,
                             drop_last=False,
                             pin_memory=True)

    eval_train_loader = DataLoader(train_eval_dataset,
                                   batch_size=batch_size,
                                   num_workers=12,
                                   shuffle=False,
                                   drop_last=False,
                                   pin_memory=True)

    return train_loader, test_loader, eval_train_loader
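`train_eval_dataset` above is a random subset of the training set with the same size as the test set, so training accuracy can be monitored at the same cost as a test pass. A hypothetical call (the path is a placeholder):

train_loader, test_loader, eval_train_loader = get_train_eval_loaders('./cifar100', batch_size=256)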
Example #18
0
def main(n_train, batch_train_size, n_test, batch_test_size):
    """
        :param n_model: number of models for the comittee
        :param n_train: number of training data to be used, this decides how long the training process will be
        :param batch_train_size: batch size for training process, keep it under 20
        :param idx_ratio: ratio of high entropy:ratio of random
        :return:
        """

    # paths
    img_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'consensus_test', 'example.png')
    save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'consensus_test')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    csv_name_index = 'index.csv'
    dir_name = 'consensus_bulk_40_from_90_005_'
    index_path_name = 'consensus_90_5_005'
    save_weights_flag = True
    cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes')
    cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'class_weights.pkl')
    cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscape_pretrain')
    inference_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'inference')
    color_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'color')
    print('cityscape_path: ' + cityscape_path)
    print(dir_name)
    print(index_path_name)

    # arguments
    n_train = 2880
    n_pretrain = 0
    n_test = 500
    n_epoch = 40
    test_factor = 3  # committee only tested every test_factor-th batch
    batch_train_size = 3*max(torch.cuda.device_count(), 1)
    batch_train_size_pretrain = 4
    batch_test_size = 25*max(torch.cuda.device_count(), 1)
    lr = 0.0001
    loss_print = 2
    idx_ratio = [0.0, 1.0]  # proportion to qbc:random
    continue_flag = False
    poly_exp = 1.0
    feature_extract = True
    manual_seed = 10
    np.random.seed(manual_seed)

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print(torch.cuda.device_count(), "GPUs detected")
    torch.manual_seed(manual_seed)
    # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb')

    # get data and index library
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = T.Compose([T.Resize((800, 800), Image.BICUBIC), T.ToTensor(), T.Normalize(*mean_std)])
    train_dataset = dataset_preset.Dataset_Cityscapes_n(root=cityscape_path, split='train', mode='fine',
                                                        target_type='semantic',
                                                        transform=transform,
                                                        target_transform=segmen_preset.label_id2label,
                                                        n=n_train)
    # read used index
    csv_path_index_source = os.path.join(save_path, index_path_name, csv_name_index)
    with open(csv_path_index_source) as csv_file:
        data = csv_file.readlines()
        train_index = np.array(list(map(int, data[-1][3:str.find(data[-1], ';')].split(','))))
        print(len(train_index))
        # np.random.shuffle(train_index)
        train_index = train_index[int(n_train*0.1):int(n_train*0.5)]
    print(len(train_index))
    train_dataset = Subset(train_dataset, indices=train_index)
    test_dataset = dataset_preset.Dataset_Cityscapes_n_i(root=cityscape_path, split='val', mode='fine',
                                                         target_type='semantic',
                                                         transform=transform,
                                                         target_transform=segmen_preset.label_id2label,
                                                         n=n_test)
    # only test on part of data
    train_dataloader = DataLoader(train_dataset, batch_size=batch_train_size, shuffle=True,
                                  num_workers=3*max(torch.cuda.device_count(), 1), drop_last=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_test_size, shuffle=True,
                                 num_workers=3*max(torch.cuda.device_count(), 1), drop_last=True)
    print("Datasets loaded!")

    # create models, optimizers, scheduler, criterion
    # the models
    fcn_model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=False, progress=True,
                                                                    num_classes=segmen_preset.n_labels_valid,
                                                                    aux_loss=True)
    fcn_model = fcn_model.cuda()
    fcn_model = nn.DataParallel(fcn_model)

    # the optimizers
    params_to_update = fcn_model.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in fcn_model.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in fcn_model.named_parameters():
            if param.requires_grad:
                print("\t", name)
    params = add_weight_decay(fcn_model, l2_value=0.0001)
    '''optimizer = torch.optim.SGD([{'params': fcn_model.module.classifier.parameters()},
                                  {'params': list(fcn_model.module.backbone.parameters()) +
                                             list(fcn_model.module.aux_classifier.parameters())}
                                  ], lr=lr, momentum=0.9)'''

    optimizer = torch.optim.Adam([{'params': fcn_model.module.classifier.parameters()},
                                  {'params': list(fcn_model.module.backbone.parameters()) +
                                             list(fcn_model.module.aux_classifier.parameters())}
                                  ], lr=lr, weight_decay=0.0001)
    lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

    with open(cityscape_loss_weight_path, "rb") as file:  # (needed for python3)
        class_weights = np.array(pickle.load(file))
    class_weights = torch.from_numpy(class_weights)
    class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()

    # report everything
    text = ('Model created' + (', n_train: ' + str(n_train)) + (', n_epoch: ' + str(n_epoch)) +
            (', batch_train_size: ' + str(batch_train_size)) + (', idx_ratio: ' + str(idx_ratio)) +
            (', n_test: ' + str(n_test)) + (', batch_test_size: ' + str(batch_test_size)) +
            (', test_factor: ' + str(test_factor)) + (', optimizer: ' + str(optimizer)) +
            (', model: ' + str(fcn_model)))
    print(text)

    # for documentation
    train_text = [str(x) for x in range(1, n_epoch+1)]
    test_text = [str(x) for x in range(1, n_epoch+1)]
    test_text_index = 0

    # write text to csv
    dir_number = 1
    while os.path.exists(os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number)))):
        dir_number += 1
    run_path = os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number)))
    os.makedirs(run_path)  # make run_* dir
    f = open(os.path.join(run_path, 'info.txt'), 'w+')  # write .txt file
    f.write(text)
    f.close()
    copy(__file__, os.path.join(run_path, os.path.basename(__file__)))

    # write training progress
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = ["Training progress for n_model = " + str(1) + ", idx_ratio:  " + str(idx_ratio) +
             ', for multiple epoch, torch seed: ' + str(manual_seed)]
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write test progress
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = ["Test progress for n_model = " + str(1) + ", idx_ratio:  " + str(idx_ratio)
             + ', for multiple epoch, torch seed: ' + str(manual_seed) + 'run_path: ' + run_path +
             'index_from: ' + index_path_name]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # load from previous run if requested
    if continue_flag:
        fcn_model.load_state_dict(torch.load(
            'C:\\Users\\steve\\Desktop\\projects\\al_kitti\\results\\first_test\\adam_run_005\\model_weight_epoch_10.pt'))
        print('weight loaded')

    # training process, n-th batch
    for i_epoch in range(n_epoch):
        loss_epoch = []
        iou_epoch = []
        time_epoch = []
        for i_batch, (data_train, target_train) in enumerate(train_dataloader):

            t = Timer()
            t.start()
            # train batch
            output, loss, iou, fcn_model, optimizer = train_batch(fcn_model, data_train, target_train,
                                                                  optimizer, device, criterion)
            print('Epoch: ' + str(i_epoch) + '\t Batch: ' + str(i_batch) + '/' + str(len(train_dataloader))
                  + '; model ' + str(0) +
                  '; train loss avg: ' + "{:.3f}".format(loss) +
                  '; train iou avg: ' + "{:.3f}".format(iou.mean()))
            for param_group in optimizer.param_groups:
                print(param_group['lr'])
            loss_epoch.append(loss)
            iou_epoch.append(iou.mean())
            time_epoch.append(t.stop())

        # document train result
        train_text[i_epoch] = train_text[i_epoch] + ";{:.4f}".format(np.array(loss_epoch).mean()) + \
                              ";{:.4f}".format(np.array(iou_epoch).mean()) + \
                              ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) + ';' + str(len(train_index))

        # update train documentation
        text = train_text[i_epoch].split(";")
        with open(csv_path_train, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # one epoch ends here
        scheduler.step()
        print(optimizer)
        # save temporary model
        if i_epoch % 10 == 0 or (i_epoch+1) == n_epoch:
            fcn_model.train()
            torch.save(fcn_model.state_dict(), os.path.join(run_path, ('model_weight_epoch_train' +
                                                                       '{:03d}'.format(i_epoch) + '.pt')))
            fcn_model.eval()
            torch.save(fcn_model.state_dict(), os.path.join(run_path, ('model_weight_epoch_' +
                                                                       '{:03d}'.format(i_epoch) + '.pt')))

        # perform test
        create_pred_img(fcn_model, test_dataloader, inference_path, color_path)
        all_result_dict = cityscapes_eval()

        # average training time
        mean_time = np.array(time_epoch).mean()

        # document test result
        test_text[test_text_index] = test_text[test_text_index] + \
                                     ";{:.4f}".format(all_result_dict['averageScoreClasses']) + \
                                     ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr'])) \
                                     + ";{:.4f}".format(mean_time) + ';' + str(len(train_index))

        # update test documentation
        text = test_text[test_text_index].split(";")
        with open(csv_path_test, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        test_text_index = test_text_index + 1
Example #19
0
        plt.imshow(image)
        plt.show()


if __name__ == "__main__":
    writer = SummaryWriter()
    name = "aölsdjfalök"
    saving_path = "models/" + name

    num_epochs = 100
    learn_rate = 3e-5

    dataset = MotBBSequence('dataset_utils/Mot17_test_single.txt',
                            use_only_first_video=False)
    train_data = Subset(dataset, range(0, dataset.valid_begin))
    valid_data = Subset(dataset, range(dataset.valid_begin, len(dataset)))

    obs_length = 10
    pred_length = 9

    loss_params = {
        "grid_shape": (16, 16),
        "image_shape": (416, 416),
        "path_anchors": "dataset_utils/anchors/anchors5.txt"
    }
    loss_function = NaiveLoss(loss_params)
    model = SequenceClassifier([16, 16, loss_function.num_anchors, 4],
                               [16, 16, loss_function.num_anchors, 4], 16)
    optimizer = optim.Adam(
        model.parameters(),
Example #20
0
def run(args):
    argstr = yaml.dump(args.__dict__, default_flow_style=False)
    print('arguments:')
    print(argstr)
    argfile = osp.join(osp.join(args.expdir), 'finetune_p_args.yaml')

    if osp.isfile(argfile):
        oldargs = yaml.safe_load(open(argfile))
        if oldargs != args.__dict__:
            print('WARNING: Changed configuration keys compared to stored experiment')
            utils.arguments.compare_dicts(oldargs, args.__dict__, verbose=True)

    args.cuda = not args.no_cuda
    args.validate_first = not args.no_validate_first
    args.validate = not args.no_validate

    if not args.dry:
        utils.ifmakedirs(args.expdir)
        logging.print_file(argstr, argfile)

    transforms = get_transforms(IN1K, args.input_size, crop=(args.input_crop == 'square'), need=('val',), backbone=args.backbone)
    datas = {}
    for split in ('train', 'val'):
        datas[split] = IdDataset(IN1K(args.imagenet_path, split, transform=transforms['val']))
    loaders = {}
    collate_fn = dict(collate_fn=list_collate) if args.input_crop == 'rect' else {}
    selected = []
    count = Counter()
    for i, label in enumerate(datas['train'].dataset.labels):
        if count[label] < args.images_per_class:
            selected.append(i)
            count[label] += 1
    datas['train'].dataset = Subset(datas['train'].dataset, selected)
    loaders['train'] = DataLoader(datas['train'], batch_size=args.batch_size, shuffle=True,
                                num_workers=args.workers, pin_memory=True, **collate_fn)
    loaders['val'] = DataLoader(datas['val'], batch_size=args.batch_size, shuffle=args.shuffle_val,
                                num_workers=args.workers, pin_memory=True, **collate_fn)

    model = get_multigrain(args.backbone, include_sampling=False,
                           pretrained_backbone=args.pretrained_backbone, learn_p=True)

    criterion = torch.nn.CrossEntropyLoss()
    if args.cuda:
        criterion = utils.cuda(criterion)
        model = utils.cuda(model)

    optimizers = OD()
    p = model.pool.p
    optimizers['p'] = SGD([p], lr=args.learning_rate, momentum=args.momentum)
    optimizers = MultiOptim(optimizers)

    def training_step(batch):
        optimizers.zero_grad()

        output_dict = model(batch['input'])
        loss = criterion(output_dict['classifier_output'], batch['classifier_target'])
        top1, top5 = utils.accuracy(output_dict['classifier_output'].data, batch['classifier_target'].data, topk=(1, 5))

        p.grad = torch.autograd.grad(loss, p)[0]  # partial backward
        optimizers.step()

        return OD([
            ('cross_entropy', loss.item()),
            ('p', p.item()),
            ('top1', top1),
            ('top5', top5),
        ])

    def validation_step(batch):
        with torch.no_grad():
            output_dict = model(batch['input'])
            target = batch['classifier_target']
            xloss = criterion(output_dict['classifier_output'], target)
            top1, top5 = utils.accuracy(output_dict['classifier_output'], target, topk=(1, 5))

        return OD([
            ('cross_entropy', xloss.item()),
            ('top1', top1),
            ('top5', top5),
        ])

    metrics_history = OD()

    checkpoints = utils.CheckpointHandler(args.expdir)

    if checkpoints.exists(args.resume_epoch, args.resume_from):
        epoch = checkpoints.resume(model, metrics_history=metrics_history,
                           resume_epoch=args.resume_epoch, resume_from=args.resume_from)
    else:
        raise ValueError('Checkpoint ' + args.resume_from + ' not found')

    if args.init_pooling_exponent is not None:  # overwrite stored pooling exponent
        p.data.fill_(args.init_pooling_exponent)

    print("Multigrain model with {} backbone and p={} pooling:".format(args.backbone, p.item()))
    print(model)

    def loop(loader, step, epoch, prefix=''):  # Training or validation loop
        metrics = defaultdict(utils.HistoryMeter if prefix == 'train_' else utils.AverageMeter)
        tic()
        for i, batch in enumerate(loader):
            if prefix == 'train_':
                lr = args.learning_rate * (1 - i / len(loader)) ** args.learning_rate_decay_power
                optimizers['p'].param_groups[0]['lr'] = lr
            if args.cuda:
                batch = utils.cuda(batch)
            data_time = 1000 * toc(); tic()
            step_metrics = step(batch)
            step_metrics['data_time'] = data_time
            step_metrics['batch_time'] = 1000 * toc(); tic()
            for (k, v) in step_metrics.items():
                metrics[prefix + k].update(v, len(batch['input']))
            print(logging.str_metrics(metrics, iter=i, num_iters=len(loader), epoch=epoch, num_epochs=epoch))
        print(logging.str_metrics(metrics, epoch=epoch, num_epochs=epoch))
        toc()
        if prefix == 'val_':
            return OD((k, v.avg) for (k, v) in metrics.items())
        return OD((k, v.hist) for (k, v) in metrics.items())

    if args.validate_first and 0 not in metrics_history:
        model.eval()
        metrics_history[epoch] = loop(loaders['val'], validation_step, epoch, 'val_')
        checkpoints.save_metrics(metrics_history)

    model.eval()  # freeze batch normalization
    metrics = loop(loaders['train'], training_step, epoch, 'train_')
    metrics['last_p'] = p.item()

    if args.validate:
        model.eval()
        metrics.update(loop(loaders['val'], validation_step, epoch + 1, 'val_'))

        metrics_history[epoch + 1] = metrics

    if not args.dry:
        utils.make_plots(metrics_history, args.expdir)
        checkpoints.save(model, epoch + 1, optimizers, metrics_history)
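`training_step` above fills only `p.grad` through `torch.autograd.grad`, i.e. a partial backward pass restricted to the pooling exponent. A standalone sketch of that pattern with a dummy GeM-style pooling (illustrative only, not the project's model):

import torch

p = torch.tensor(3.0, requires_grad=True)       # pooling exponent, the only parameter being tuned
x = torch.rand(4, 8) + 1e-6                      # dummy positive activations
loss = x.pow(p).mean(dim=1).pow(1.0 / p).sum()   # GeM-style pooling followed by a scalar loss
p.grad = torch.autograd.grad(loss, p)[0]         # gradient for p only; no other .grad buffers are touched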
Example #21
0
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('dataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Dataset name')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    # Optional arguments
    parser.add_argument('-o',
                        '--out_path',
                        metavar='PATH',
                        type=str,
                        help='Output path for model',
                        default=cfg.MODEL_DIR)
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=100,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step',
                        type=int,
                        default=30,
                        metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma',
                        type=float,
                        default=0.1,
                        metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w',
                        '--num_workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('--train_subset',
                        type=int,
                        help='Use a subset of train set',
                        default=None)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--weighted-loss',
                        action='store_true',
                        help='Use a weighted loss',
                        default=None)
    args = parser.parse_args()
    params = vars(args)

    # torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up dataset
    dataset_name = params['dataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    dataset = datasets.__dict__[dataset_name]

    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    train_transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    test_transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    trainset = dataset(train=True, transform=train_transform)
    testset = dataset(train=False, transform=test_transform)
    num_classes = len(trainset.classes)
    params['num_classes'] = num_classes

    if params['train_subset'] is not None:
        idxs = np.arange(len(trainset))
        ntrainsubset = params['train_subset']
        idxs = np.random.choice(idxs, size=ntrainsubset, replace=False)
        trainset = Subset(trainset, idxs)

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        pretrained,
                        num_classes=num_classes)
    model = model.to(device)

    # ----------- Train
    out_path = params['out_path']
    model_utils.train_model(model,
                            trainset,
                            testset=testset,
                            device=device,
                            **params)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
Example #22
0
    def __init__(self,
                 root: str,
                 normal_class=0,
                 tokenizer='spacy',
                 use_tfidf_weights=False,
                 append_sos=False,
                 append_eos=False,
                 clean_txt=False,
                 max_seq_len_prior=None):
        super().__init__(root)

        self.n_classes = 2  # 0: normal, 1: outlier
        classes = list(range(6))

        groups = [[
            'comp.graphics', 'comp.os.ms-windows.misc',
            'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware',
            'comp.windows.x'
        ],
                  [
                      'rec.autos', 'rec.motorcycles', 'rec.sport.baseball',
                      'rec.sport.hockey'
                  ], ['sci.crypt', 'sci.electronics', 'sci.med', 'sci.space'],
                  ['misc.forsale'],
                  [
                      'talk.politics.misc', 'talk.politics.guns',
                      'talk.politics.mideast'
                  ],
                  [
                      'talk.religion.misc', 'alt.atheism',
                      'soc.religion.christian'
                  ]]
        short_group_names = ['comp', 'rec', 'sci', 'misc', 'pol', 'rel']
        self.subset = short_group_names[normal_class]

        self.normal_classes = groups[normal_class]
        self.outlier_classes = []
        del classes[normal_class]
        for i in classes:
            self.outlier_classes += groups[i]

        # Load the 20 Newsgroups dataset
        self.train_set, self.test_set = newsgroups20_dataset(
            directory=root,
            train=True,
            test=True,
            clean_txt=clean_txt,
            groups=groups,
            short_group_names=short_group_names)

        # Pre-process
        self.train_set.columns.add('index')
        self.test_set.columns.add('index')
        self.train_set.columns.add('weight')
        self.test_set.columns.add('weight')

        train_idx_normal = []  # for subsetting train_set to normal class
        for i, row in enumerate(self.train_set):
            if row['label'] in self.normal_classes:
                train_idx_normal.append(i)
                row['label'] = torch.tensor(0)
            else:
                row['label'] = torch.tensor(1)
            row['text'] = row['text'].lower()

        test_n_idx = []  # subsetting test_set to selected normal classes
        test_a_idx = []  # subsetting test_set to selected anomalous classes
        for i, row in enumerate(self.test_set):
            if row['label'] in self.normal_classes:
                test_n_idx.append(i)
            else:
                test_a_idx.append(i)
            row['label'] = torch.tensor(
                0) if row['label'] in self.normal_classes else torch.tensor(1)
            row['text'] = row['text'].lower()

        # Subset train_set to normal class
        self.train_set = Subset(self.train_set, train_idx_normal)
        # Subset test_set to selected normal classes
        self.test_n_set = Subset(self.test_set, test_n_idx)
        # Subset test_set to selected anomalous classes
        self.test_a_set = Subset(self.test_set, test_a_idx)

        # Make corpus and set encoder
        text_corpus = [
            row['text']
            for row in datasets_iterator(self.train_set, self.test_set)
        ]
        if tokenizer == 'spacy':
            self.encoder = SpacyEncoder(text_corpus,
                                        min_occurrences=3,
                                        append_eos=append_eos)
        if tokenizer == 'bert':
            self.encoder = MyBertTokenizer.from_pretrained('bert-base-uncased',
                                                           cache_dir=root)

        # Encode
        self.max_seq_len = 0
        for row in datasets_iterator(self.train_set, self.test_set):
            if append_sos:
                sos_id = self.encoder.stoi[DEFAULT_SOS_TOKEN]
                row['text'] = torch.cat((torch.tensor(sos_id).unsqueeze(0),
                                         self.encoder.encode(row['text'])))
            else:
                row['text'] = self.encoder.encode(row['text'])
            if len(row['text']) > self.max_seq_len:
                self.max_seq_len = len(row['text'])

        # Compute tf-idf weights
        if use_tfidf_weights:
            compute_tfidf_weights(self.train_set,
                                  self.test_set,
                                  vocab_size=self.encoder.vocab_size)
        else:
            for row in datasets_iterator(self.train_set, self.test_set):
                row['weight'] = torch.empty(0)

        # Get indices after pre-processing
        for i, row in enumerate(self.train_set):
            row['index'] = i
        for i, row in enumerate(self.test_set):
            row['index'] = i

        # length prior
        sent_lengths = [len(row['text']) for row in self.train_set]
        sent_lengths_freq = np.bincount(np.array(sent_lengths))
        sent_lengths_freq = np.concatenate(
            (sent_lengths_freq,
             np.array((max_seq_len_prior - max(sent_lengths)) * [0])),
            axis=0)
        sent_lengths_freq = sent_lengths_freq + 1
        self.length_prior = np.log(sent_lengths_freq) - np.log(
            sent_lengths_freq.sum())
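
A toy numeric walk-through of the smoothed length prior built at the end of __init__ above, with made-up sentence lengths and an assumed max_seq_len_prior:

import numpy as np

sent_lengths = [3, 3, 5, 7]                       # hypothetical token counts per document
max_seq_len_prior = 10                            # hypothetical cap passed to __init__
freq = np.bincount(np.array(sent_lengths))        # index = length, value = count
freq = np.concatenate(
    (freq, np.array((max_seq_len_prior - max(sent_lengths)) * [0])), axis=0)
freq = freq + 1                                   # add-one smoothing: no length gets zero mass
length_prior = np.log(freq) - np.log(freq.sum())  # log-probabilities over lengths 0..max_seq_len_prior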
Example #23
    parser.add_argument('--plot_freq', type=int, default=250)
    parser.add_argument('--save_freq', type=int, default=10)
    # eval setting
    parser.add_argument('--val_fraction', type=float, default=0.1)
    parser.add_argument('--eval_batch_size', type=int, default=32)
    parser.add_argument('--eval_plot_freq', type=int, default=10)
    args = parser.parse_args()

    model = DeepGMR(args)
    if torch.cuda.is_available():
        model.cuda()

    data = TrainData(args.data_file, args)
    ids = np.random.permutation(len(data))
    n_val = int(args.val_fraction * len(data))
    train_data = Subset(data, ids[n_val:])
    valid_data = Subset(data, ids[:n_val])

    train_loader = DataLoader(train_data,
                              args.batch_size,
                              drop_last=True,
                              shuffle=True)
    valid_loader = DataLoader(valid_data, args.eval_batch_size, drop_last=True)

    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.5,
                                                           min_lr=1e-6)
    writer = SummaryWriter(args.log_dir)

    for epoch in range(args.n_epochs):
Example #24
def create_dataloaders(dataset_name, data_transforms, input_size, batch_size):
    dataloaders_dict = {}

    print("Initializing Datasets and Dataloaders...")
    if dataset_name == "imagenetv2":
        # Create training and validation datasets
        train_dataset = ImageNetV2Dataset(transform=data_transforms['train'])
        test_dataset = ImageNetV2Dataset(transform=data_transforms['val'])

        train_test_splits_file = 'split_indices.pkl'

        if os.path.exists(train_test_splits_file):
            indices_split = pickle.load(open(train_test_splits_file, 'rb'))
        else:
            index_to_class = {
                idx: cl
                for idx, (_, cl) in enumerate(train_dataset)
            }
            class_to_index = {idx: [] for idx in range(1000)}
            for idx, cl in index_to_class.items():
                class_to_index[cl].append(idx)

            indices_split = {'train': [], 'val': [], 'test': []}
            for cl in class_to_index:
                shuffle(class_to_index[cl])
                indices_split['train'].extend(
                    class_to_index[cl][:int(0.7 * len(class_to_index[cl]))])
                indices_split['val'].extend(
                    class_to_index[cl][int(0.7 * len(class_to_index[cl])
                                           ):int(0.9 *
                                                 len(class_to_index[cl]))])
                indices_split['test'].extend(
                    class_to_index[cl][int(0.9 * len(class_to_index[cl])):])

            pickle.dump(indices_split, open(train_test_splits_file, 'wb'))

        # Create training and validation dataloaders
        dataloaders_dict = {
            x: torch.utils.data.DataLoader(Subset(test_dataset,
                                                  indices_split[x]),
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)
            for x in ['val', 'test']
        }
        dataloaders_dict['train'] = torch.utils.data.DataLoader(
            Subset(train_dataset, indices_split['train']),
            batch_size=batch_size,
            shuffle=True,
            num_workers=4)

    elif dataset_name == "imagenetv2cifar100":
        # Create training and validation datasets
        print("Custom imagenetv2cifar dataset")
        train_dataset = ImageNetV2Dataset(
            transform=data_transforms['train']['imagenetv2'])
        test_dataset = ImageNetV2Dataset(
            transform=data_transforms['val']['imagenetv2'])

        train_test_splits_file = 'split_indices.pkl'

        if os.path.exists(train_test_splits_file):
            indices_split = pickle.load(open(train_test_splits_file, 'rb'))
        else:
            index_to_class = {
                idx: cl
                for idx, (_, cl) in enumerate(train_dataset)
            }
            class_to_index = {idx: [] for idx in range(1000)}
            for idx, cl in index_to_class.items():
                class_to_index[cl].append(idx)

            indices_split = {'train': [], 'val': [], 'test': []}
            for cl in class_to_index:
                shuffle(class_to_index[cl])
                indices_split['train'].extend(
                    class_to_index[cl][:int(0.7 * len(class_to_index[cl]))])
                indices_split['val'].extend(
                    class_to_index[cl][int(0.7 * len(class_to_index[cl])
                                           ):int(0.9 *
                                                 len(class_to_index[cl]))])
                indices_split['test'].extend(
                    class_to_index[cl][int(0.9 * len(class_to_index[cl])):])

            pickle.dump(indices_split, open(train_test_splits_file, 'wb'))

        train_dataset1 = Subset(train_dataset, indices_split['train'])
        test_dataset1 = Subset(test_dataset, indices_split['val'])

        train_dataset2 = torchvision.datasets.CIFAR100(
            root='./data',
            train=True,
            download=True,
            transform=data_transforms['train']['cifar100'])
        test_dataset2 = torchvision.datasets.CIFAR100(
            root='./data',
            train=False,
            download=True,
            transform=data_transforms['val']['cifar100'])

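        # Shift CIFAR-100 labels by 1000 so they do not collide with the 1000
        # ImageNetV2 classes once the two datasets are concatenated.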
        train_dataset2.targets = [x + 1000 for x in train_dataset2.targets]
        test_dataset2.targets = [x + 1000 for x in test_dataset2.targets]

        final_train_dataset = torch.utils.data.ConcatDataset(
            [train_dataset1, train_dataset2])
        final_test_dataset = torch.utils.data.ConcatDataset(
            [test_dataset1, test_dataset2])

        # Create training and validation dataloaders
        dataloaders_dict['train'] = torch.utils.data.DataLoader(
            final_train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=4)
        dataloaders_dict['val'] = torch.utils.data.DataLoader(
            final_test_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=4)

    else:
        if dataset_name == "imagenet":
            train_dataset = torchvision.datasets.ImageNet(
                root='./data',
                split='train',
                download=True,
                transform=data_transforms['train'])
            test_dataset = torchvision.datasets.ImageNet(
                root='./data',
                split='val',
                download=True,
                transform=data_transforms['val'])
        elif dataset_name == "cifar10":
            train_dataset = torchvision.datasets.CIFAR10(
                root='./data',
                train=True,
                download=True,
                transform=data_transforms['train'])
            test_dataset = torchvision.datasets.CIFAR10(
                root='./data',
                train=False,
                download=True,
                transform=data_transforms['val'])
        elif dataset_name == "cifar100":
            train_dataset = torchvision.datasets.CIFAR100(
                root='./data',
                train=True,
                download=True,
                transform=data_transforms['train'])
            test_dataset = torchvision.datasets.CIFAR100(
                root='./data',
                train=False,
                download=True,
                transform=data_transforms['val'])
        else:
            print("Invalid dataset name, exiting...")
            sys.exit(0)

        dataloaders_dict['train'] = torch.utils.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        dataloaders_dict['val'] = torch.utils.data.DataLoader(
            test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

    return dataloaders_dict
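

# The per-class 70/20/10 split built above can be sketched as a standalone
# helper (hypothetical name and signature): shuffle each class's indices and
# cut them so every class is represented in every split.
from random import shuffle

def per_class_split(labels, train_frac=0.7, val_frac=0.2):
    by_class = {}
    for idx, cl in enumerate(labels):
        by_class.setdefault(cl, []).append(idx)
    split = {'train': [], 'val': [], 'test': []}
    for idxs in by_class.values():
        shuffle(idxs)                      # in-place shuffle within the class
        n = len(idxs)
        a, b = int(train_frac * n), int((train_frac + val_frac) * n)
        split['train'].extend(idxs[:a])    # first 70%
        split['val'].extend(idxs[a:b])     # next 20%
        split['test'].extend(idxs[b:])     # remaining 10%
    return split
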
def train(rawdata, charcounts, maxlens, unique_onehotvals):
    mb_size = 256
    lr = 2.0e-4
    cnt = 0
    latent_dim = 32
    recurrent_hidden_size = 24

    epoch_len = 8
    max_veclen = 0.0
    patience = 12 * epoch_len
    patience_duration = 0

    # mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)

    input_dict = {}
    input_dict['discrete'] = discrete_cols
    input_dict['continuous'] = continuous_cols

    input_dict['onehot'] = {}
    for k in onehot_cols:
        dim = int(np.ceil(np.log(len(unique_onehotvals[k])) / np.log(2.0)))
        input_dict['onehot'][k] = dim

    if len(charcounts) > 0:
        text_dim = int(np.ceil(np.log(len(charcounts)) / np.log(2.0)))
        input_dict['text'] = {t: text_dim for t in text_cols}
    else:
        text_dim = 0
        input_dict['text'] = {}

    data = Dataseq(rawdata, charcounts, input_dict, unique_onehotvals, maxlens)
    data_idx = np.arange(len(data))
    np.random.shuffle(data_idx)
    n_folds = 6
    fold_size = 1.0 * len(data) / n_folds
    folds = [
        data_idx[int(i * fold_size):int((i + 1) * fold_size)]
        for i in range(n_folds)
    ]

    fold_groups = {}
    fold_groups[0] = {'train': [0, 1, 2, 4], 'es': [3], 'val': [5]}
    fold_groups[1] = {'train': [0, 2, 3, 5], 'es': [1], 'val': [4]}
    fold_groups[2] = {'train': [1, 3, 4, 5], 'es': [2], 'val': [0]}
    fold_groups[3] = {'train': [0, 2, 3, 4], 'es': [5], 'val': [1]}
    fold_groups[4] = {'train': [0, 1, 3, 5], 'es': [4], 'val': [2]}
    fold_groups[5] = {'train': [1, 2, 4, 5], 'es': [0], 'val': [3]}

    for fold in range(1):

        train_idx = np.array(
            list(
                itertools.chain.from_iterable(
                    [folds[i] for i in fold_groups[fold]['train']])))
        es_idx = np.array(
            list(
                itertools.chain.from_iterable(
                    [folds[i] for i in fold_groups[fold]['es']])))
        val_idx = np.array(folds[fold_groups[fold]['val'][0]])

        train = Subset(data, train_idx)
        es = Subset(data, es_idx)
        val = Subset(data, val_idx)

        kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
        train_iter = torch.utils.data.DataLoader(train,
                                                 batch_size=mb_size,
                                                 shuffle=True,
                                                 **kwargs)
        es_iter = torch.utils.data.DataLoader(es,
                                              batch_size=mb_size,
                                              shuffle=True,
                                              **kwargs)
        val_iter = torch.utils.data.DataLoader(val,
                                               batch_size=mb_size,
                                               shuffle=True,
                                               **kwargs)

        embeddings = {}
        reverse_embeddings = {}
        onehot_embedding_weights = {}
        for k in onehot_cols:
            dim = input_dict['onehot'][k]
            onehot_embedding_weights[k] = net.get_embedding_weight(
                len(unique_onehotvals[k]), dim)
            #embeddings[k] = nn.Embedding(len(unique_onehotvals[k]), dim, max_norm=1.0)
            embeddings[k] = nn.Embedding(len(unique_onehotvals[k]),
                                         dim,
                                         _weight=onehot_embedding_weights[k])
            reverse_embeddings[k] = net.EmbeddingToIndex(
                len(unique_onehotvals[k]),
                dim,
                _weight=onehot_embedding_weights[k])

        if text_dim > 0:
            text_embedding_weights = net.get_embedding_weight(
                len(charcounts) + 1, text_dim)
            #text_embedding = nn.Embedding(len(charcounts)+1, text_dim, max_norm=1.0)
            text_embedding = nn.Embedding(len(charcounts) + 1,
                                          text_dim,
                                          _weight=text_embedding_weights)
            text_embeddingtoindex = net.EmbeddingToIndex(
                len(charcounts) + 1, text_dim, _weight=text_embedding_weights)
            for k in text_cols:
                embeddings[k] = text_embedding
                reverse_embeddings[k] = text_embeddingtoindex

        enc = net.Encoder(input_dict,
                          dim=latent_dim,
                          recurrent_hidden_size=recurrent_hidden_size)
        dec = net.Decoder(input_dict,
                          maxlens,
                          dim=latent_dim,
                          recurrent_hidden_size=recurrent_hidden_size)

        if use_cuda:
            embeddings = {k: embeddings[k].cuda() for k in embeddings.keys()}
            enc.cuda()
            dec.cuda()

        #print(enc.parameters)
        #print(dec.parameters)

        #contrastivec = contrastive.ContrastiveLoss(margin=margin)
        logloss = contrastive.GaussianOverlap()

        #solver = optim.RMSprop([p for em in embeddings.values() for p in em.parameters()] +  [p for p in enc.parameters()] + [p for p in dec.parameters()], lr=lr)
        solver = optim.Adam(
            [p for em in embeddings.values() for p in em.parameters()] +
            [p for p in enc.parameters()] + [p for p in dec.parameters()],
            lr=lr)

        Tsample = next(iter(es_iter))
        if use_cuda:
            Tsample = {
                col: Variable(tt[0:128]).cuda()
                for col, tt in Tsample.items()
            }
        else:
            Tsample = {col: Variable(tt[0:128]) for col, tt in Tsample.items()}

        print({col: tt[0] for col, tt in Tsample.items()})

        print('starting training')
        loss = 0.0
        for it in range(1000000):
            # X = Variable(torch.tensor(np.array([[1,2,4], [4,1,9]]))).cuda()
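            # Note: this builds a fresh DataLoader iterator every step and only
            # consumes its first (shuffled) batch.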
            batch_idx, T = next(enumerate(train_iter))
            if use_cuda:
                T = {col: Variable(tt).cuda() for col, tt in T.items()}
            else:
                T = {col: Variable(tt) for col, tt in T.items()}

            X = {}
            for col, tt in T.items():
                if col in embeddings.keys():
                    X[col] = embeddings[col](tt)
                else:
                    X[col] = tt.float()

            mu = enc(X)
            X2 = dec(mu)

            T2 = {}
            X2d = {col: (1.0 * tt).detach() for col, tt in X2.items()}

            for col, embedding in embeddings.items():
                T2[col] = reverse_embeddings[col](X2[col])
                X2[col] = embeddings[col](T2[col])
                X2d[col] = embeddings[col](T2[col].detach())
            '''
            X2d = {col: (1.0*tt).detach() for col, tt in X2.items()}
            T2 = discretize(X2d, embeddings, maxlens)
            for col, embedding in embeddings.items():
                X2d[col] = embeddings[col](T2[col].detach())
            '''
            '''
            T2 = discretize(X2, embeddings, maxlens)
            X2d = {col: (1.0*tt).detach() for col, tt in X2.items()}

            for col, embedding in embeddings.items():
                X2[col] = embeddings[col](T2[col]) #+0.05 X2[col]
                X2d[col] = embeddings[col](T2[col].detach())
            '''

            mu2 = enc(X2)
            mu2 = mu2.view(mb_size, -1)

            mu2d = enc(X2d)

            mu2d = mu2d.view(mb_size, -1)

            mu = mu.view(mb_size, -1)

            are_same = are_equal({col: x[::2]
                                  for col, x in T.items()},
                                 {col: x[1::2]
                                  for col, x in T.items()})
            #print('f same ', torch.mean(torch.mean(are_same, 1)))
            #enc_loss = contrastivec(mu2[::2], mu2[1::2], torch.zeros(int(mb_size / 2)).cuda())
            enc_loss = logloss(torch.mean(torch.pow(mu[::2] - mu[1::2], 2), 1),
                               are_same)
            #enc_loss += 0.5*contrastivec(mu2[::2], mu2[1::2], are_same)
            #enc_loss += 0.5 * contrastivec(mu[::2], mu2[1::2], are_same)
            enc_loss += 1.0 * logloss(torch.mean(torch.pow(mu - mu2, 2), 1),
                                      torch.ones(mb_size).cuda())
            enc_loss += 2.0 * logloss(torch.mean(torch.pow(mu - mu2d, 2), 1),
                                      torch.zeros(mb_size).cuda())
            #enc_loss += 1.0 * contrastivec(mu2d[0::2], mu2d[1::2], torch.ones(int(mb_size/2)).cuda())
            #enc_loss += 1.0 * contrastivec(mu2d[::2], mu2d[1::2], torch.ones(int(mb_size / 2)).cuda())
            #enc_loss += 0.5 * contrastivec(mu2d[::2], mu2d[1::2], torch.ones(int(mb_size/2)).cuda())
            '''
            adotb = torch.matmul(mu, mu.permute(1, 0))  # batch_size x batch_size
            adota = torch.matmul(mu.view(-1, 1, latent_dim), mu.view(-1, latent_dim, 1))  # batch_size x 1 x 1
            diffsquares = (adota.view(-1, 1).repeat(1, mb_size) + adota.view(1, -1).repeat(mb_size, 1) - 2 * adotb) / latent_dim

            # did I f**k up something here? diffsquares can apparently be less than 0....
            mdist = torch.sqrt(torch.clamp(torch.triu(diffsquares, diagonal=1),  min=0.0))
            mdist = torch.clamp(margin - mdist, min=0.0)
            number_of_pairs = mb_size * (mb_size - 1) / 2

            enc_loss = 0.5 * torch.sum(torch.triu(torch.pow(mdist, 2), diagonal=1)) / number_of_pairs

            target = torch.ones(mu.size(0), 1)
            if use_cuda:
                target.cuda()
            enc_loss += contrastivec(mu, mu2, target.cuda())

            target = torch.zeros(mu.size(0), 1)
            if use_cuda:
                target.cuda()
            enc_loss += 2.0 * contrastivec(mu, mu2d, target.cuda())
            '''

            enc_loss.backward()
            solver.step()

            enc.zero_grad()
            dec.zero_grad()
            for col in embeddings.keys():
                embeddings[col].zero_grad()

            loss += enc_loss.data.cpu().numpy()
            veclen = torch.mean(torch.pow(mu, 2))
            if it % epoch_len == 0:
                print(it, loss / epoch_len,
                      veclen.data.cpu().numpy())  #enc_loss.data.cpu().numpy(),

                Xsample = {}
                for col, tt in Tsample.items():
                    if col in embeddings.keys():
                        Xsample[col] = embeddings[col](tt)
                    else:
                        Xsample[col] = tt.float()

                mu = enc(Xsample)
                X2sample = dec(mu)
                X2sampled = {col: tt.detach() for col, tt in X2sample.items()}
                T2sample = discretize(X2sample, embeddings, maxlens)

                mu2 = enc(X2sample)
                mu2d = enc(X2sampled)

                if 'Fare' in continuous_cols and 'Age' in continuous_cols:
                    print([
                        np.mean(
                            np.abs(Xsample[col].data.cpu().numpy() -
                                   X2sample[col].data.cpu().numpy()))
                        for col in ['Fare', 'Age']
                    ])

                print({
                    col: tt[0:2].data.cpu().numpy()
                    for col, tt in T2sample.items()
                })

                if 'Survived' in onehot_cols:
                    print(
                        '% survived correct: ',
                        np.mean(T2sample['Survived'].data.cpu().numpy() ==
                                Tsample['Survived'].data.cpu().numpy()),
                        np.mean(
                            Tsample['Survived'].data.cpu().numpy() == np.
                            ones_like(Tsample['Survived'].data.cpu().numpy())))

                if 'Cabin' in text_cols:
                    print(embeddings['Cabin'].weight[data.charindex['1']])

                are_same = are_equal(
                    {col: x[::2]
                     for col, x in Tsample.items()},
                    {col: x[1::2]
                     for col, x in Tsample.items()})
                # print('f same ', torch.mean(torch.mean(are_same, 1)))
                # enc_loss = contrastivec(mu2[::2], mu2[1::2], torch.zeros(int(mb_size / 2)).cuda())
                #es_loss = contrastivec(mu[::2], mu[1::2], are_same)
                # enc_loss += 0.25*contrastivec(mu2[::2], mu2[1::2], are_same)
                # enc_loss += 0.5 * contrastivec(mu[::2], mu2[1::2], are_same)
                # contrastivec is never constructed (its definition above is
                # commented out), so reuse the GaussianOverlap logloss from
                # training as the early-stopping monitoring loss.
                es_loss = 1.0 * logloss(torch.mean(torch.pow(mu - mu2, 2), 1),
                                        torch.ones(mu.size(0)).cuda())
                #es_loss += 2.0 * contrastivec(mu, mu2d, torch.zeros(mu.size(0)).cuda())

                #print('mean mu ', torch.mean(torch.pow(mu, 2)))
                print('es loss ', es_loss)

                loss = 0.0
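
A compact sketch (with hypothetical sizes) of the fold bookkeeping above: shuffle the example indices once, cut them into n_folds equal slices, and assemble disjoint index sets for training, early stopping and validation, each of which can be wrapped in a Subset:

import numpy as np

n_examples, n_folds = 600, 6
data_idx = np.random.permutation(n_examples)
fold_size = n_examples / n_folds
folds = [data_idx[int(i * fold_size):int((i + 1) * fold_size)]
         for i in range(n_folds)]
group = {'train': [0, 1, 2, 4], 'es': [3], 'val': [5]}
train_idx = np.concatenate([folds[i] for i in group['train']])
es_idx = np.concatenate([folds[i] for i in group['es']])
val_idx = folds[group['val'][0]]
# e.g. Subset(data, train_idx) / Subset(data, es_idx) / Subset(data, val_idx)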
Example #26
    def setup_data(self):
        cfg = self.cfg
        batch_sz = cfg.solver.batch_sz
        num_workers = cfg.data.num_workers

        # download and unzip data
        if cfg.data.uri.startswith('s3://') or cfg.data.uri.startswith('/'):
            data_uri = cfg.data.uri
        else:
            data_uri = join(cfg.base_uri, cfg.data.uri)

        data_dirs = []
        zip_uris = [data_uri] if data_uri.endswith('.zip') else list_paths(
            data_uri, 'zip')
        for zip_ind, zip_uri in enumerate(zip_uris):
            zip_path = get_local_path(zip_uri, self.data_cache_dir)
            if not isfile(zip_path):
                zip_path = download_if_needed(zip_uri, self.data_cache_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                data_dir = join(self.tmp_dir, 'data', str(zip_ind))
                data_dirs.append(data_dir)
                zipf.extractall(data_dir)

        # build datasets -- one per zip file and then merge them into a single dataset
        train_ds = []
        valid_ds = []
        test_ds = []
        for data_dir in data_dirs:
            train_dir = join(data_dir, 'train')
            valid_dir = join(data_dir, 'valid')

            transform = Compose(
                [Resize((cfg.data.img_sz, cfg.data.img_sz)),
                 ToTensor()])
            aug_transform = Compose([
                RandomHorizontalFlip(),
                RandomVerticalFlip(),
                ColorJitter(0.1, 0.1, 0.1, 0.1),
                Resize((cfg.data.img_sz, cfg.data.img_sz)),
                ToTensor()
            ])

            if isdir(train_dir):
                if cfg.overfit_mode:
                    train_ds.append(
                        ImageRegressionDataset(train_dir,
                                               cfg.data.class_names,
                                               transform=transform))
                else:
                    train_ds.append(
                        ImageRegressionDataset(train_dir,
                                               cfg.data.class_names,
                                               transform=aug_transform))

            if isdir(valid_dir):
                valid_ds.append(
                    ImageRegressionDataset(valid_dir,
                                           cfg.data.class_names,
                                           transform=transform))
                test_ds.append(
                    ImageRegressionDataset(valid_dir,
                                           cfg.data.class_names,
                                           transform=transform))

        train_ds, valid_ds, test_ds = \
            ConcatDataset(train_ds), ConcatDataset(valid_ds), ConcatDataset(test_ds)

        if cfg.overfit_mode:
            train_ds = Subset(train_ds, range(batch_sz))
            valid_ds = train_ds
            test_ds = train_ds
        elif cfg.test_mode:
            train_ds = Subset(train_ds, range(batch_sz))
            valid_ds = Subset(valid_ds, range(batch_sz))
            test_ds = Subset(test_ds, range(batch_sz))

        train_dl = DataLoader(train_ds,
                              shuffle=True,
                              batch_size=batch_sz,
                              num_workers=num_workers,
                              pin_memory=True)
        valid_dl = DataLoader(valid_ds,
                              shuffle=True,
                              batch_size=batch_sz,
                              num_workers=num_workers,
                              pin_memory=True)
        test_dl = DataLoader(test_ds,
                             shuffle=True,
                             batch_size=batch_sz,
                             num_workers=num_workers,
                             pin_memory=True)

        self.train_ds, self.valid_ds, self.test_ds = (train_ds, valid_ds,
                                                      test_ds)
        self.train_dl, self.valid_dl, self.test_dl = (train_dl, valid_dl,
                                                      test_dl)
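
In overfit_mode and test_mode above, each split is trimmed to a single batch with Subset(ds, range(batch_sz)). A tiny helper in the same spirit (hypothetical name), with a guard for datasets shorter than one batch:

from torch.utils.data import Subset

def first_batch_only(ds, batch_sz):
    # Keep only the first batch worth of examples, e.g. for overfit/smoke tests.
    return Subset(ds, range(min(batch_sz, len(ds))))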
Example #27
def create_dataloaders(lg, seed_base, agent, data_cfg):
    data_cfg.type = data_cfg.type.strip().lower()
    clz, (MEAN, STD) = get_dataset_settings(data_cfg.type)
    clz: Dataset.__class__
    if 'dataset_root' not in data_cfg:
        data_cfg['dataset_root'] = os.path.abspath(
            os.path.join(os.path.expanduser('~'), 'datasets', data_cfg.type))

    # build transformers
    last_t = time.time()
    baseline_train_trans = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        Cutout(n_holes=1, length=16),
        transforms.Normalize(MEAN, STD),
    ])

    to_tensor = transforms.ToTensor()
    cutout = Cutout(n_holes=1, length=16)
    normalize = transforms.Normalize(MEAN, STD)
    autoaug_train_trans = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        agent,
        lambda tup: (to_tensor(tup[0]), tup[1]),
        lambda tup: (cutout(tup[0]), tup[1]),
        lambda tup: (normalize(tup[0]), tup[1]),
    ])
    val_trans = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(MEAN, STD)])
    test_trans = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(MEAN, STD)])

    lg.info(
        f'=> after building transforms, time cost: {time.time() - last_t:.2f}s'
    )

    # split data sets
    last_t = time.time()
    original_train_val_set = clz(root=data_cfg.dataset_root,
                                 train=True,
                                 download=False,
                                 transform=None)
    lg.info(
        f'=> after building original_train_val_set, time cost: {time.time() - last_t:.2f}s'
    )

    last_t = time.time()
    targets_attr_name = 'targets' if hasattr(original_train_val_set,
                                             'targets') else 'train_labels'
    reduced_size = data_cfg.train_set_size + data_cfg.val_set_size
    original_size = get_train_val_set_size(data_cfg.type)
    assert reduced_size <= original_size, f'too many images ({reduced_size}) for the train_val_set of {data_cfg.type}'
    reduced = reduced_size < original_size

    def split(dataset, second_size) -> Tuple[np.ndarray, np.ndarray]:
        """
        split a given dataset into two subsets (preserving the percentage of samples for each class)
        :param dataset: the origin dataset
        :param second_size: the length of second_idx
        :return: two indices (np.ndarray) of the two subsets
                 len(second_idx) = second_size
                 len(first_idx) + len(second_idx) = len(dataset)
        """
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=second_size,
                                     random_state=seed_base)
        first_idx, second_idx = next(
            sss.split(X=list(range(len(dataset))),
                      y=getattr(dataset, targets_attr_name)))
        return first_idx, second_idx

    train_val_set = original_train_val_set
    if reduced:
        lg.info(f'use a reduced set ({reduced_size} of {original_size})')
        _, reduced_train_val_idx = split(original_train_val_set, reduced_size)
        reduced_train_val_set = Subset(original_train_val_set,
                                       reduced_train_val_idx)
        setattr(reduced_train_val_set, targets_attr_name, [
            getattr(original_train_val_set, targets_attr_name)[i]
            for i in reduced_train_val_idx
        ])
        train_val_set = reduced_train_val_set

    train_idx, val_idx = split(train_val_set, data_cfg.val_set_size)
    lg.info(f'=> after splitting, time cost: {time.time() - last_t:.2f}s')

    # build datasets
    # data_cfg.dist_training
    last_t = time.time()
    auged_full_train_set = clz(root=data_cfg.dataset_root,
                               train=True,
                               download=False,
                               transform=autoaug_train_trans)
    full_train_set = clz(root=data_cfg.dataset_root,
                         train=True,
                         download=False,
                         transform=baseline_train_trans)
    auged_sub_train_set = Subset(
        dataset=clz(root=data_cfg.dataset_root,
                    train=True,
                    download=False,
                    transform=autoaug_train_trans),
        indices=np.array([train_val_set.indices[i]
                          for i in train_idx]) if reduced else train_idx)
    val_set = Subset(
        dataset=clz(root=data_cfg.dataset_root,
                    train=True,
                    download=False,
                    transform=val_trans),
        indices=np.array([train_val_set.indices[i]
                          for i in val_idx]) if reduced else val_idx)
    test_set = clz(root=data_cfg.dataset_root,
                   train=False,
                   download=False,
                   transform=test_trans)
    set_sizes = len(full_train_set), len(auged_full_train_set), len(
        auged_sub_train_set), len(val_set), len(test_set)
    lg.info(
        f'=> after building sets, time cost: {time.time() - last_t:.2f}s, test_set[0][0].mean(): {test_set[0][0].mean():.4f} (expected: -0.2404)'
    )  # -0.24041180312633514

    # build loaders
    from torch.utils.data._utils.collate import default_collate
    last_t = time.time()
    loaders = [
        DataLoader(dataset=dataset,
                   num_workers=data_cfg.num_workers,
                   pin_memory=True,
                   collate_fn=cf,
                   batch_size=bs,
                   shuffle=shuffle,
                   drop_last=False)
        for dataset, cf, bs, shuffle in zip((
            full_train_set, auged_full_train_set, auged_sub_train_set, val_set,
            test_set), (default_collate, collate_fn_for_autoaug,
                        collate_fn_for_autoaug, default_collate,
                        default_collate), (
                            data_cfg.batch_size, data_cfg.batch_size,
                            data_cfg.batch_size, data_cfg.batch_size * 2,
                            data_cfg.batch_size * 2), (True, True, True, False,
                                                       False))
    ]

    lg.info(
        f'=> after building loaders, time cost: {time.time() - last_t:.2f}s')

    return set_sizes, loaders
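
One subtlety worth illustrating is the index remapping above: when the reduced pool is itself a Subset, the split produces indices local to that pool, so they are mapped through Subset.indices back to positions in the full training set rather than nesting a Subset inside another Subset. A small self-contained sketch with made-up sizes:

import numpy as np
from torch.utils.data import Subset

full_train = list(range(100))                        # stand-in for the full train set
reduced_pool = Subset(full_train, np.arange(10, 60)) # hypothetical reduced train+val pool
local_train_idx = np.array([0, 2, 4])                # indices *within* the reduced pool
global_train_idx = np.array(
    [reduced_pool.indices[i] for i in local_train_idx])  # -> array([10, 12, 14])
train_view = Subset(full_train, global_train_idx)    # addresses the full set directly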
Example #28
    def train(self):
        """
        #General Training Loop with Data Selection Strategies
        """
        # Loading the Dataset
        if self.configdata['dataset']['feature'] == 'classimb':
            trainset, validset, testset, num_cls = load_dataset_custom(
                self.configdata['dataset']['datadir'],
                self.configdata['dataset']['name'],
                self.configdata['dataset']['feature'],
                classimb_ratio=self.configdata['dataset']['classimb_ratio'])
        else:
            trainset, validset, testset, num_cls = load_dataset_custom(
                self.configdata['dataset']['datadir'],
                self.configdata['dataset']['name'],
                self.configdata['dataset']['feature'])
        N = len(trainset)
        trn_batch_size = 20
        val_batch_size = 1000
        tst_batch_size = 1000

        # Creating the Data Loaders
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=trn_batch_size,
                                                  shuffle=False,
                                                  pin_memory=True)

        valloader = torch.utils.data.DataLoader(validset,
                                                batch_size=val_batch_size,
                                                shuffle=False,
                                                pin_memory=True)

        testloader = torch.utils.data.DataLoader(testset,
                                                 batch_size=tst_batch_size,
                                                 shuffle=False,
                                                 pin_memory=True)

        # Budget for subset selection
        bud = int(self.configdata['dss_strategy']['fraction'] * N)
        print("Budget, fraction and N:", bud,
              self.configdata['dss_strategy']['fraction'], N)

        # Subset Selection and creating the subset data loader
        start_idxs = np.random.choice(N, size=bud, replace=False)
        idxs = start_idxs
        data_sub = Subset(trainset, idxs)
        subset_trnloader = torch.utils.data.DataLoader(
            data_sub,
            batch_size=self.configdata['dataloader']['batch_size'],
            shuffle=self.configdata['dataloader']['shuffle'],
            pin_memory=self.configdata['dataloader']['pin_memory'])

        # Variables to store accuracies
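        # gammas: one weight per selected training example; the selection
        # strategies below overwrite it whenever a new subset is chosen.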
        gammas = torch.ones(len(idxs)).to(
            self.configdata['train_args']['device'])
        substrn_losses = list(
        )  #np.zeros(configdata['train_args']['num_epochs'])
        trn_losses = list()
        val_losses = list()  #np.zeros(configdata['train_args']['num_epochs'])
        tst_losses = list()
        subtrn_losses = list()
        timing = np.zeros(self.configdata['train_args']['num_epochs'])
        trn_acc = list()
        val_acc = list()  #np.zeros(configdata['train_args']['num_epochs'])
        tst_acc = list()  #np.zeros(configdata['train_args']['num_epochs'])
        subtrn_acc = list()  #np.zeros(configdata['train_args']['num_epochs'])

        # Results logging file
        print_every = self.configdata['train_args']['print_every']
        results_dir = osp.abspath(
            osp.expanduser(self.configdata['train_args']['results_dir']))
        all_logs_dir = os.path.join(
            results_dir, self.configdata['dss_strategy']['type'],
            self.configdata['dataset']['name'],
            str(self.configdata['dss_strategy']['fraction']),
            str(self.configdata['dss_strategy']['select_every']))

        os.makedirs(all_logs_dir, exist_ok=True)
        path_logfile = os.path.join(
            all_logs_dir, self.configdata['dataset']['name'] + '.txt')
        logfile = open(path_logfile, 'w')

        # Model Creation
        model = self.create_model()
        model1 = self.create_model()

        # Loss Functions
        criterion, criterion_nored = self.loss_function()

        # Getting the optimizer and scheduler
        optimizer, scheduler = self.optimizer_with_scheduler(model)

        if self.configdata['dss_strategy']['type'] == 'GradMatch':
            # OMPGradMatch Selection strategy
            setf_model = OMPGradMatchStrategy(
                trainloader,
                valloader,
                model1,
                criterion_nored,
                self.configdata['optimizer']['lr'],
                self.configdata['train_args']['device'],
                num_cls,
                True,
                'PerClassPerGradient',
                False,
                lam=self.configdata['dss_strategy']['lam'],
                eps=1e-100)
        elif self.configdata['dss_strategy']['type'] == 'GradMatchPB':
            setf_model = OMPGradMatchStrategy(
                trainloader,
                valloader,
                model1,
                criterion_nored,
                self.configdata['optimizer']['lr'],
                self.configdata['train_args']['device'],
                num_cls,
                True,
                'PerBatch',
                False,
                lam=self.configdata['dss_strategy']['lam'],
                eps=1e-100)
        elif self.configdata['dss_strategy']['type'] == 'GLISTER':
            # GLISTER Selection strategy
            setf_model = GLISTERStrategy(
                trainloader,
                valloader,
                model1,
                criterion_nored,
                self.configdata['optimizer']['lr'],
                self.configdata['train_args']['device'],
                num_cls,
                False,
                'Stochastic',
                r=int(bud))

        elif self.configdata['dss_strategy']['type'] == 'CRAIG':
            # CRAIG Selection strategy
            setf_model = CRAIGStrategy(trainloader, valloader, model1,
                                       criterion_nored,
                                       self.configdata['train_args']['device'],
                                       num_cls, False, False, 'PerClass')

        elif self.configdata['dss_strategy']['type'] == 'CRAIGPB':
            # CRAIG Selection strategy
            setf_model = CRAIGStrategy(trainloader, valloader, model1,
                                       criterion_nored,
                                       self.configdata['train_args']['device'],
                                       num_cls, False, False, 'PerBatch')

        elif self.configdata['dss_strategy']['type'] == 'CRAIG-Warm':
            # CRAIG Selection strategy
            setf_model = CRAIGStrategy(trainloader, valloader, model1,
                                       criterion_nored,
                                       self.configdata['train_args']['device'],
                                       num_cls, False, False, 'PerClass')
            # Random-Online Selection strategy
            #rand_setf_model = RandomStrategy(trainloader, online=True)
            if 'kappa' in self.configdata['dss_strategy']:
                kappa_epochs = int(self.configdata['dss_strategy']['kappa'] *
                                   self.configdata['train_args']['num_epochs'])
                full_epochs = round(
                    kappa_epochs * self.configdata['dss_strategy']['fraction'])
            else:
                raise KeyError("Specify a kappa value in the config file")

        elif self.configdata['dss_strategy']['type'] == 'CRAIGPB-Warm':
            # CRAIG Selection strategy
            setf_model = CRAIGStrategy(trainloader, valloader, model1,
                                       criterion_nored,
                                       self.configdata['train_args']['device'],
                                       num_cls, False, False, 'PerBatch')
            # Random-Online Selection strategy
            #rand_setf_model = RandomStrategy(trainloader, online=True)
            if 'kappa' in self.configdata['dss_strategy']:
                kappa_epochs = int(self.configdata['dss_strategy']['kappa'] *
                                   self.configdata['train_args']['num_epochs'])
                full_epochs = round(
                    kappa_epochs * self.configdata['dss_strategy']['fraction'])
            else:
                raise KeyError("Specify a kappa value in the config file")

        elif self.configdata['dss_strategy']['type'] == 'Random':
            # Random Selection strategy
            setf_model = RandomStrategy(trainloader, online=False)

        elif self.configdata['dss_strategy']['type'] == 'Random-Online':
            # Random-Online Selection strategy
            setf_model = RandomStrategy(trainloader, online=True)

        elif self.configdata['dss_strategy']['type'] == 'GLISTER-Warm':
            # GLISTER Selection strategy
            setf_model = GLISTERStrategy(
                trainloader,
                valloader,
                model1,
                criterion_nored,
                self.configdata['optimizer']['lr'],
                self.configdata['train_args']['device'],
                num_cls,
                False,
                'Stochastic',
                r=int(bud))
            # Random-Online Selection strategy
            #rand_setf_model = RandomStrategy(trainloader, online=True)
            if 'kappa' in self.configdata['dss_strategy']:
                kappa_epochs = int(self.configdata['dss_strategy']['kappa'] *
                                   self.configdata['train_args']['num_epochs'])
                full_epochs = round(
                    kappa_epochs * self.configdata['dss_strategy']['fraction'])
            else:
                raise KeyError("Specify a kappa value in the config file")

        elif self.configdata['dss_strategy']['type'] == 'GradMatch-Warm':
            # OMPGradMatch Selection strategy
            setf_model = OMPGradMatchStrategy(
                trainloader,
                valloader,
                model1,
                criterion_nored,
                self.configdata['optimizer']['lr'],
                self.configdata['train_args']['device'],
                num_cls,
                True,
                'PerClassPerGradient',
                False,
                lam=self.configdata['dss_strategy']['lam'],
                eps=1e-100)
            # Random-Online Selection strategy
            #rand_setf_model = RandomStrategy(trainloader, online=True)
            if 'kappa' in self.configdata['dss_strategy']:
                kappa_epochs = int(self.configdata['dss_strategy']['kappa'] *
                                   self.configdata['train_args']['num_epochs'])
                full_epochs = round(
                    kappa_epochs * self.configdata['dss_strategy']['fraction'])
            else:
                raise KeyError("Specify a kappa value in the config file")

        elif self.configdata['dss_strategy']['type'] == 'GradMatchPB-Warm':
            # OMPGradMatch Selection strategy
            setf_model = OMPGradMatchStrategy(
                trainloader,
                valloader,
                model1,
                criterion_nored,
                self.configdata['optimizer']['lr'],
                self.configdata['train_args']['device'],
                num_cls,
                True,
                'PerBatch',
                False,
                lam=self.configdata['dss_strategy']['lam'],
                eps=1e-100)
            # Random-Online Selection strategy
            #rand_setf_model = RandomStrategy(trainloader, online=True)
            if 'kappa' in self.configdata['dss_strategy']:
                kappa_epochs = int(self.configdata['dss_strategy']['kappa'] *
                                   self.configdata['train_args']['num_epochs'])
                full_epochs = round(
                    kappa_epochs * self.configdata['dss_strategy']['fraction'])
            else:
                raise KeyError("Specify a kappa value in the config file")

        elif self.configdata['dss_strategy']['type'] == 'Random-Warm':
            if 'kappa' in self.configdata['dss_strategy']:
                kappa_epochs = int(self.configdata['dss_strategy']['kappa'] *
                                   self.configdata['train_args']['num_epochs'])
                full_epochs = round(
                    kappa_epochs * self.configdata['dss_strategy']['fraction'])
            else:
                raise KeyError("Specify a kappa value in the config file")

        print("=======================================", file=logfile)

        for i in range(self.configdata['train_args']['num_epochs']):
            subtrn_loss = 0
            subtrn_correct = 0
            subtrn_total = 0
            subset_selection_time = 0

            if self.configdata['dss_strategy']['type'] in ['Random-Online']:
                start_time = time.time()
                subset_idxs, gammas = setf_model.select(int(bud))
                idxs = subset_idxs
                subset_selection_time += (time.time() - start_time)
                gammas = gammas.to(self.configdata['train_args']['device'])

            elif self.configdata['dss_strategy']['type'] in ['Random']:
                pass

            elif (self.configdata['dss_strategy']['type'] in [
                    'GLISTER', 'GradMatch', 'GradMatchPB', 'CRAIG', 'CRAIGPB'
            ]) and (((i + 1) % self.configdata['dss_strategy']['select_every'])
                    == 0):
                start_time = time.time()
                cached_state_dict = copy.deepcopy(model.state_dict())
                clone_dict = copy.deepcopy(model.state_dict())
                subset_idxs, gammas = setf_model.select(int(bud), clone_dict)
                model.load_state_dict(cached_state_dict)
                idxs = subset_idxs
                if self.configdata['dss_strategy']['type'] in [
                        'GradMatch', 'GradMatchPB', 'CRAIG', 'CRAIGPB'
                ]:
                    gammas = torch.from_numpy(np.array(gammas)).to(
                        self.configdata['train_args']['device']).to(
                            torch.float32)
                subset_selection_time += (time.time() - start_time)

            elif (self.configdata['dss_strategy']['type'] in [
                    'GLISTER-Warm', 'GradMatch-Warm', 'GradMatchPB-Warm',
                    'CRAIG-Warm', 'CRAIGPB-Warm'
            ]):
                start_time = time.time()
                if ((i % self.configdata['dss_strategy']['select_every'] == 0)
                        and (i >= kappa_epochs)):
                    cached_state_dict = copy.deepcopy(model.state_dict())
                    clone_dict = copy.deepcopy(model.state_dict())
                    subset_idxs, gammas = setf_model.select(
                        int(bud), clone_dict)
                    model.load_state_dict(cached_state_dict)
                    idxs = subset_idxs
                    if self.configdata['dss_strategy']['type'] in [
                            'GradMatch-Warm', 'GradMatchPB-Warm', 'CRAIG-Warm',
                            'CRAIGPB-Warm'
                    ]:
                        gammas = torch.from_numpy(np.array(gammas)).to(
                            self.configdata['train_args']['device']).to(
                                torch.float32)
                subset_selection_time += (time.time() - start_time)

            elif self.configdata['dss_strategy']['type'] in ['Random-Warm']:
                pass

            #print("selEpoch: %d, Selection Ended at:" % (i), str(datetime.datetime.now()))
            data_sub = Subset(trainset, idxs)
            subset_trnloader = torch.utils.data.DataLoader(
                data_sub,
                batch_size=trn_batch_size,
                shuffle=False,
                pin_memory=True)

            model.train()
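            # With shuffle=False, the batch_sampler yields consecutive position
            # lists into data_sub, so gammas[batch_wise_indices[b]] lines up
            # each batch with its per-example selection weights.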
            batch_wise_indices = list(subset_trnloader.batch_sampler)
            if self.configdata['dss_strategy']['type'] in [
                    'CRAIG', 'CRAIGPB', 'GradMatch', 'GradMatchPB'
            ]:
                start_time = time.time()
                for batch_idx, (inputs,
                                targets) in enumerate(subset_trnloader):
                    inputs, targets = inputs.to(
                        self.configdata['train_args']['device']), targets.to(
                            self.configdata['train_args']['device'],
                            non_blocking=True
                        )  # targets can have non_blocking=True.
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    losses = criterion_nored(outputs, targets)
                    loss = torch.dot(
                        losses, gammas[batch_wise_indices[batch_idx]]) / (
                            gammas[batch_wise_indices[batch_idx]].sum())
                    loss.backward()
                    subtrn_loss += loss.item()
                    optimizer.step()
                    _, predicted = outputs.max(1)
                    subtrn_total += targets.size(0)
                    subtrn_correct += predicted.eq(targets).sum().item()
                train_time = time.time() - start_time

            elif self.configdata['dss_strategy']['type'] in [
                    'CRAIGPB-Warm', 'CRAIG-Warm', 'GradMatch-Warm',
                    'GradMatchPB-Warm'
            ]:
                start_time = time.time()
                if i < full_epochs:
                    for batch_idx, (inputs, targets) in enumerate(trainloader):
                        inputs, targets = inputs.to(
                            self.configdata['train_args']
                            ['device']), targets.to(
                                self.configdata['train_args']['device'],
                                non_blocking=True
                            )  # targets can have non_blocking=True.
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = criterion(outputs, targets)
                        loss.backward()
                        subtrn_loss += loss.item()
                        optimizer.step()
                        _, predicted = outputs.max(1)
                        subtrn_total += targets.size(0)
                        subtrn_correct += predicted.eq(targets).sum().item()

                elif i >= kappa_epochs:
                    for batch_idx, (inputs,
                                    targets) in enumerate(subset_trnloader):
                        inputs, targets = inputs.to(
                            self.configdata['train_args']
                            ['device']), targets.to(
                                self.configdata['train_args']['device'],
                                non_blocking=True
                            )  # targets can have non_blocking=True.
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        losses = criterion_nored(outputs, targets)
                        loss = torch.dot(
                            losses, gammas[batch_wise_indices[batch_idx]]) / (
                                gammas[batch_wise_indices[batch_idx]].sum())
                        loss.backward()
                        subtrn_loss += loss.item()
                        optimizer.step()
                        _, predicted = outputs.max(1)
                        subtrn_total += targets.size(0)
                        subtrn_correct += predicted.eq(targets).sum().item()
                train_time = time.time() - start_time

            elif self.configdata['dss_strategy']['type'] in [
                    'GLISTER', 'Random', 'Random-Online'
            ]:
                start_time = time.time()
                for batch_idx, (inputs,
                                targets) in enumerate(subset_trnloader):
                    inputs, targets = inputs.to(
                        self.configdata['train_args']['device']), targets.to(
                            self.configdata['train_args']['device'],
                            non_blocking=True
                        )  # targets can have non_blocking=True.
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    loss.backward()
                    subtrn_loss += loss.item()
                    optimizer.step()
                    _, predicted = outputs.max(1)
                    subtrn_total += targets.size(0)
                    subtrn_correct += predicted.eq(targets).sum().item()
                train_time = time.time() - start_time

            elif self.configdata['dss_strategy']['type'] in [
                    'GLISTER-Warm', 'Random-Warm'
            ]:
                start_time = time.time()
                if i < full_epochs:
                    for batch_idx, (inputs, targets) in enumerate(trainloader):
                        inputs = inputs.to(
                            self.configdata['train_args']['device'])
                        targets = targets.to(
                            self.configdata['train_args']['device'],
                            non_blocking=True)  # async transfer
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = criterion(outputs, targets)
                        loss.backward()
                        subtrn_loss += loss.item()
                        optimizer.step()
                        _, predicted = outputs.max(1)
                        subtrn_total += targets.size(0)
                        subtrn_correct += predicted.eq(targets).sum().item()
                elif i >= kappa_epochs:
                    for batch_idx, (inputs,
                                    targets) in enumerate(subset_trnloader):
                        inputs = inputs.to(
                            self.configdata['train_args']['device'])
                        targets = targets.to(
                            self.configdata['train_args']['device'],
                            non_blocking=True)  # async transfer
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = criterion(outputs, targets)
                        loss.backward()
                        subtrn_loss += loss.item()
                        optimizer.step()
                        _, predicted = outputs.max(1)
                        subtrn_total += targets.size(0)
                        subtrn_correct += predicted.eq(targets).sum().item()
                train_time = time.time() - start_time

            elif self.configdata['dss_strategy']['type'] in ['Full']:
                start_time = time.time()
                for batch_idx, (inputs, targets) in enumerate(trainloader):
                    inputs = inputs.to(
                        self.configdata['train_args']['device'])
                    targets = targets.to(
                        self.configdata['train_args']['device'],
                        non_blocking=True)  # async transfer
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    loss.backward()
                    subtrn_loss += loss.item()
                    optimizer.step()
                    _, predicted = outputs.max(1)
                    subtrn_total += targets.size(0)
                    subtrn_correct += predicted.eq(targets).sum().item()
                train_time = time.time() - start_time
            scheduler.step()
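            # Per-epoch timing includes the subset-selection overhead.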
            timing[i] = train_time + subset_selection_time
            print_args = self.configdata['train_args']['print_args']
            # print("Epoch timing is: " + str(timing[i]))
            if ((i + 1) % self.configdata['train_args']['print_every'] == 0):
                trn_loss = 0
                trn_correct = 0
                trn_total = 0
                val_loss = 0
                val_correct = 0
                val_total = 0
                tst_correct = 0
                tst_total = 0
                tst_loss = 0
                model.eval()

                if "trn_loss" in print_args:
                    with torch.no_grad():
                        for batch_idx, (inputs,
                                        targets) in enumerate(trainloader):
                            # print(batch_idx)
                            inputs = inputs.to(
                                self.configdata['train_args']['device'])
                            targets = targets.to(
                                self.configdata['train_args']['device'],
                                non_blocking=True)
                            outputs = model(inputs)
                            loss = criterion(outputs, targets)
                            trn_loss += loss.item()
                            trn_losses.append(trn_loss)
                            if "trn_acc" in print_args:
                                _, predicted = outputs.max(1)
                                trn_total += targets.size(0)
                                trn_correct += predicted.eq(
                                    targets).sum().item()
                                trn_acc.append(trn_correct / trn_total)

                if "val_loss" in print_args:
                    with torch.no_grad():
                        for batch_idx, (inputs,
                                        targets) in enumerate(valloader):
                            # print(batch_idx)
                            inputs = inputs.to(
                                self.configdata['train_args']['device'])
                            targets = targets.to(
                                self.configdata['train_args']['device'],
                                non_blocking=True)
                            outputs = model(inputs)
                            loss = criterion(outputs, targets)
                            val_loss += loss.item()
                            val_losses.append(val_loss)
                            if "val_acc" in print_args:
                                _, predicted = outputs.max(1)
                                val_total += targets.size(0)
                                val_correct += predicted.eq(
                                    targets).sum().item()
                                val_acc.append(val_correct / val_total)

                if "tst_loss" in print_args:
                    for batch_idx, (inputs, targets) in enumerate(testloader):
                        # print(batch_idx)
                        inputs, targets = inputs.to(
                            self.configdata['train_args']
                            ['device']), targets.to(
                                self.configdata['train_args']['device'],
                                non_blocking=True)
                        outputs = model(inputs)
                        loss = criterion(outputs, targets)
                        tst_loss += loss.item()
                        tst_losses.append(tst_loss)
                        if "tst_acc" in print_args:
                            _, predicted = outputs.max(1)
                            tst_total += targets.size(0)
                            tst_correct += predicted.eq(targets).sum().item()
                            tst_acc.append(tst_correct / tst_total)

                if "subtrn_acc" in print_args:
                    subtrn_acc.append(subtrn_correct / subtrn_total)

                if "subtrn_losses" in print_args:
                    subtrn_losses.append(subtrn_loss)

                print_str = "Epoch: " + str(i + 1)

                for arg in print_args:

                    if arg == "val_loss":
                        print_str += " , " + "Validation Loss: " + str(
                            val_losses[-1])

                    if arg == "val_acc":
                        print_str += " , " + "Validation Accuracy: " + str(
                            val_acc[-1])

                    if arg == "tst_loss":
                        print_str += " , " + "Test Loss: " + str(
                            tst_losses[-1])

                    if arg == "tst_acc":
                        print_str += " , " + "Test Accuracy: " + str(
                            tst_acc[-1])

                    if arg == "trn_loss":
                        print_str += " , " + "Training Loss: " + str(
                            trn_losses[-1])

                    if arg == "trn_acc":
                        print_str += " , " + "Training Accuracy: " + str(
                            trn_acc[-1])

                    if arg == "subtrn_loss":
                        print_str += " , " + "Subset Loss: " + str(
                            subtrn_losses[-1])

                    if arg == "subtrn_acc":
                        print_str += " , " + "Subset Accuracy: " + str(
                            subtrn_acc[-1])

                    if arg == "time":
                        print_str += " , " + "Timing: " + str(timing[i])

                # report metric to ray for hyperparameter optimization
                if 'report_tune' in self.configdata and self.configdata[
                        'report_tune']:
                    tune.report(mean_accuracy=val_acc[-1])

                print(print_str)

        print(self.configdata['dss_strategy']['type'] +
              " Selection Run---------------------------------")
        print("Final SubsetTrn:", subtrn_loss)
        if "val_loss" in print_args:
            if "val_acc" in print_args:
                print("Validation Loss and Accuracy: ", val_loss,
                      np.array(val_acc).max())
            else:
                print("Validation Loss: ", val_loss)

        if "tst_loss" in print_args:
            if "tst_acc" in print_args:
                print("Test Data Loss and Accuracy: ", tst_loss,
                      np.array(tst_acc).max())
            else:
                print("Test Data Loss: ", tst_loss)
        print('-----------------------------------')
        print(self.configdata['dss_strategy']['type'], file=logfile)
        print(
            '---------------------------------------------------------------------',
            file=logfile)

        if "val_acc" in print_args:
            val_str = "Validation Accuracy, "
            for val in val_acc:
                val_str = val_str + " , " + str(val)
            print(val_str, file=logfile)

        if "tst_acc" in print_args:
            tst_str = "Test Accuracy, "
            for tst in tst_acc:
                tst_str = tst_str + " , " + str(tst)
            print(tst_str, file=logfile)

        if "time" in print_args:
            time_str = "Time, "
            for t in timing:
                time_str = time_str + " , " + str(t)
            print(time_str, file=logfile)

        omp_timing = np.array(timing)
        omp_cum_timing = list(self.generate_cumulative_timing(omp_timing))
        print("Total time taken by " +
              self.configdata['dss_strategy']['type'] + " = " +
              str(omp_cum_timing[-1]))
        logfile.close()
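
A minimal, hedged sketch of the weighted subset update performed by the GradMatch branches above, pulled out as a standalone helper. The function name and the assumption that the model, optimizer, selection coefficients (`gammas`) and batch tensors are already at hand are illustrative, not part of the original snippet:

import torch


def weighted_subset_step(model, optimizer, inputs, targets, batch_gammas,
                         device):
    """One GradMatch-style step: per-sample losses weighted by the selection
    coefficients of the current subset batch (illustrative sketch)."""
    criterion_nored = torch.nn.CrossEntropyLoss(reduction='none')
    inputs = inputs.to(device)
    targets = targets.to(device, non_blocking=True)
    batch_gammas = batch_gammas.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    per_sample_losses = criterion_nored(outputs, targets)
    # Weighted average: the gammas emphasize examples whose gradients best
    # approximate the full-data gradient.
    loss = torch.dot(per_sample_losses, batch_gammas) / batch_gammas.sum()
    loss.backward()
    optimizer.step()
    return loss.item()
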
示例#29
0
    def __init__(self, opt):
        """
        Modulate the data ratio in the batch.
        For example, when select_data is "MJ-ST" and batch_ratio is "0.5-0.5",
        the 50% of the batch is filled with MJ and the other 50% of the batch is filled with ST.
        """
        log = open(f'./saved_models/{opt.experiment_name}/log_dataset.txt',
                   'a')
        dashed_line = '-' * 80
        print(dashed_line)
        log.write(dashed_line + '\n')
        print(
            f'dataset_root: {opt.train_data}\nopt.select_data: {opt.select_data}\nopt.batch_ratio: {opt.batch_ratio}'
        )
        log.write(
            f'dataset_root: {opt.train_data}\nopt.select_data: {opt.select_data}\nopt.batch_ratio: {opt.batch_ratio}\n'
        )
        assert len(opt.select_data) == len(opt.batch_ratio)

        _AlignCollate = AlignCollate(imgH=opt.imgH,
                                     imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
        self.data_loader_list = []
        self.dataloader_iter_list = []
        batch_size_list = []
        Total_batch_size = 0
        for selected_d, batch_ratio_d in zip(opt.select_data, opt.batch_ratio):
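            # Each source dataset contributes round(batch_size * ratio)
            # samples (at least 1) to every combined batch.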
            _batch_size = max(round(opt.batch_size * float(batch_ratio_d)), 1)
            print(dashed_line)
            log.write(dashed_line + '\n')
            _dataset, _dataset_log = hierarchical_dataset(
                root=opt.train_data, opt=opt, select_data=[selected_d])
            total_number_dataset = len(_dataset)
            log.write(_dataset_log)
            """
            The total number of data can be modified with opt.total_data_usage_ratio.
            ex) opt.total_data_usage_ratio = 1 indicates 100% usage, and 0.2 indicates 20% usage.
            See 4.2 section in our paper.
            """
            number_dataset = int(total_number_dataset *
                                 float(opt.total_data_usage_ratio))
            dataset_split = [
                number_dataset, total_number_dataset - number_dataset
            ]
            indices = range(total_number_dataset)
            _dataset, _ = [
                Subset(_dataset, indices[offset - length:offset]) for offset,
                length in zip(_accumulate(dataset_split), dataset_split)
            ]
            selected_d_log = f'num total samples of {selected_d}: {total_number_dataset} x {opt.total_data_usage_ratio} (total_data_usage_ratio) = {len(_dataset)}\n'
            selected_d_log += f'num samples of {selected_d} per batch: {opt.batch_size} x {float(batch_ratio_d)} (batch_ratio) = {_batch_size}'
            print(selected_d_log)
            log.write(selected_d_log + '\n')
            batch_size_list.append(str(_batch_size))
            Total_batch_size += _batch_size

            _data_loader = torch.utils.data.DataLoader(
                _dataset,
                batch_size=_batch_size,
                shuffle=True,
                num_workers=int(opt.workers),
                collate_fn=_AlignCollate,
                pin_memory=True)
            self.data_loader_list.append(_data_loader)
            self.dataloader_iter_list.append(iter(_data_loader))

        Total_batch_size_log = f'{dashed_line}\n'
        batch_size_sum = '+'.join(batch_size_list)
        Total_batch_size_log += f'Total_batch_size: {batch_size_sum} = {Total_batch_size}\n'
        Total_batch_size_log += f'{dashed_line}'
        opt.batch_size = Total_batch_size

        print(Total_batch_size_log)
        log.write(Total_batch_size_log + '\n')
        log.close()
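
The constructor above only builds one DataLoader and iterator per selected dataset; a companion method typically assembles the balanced batch by drawing a sub-batch from each iterator and restarting it when exhausted. A hedged sketch under that assumption (the function and variable names below are illustrative, not the class's actual API):

import torch


def get_balanced_batch(data_loader_list, dataloader_iter_list):
    """Draw one combined batch by taking a sub-batch from every loader,
    restarting a loader's iterator once it is exhausted (illustrative)."""
    images, texts = [], []
    for i, loader_iter in enumerate(dataloader_iter_list):
        try:
            image, text = next(loader_iter)
        except StopIteration:
            # Restart this dataset so it keeps contributing its share.
            dataloader_iter_list[i] = iter(data_loader_list[i])
            image, text = next(dataloader_iter_list[i])
        images.append(image)
        texts += list(text)
    # Concatenate along the batch dimension to restore the full batch size.
    return torch.cat(images, dim=0), texts
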
示例#30
0
def preTrainFeatureExtractor(feature_extractor,
                             dataset,
                             batch_size,
                             num_epochs,
                             optimizer=Adam,
                             cuda=True):
    # print("Començem el pre-entrenament del feature extractor\n")
    if cuda:
        gpu = torch.device("cuda:0")
        feature_extractor = feature_extractor.to(gpu)
    feature_extractor.train()
    torch.manual_seed(0)
    idxs = torch.randperm(len(dataset))
    evaldataset = Subset(dataset, idxs[:int(len(dataset) / 6)])
    traindataset = Subset(dataset, idxs[int(len(dataset) / 6):])
    # Build the train/eval dataloaders
    traindataloader = DataLoader(traindataset,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 pin_memory=True,
                                 drop_last=False)
    evaldataloader = DataLoader(evaldataset,
                                batch_size=batch_size,
                                pin_memory=True,
                                drop_last=False)

    optimizer = optimizer(feature_extractor.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    training_losses = []
    eval_losses = []

    running_loss = 0
    total = 0
    for epoch in range(num_epochs):
        for i, data in enumerate(traindataloader):
            data1, label = data
            # Duplicate the data (the network expects a two-channel input),
            # but apply a mask so that all of the network's parameters learn.
            data2 = copy.deepcopy(data1)
            mask = torch.randint(0, 2, data1.shape)
            data1[mask == 0] = 0
            data2[mask == 1] = 0
            data = torch.cat((data1, data2), dim=1)

            if cuda:
                data = data.to(gpu)
                label = label.to(gpu)
            output = feature_extractor(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            feature_extractor.zero_grad()

            running_loss += loss.item()  # .item() avoids retaining the graph
            total += label.size(0)
            """
            if i % (10000//batch_size) == 10000//batch_size-1:
                print("{}.{} La loss mitjana sobre les últimes {} dades és {}".format(epoch, i//(10000//batch_size), total, running_loss/total))
                training_losses.append(running_loss/total)
                running_loss = 0
                total = 0

                with torch.no_grad():
                    for data1, label in evaldataloader:
                        data2 = copy.deepcopy(data1)
                        mask = torch.randint(0, 2, data1.shape)
                        data1[mask == 0] = 0
                        data2[mask == 1] = 0
                        data = torch.cat((data1, data2), dim=1)
                        if cuda:
                            data = data.cuda()
                            label = label.cuda()
                        output = feature_extractor(data)
                        running_loss += criterion(output, label)
                        total += label.size(0)
                    print("{}.{} La loss mitjana sobre el conjunt de validació és {}".format(epoch,
                                                                                          i // (10000 // batch_size),
                                                                                          running_loss/total))
                    eval_losses.append(running_loss/total)
                    running_loss = 0
                    total = 0


    # print("\nEl pre-entrenament del feature extractor ha finalitzat\n")


    plot1, = plt.plot(training_losses, 'r', label="train_loss")
    plot2, = plt.plot(eval_losses, 'b', label="eval_loss")
    plt.legend(handles=[plot1, plot2])
    plt.show()

    correct = 0
    total = 0
    with torch.no_grad():
        for inputs1, labels in evaldataloader:
            inputs2 = copy.deepcopy(inputs1)
            mask = torch.randint(0, 2, inputs1.shape)
            inputs1[mask == 0] = 0
            inputs2[mask == 1] = 0
            inputs = torch.cat((inputs1, inputs2), dim=1)
            if cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            predictions = feature_extractor(inputs)

            _, predictions = torch.max(predictions.data, 1)
            total += labels.size(0)
            correct += (predictions == labels).sum().item()

    print("Accuracy of the network over the eval data is: ", (100 * correct / total))
    """
    feature_extractor.eval()
    return feature_extractor.cpu()
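
The inline duplication-and-masking above can be factored into a small helper so the same two-channel input construction can be reused at evaluation time. A hedged sketch (the function name is an assumption, not part of the original code):

import torch


def make_two_channel_input(batch):
    """Split one single-channel batch into two complementary, randomly
    masked channels, as in the training loop above (illustrative helper)."""
    first, second = batch.clone(), batch.clone()
    mask = torch.randint(0, 2, batch.shape)
    first[mask == 0] = 0
    second[mask == 1] = 0
    # Every element survives in exactly one of the two channels, so both
    # input channels of the network receive gradient signal.
    return torch.cat((first, second), dim=1)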