Example #1
def k_win_train(model, dataset_list, num_epochs):
    dataset = ConcatDataset(dataset_list)
    if len(dataset) < 2:
        raise ValueError("Dataset is too small to split into train and test sets")
    learning_rate = 0.001
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        print(f'Epoch [{epoch+1}/{num_epochs}]')
        # Split the dataset into train and test subsets (re-sampled every epoch)

        train_length = int(len(dataset) * 0.8)
        test_length = len(dataset) - train_length

        train_dataset, test_dataset = random_split(dataset,
                                                   (train_length, test_length))

        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=10,
                                  shuffle=True)

        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=10,
                                 shuffle=True)

        # Training
        train_step(model, train_loader, learning_rate, criterion, optimizer)

        # Validation
        totP = validation_step(model, test_loader)

    return totP
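
Example #1 follows a pattern that recurs throughout these snippets: merge several map-style datasets with ConcatDataset, then carve out train/test subsets with random_split. A minimal self-contained sketch of that pattern, using hypothetical TensorDataset inputs in place of the real dataset_list:

import torch
from torch.utils.data import ConcatDataset, DataLoader, TensorDataset, random_split

# Two hypothetical datasets standing in for the entries of dataset_list
ds_a = TensorDataset(torch.randn(60, 8), torch.randint(0, 2, (60,)))
ds_b = TensorDataset(torch.randn(40, 8), torch.randint(0, 2, (40,)))

dataset = ConcatDataset([ds_a, ds_b])                # 100 samples in total
train_length = int(len(dataset) * 0.8)               # 80/20 split, as in k_win_train
train_set, test_set = random_split(dataset, (train_length, len(dataset) - train_length))

train_loader = DataLoader(train_set, batch_size=10, shuffle=True)
test_loader = DataLoader(test_set, batch_size=10, shuffle=True)
print(len(train_set), len(test_set))                 # 80 20

Unlike k_win_train, which redraws the split at the start of every epoch, this sketch draws it once, so the test subset stays fixed across epochs.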
Example #2
def patients_dataset():
    patients = []
    for pats in os.listdir(data_root):
        if pats in ['patient23', 'patient59', 'patient85']:
            continue
        patients.append(Patient(os.path.join(data_root, pats)))
    sets = ConcatDataset(patients)
    train_len = int(len(sets) * 0.75)
    train, valid = random_split(sets, [train_len, len(sets) - train_len])
    return train, valid
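
ConcatDataset only requires that each member be a map-style dataset, i.e. that it implement __len__ and __getitem__; the Patient class above is presumably such a dataset. A minimal sketch of the same 75/25 split, with a hypothetical PatientLike class standing in for Patient:

import torch
from torch.utils.data import ConcatDataset, Dataset, random_split

class PatientLike(Dataset):
    """Hypothetical stand-in for Patient: one (features, label) record per index."""
    def __init__(self, n_records):
        self.data = torch.randn(n_records, 16)
        self.labels = torch.randint(0, 2, (n_records,))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

sets = ConcatDataset([PatientLike(30), PatientLike(50), PatientLike(20)])
train_len = int(len(sets) * 0.75)
train, valid = random_split(sets, [train_len, len(sets) - train_len])
print(len(train), len(valid))  # 75 25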
Example #3
def image_normalization(traindir, validdir, verbose=1):
    # Load data from folders
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.ToTensor()  # rescale to [0.0, 1.0]
    )
    valid_dataset = datasets.ImageFolder(
        validdir,
        transforms.ToTensor()  # rescale to [0.0, 1.0]
    )

    # Combine train and valid datasets
    dataset = ConcatDataset([train_dataset, valid_dataset])
    if verbose > 1:
        print('Samples for normalization: \t%d' % len(dataset))

    # Create data loaders
    # The larger the batch_size the better the mean and std approximation
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=50,
                                             shuffle=True,
                                             num_workers=0)

    imgs_mean = []
    imgs_std0 = []
    imgs_std1 = []

    # Use GPU if available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    for i, (images, labels) in enumerate(dataloader, 0):
        # Copy to GPU if available
        images, labels = images.to(device), labels.to(device)

        # shape (batch_size, 3, height, width); move back to CPU for NumPy
        images = images.cpu().numpy()

        batch_mean = np.mean(images, axis=(0, 2, 3))
        batch_std0 = np.std(images, axis=(0, 2, 3))
        batch_std1 = np.std(images, axis=(0, 2, 3),
                            ddof=1)  # sample std (Bessel's correction, divisor N-1)

        imgs_mean.append(batch_mean)
        imgs_std0.append(batch_std0)
        imgs_std1.append(batch_std1)

    # shape (num_iterations, 3) -> (mean across 0th axis) -> shape (3,)
    imgs_mean = np.array(imgs_mean).mean(axis=0)
    imgs_std0 = np.array(imgs_std0).mean(axis=0)
    imgs_std1 = np.array(imgs_std1).mean(axis=0)

    if verbose > 1:
        print("Dataset Normalization: mean={}, std1={}, std2{}".format(
            imgs_mean, imgs_std0, imgs_std1))

    return imgs_mean, imgs_std0, imgs_std1
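
The per-channel statistics returned by image_normalization are typically fed into transforms.Normalize in the actual training pipeline. A short usage sketch; the numeric values below are illustrative placeholders, not outputs of the function:

import numpy as np
from torchvision import transforms

# Illustrative placeholder values; in practice use the output of image_normalization(...)
imgs_mean = np.array([0.485, 0.456, 0.406])
imgs_std0 = np.array([0.229, 0.224, 0.225])

train_transform = transforms.Compose([
    transforms.ToTensor(),                                          # rescale to [0.0, 1.0]
    transforms.Normalize(imgs_mean.tolist(), imgs_std0.tolist()),   # per-channel (x - mean) / std
])

Note that averaging per-batch standard deviations, as the function does, only approximates the dataset-wide standard deviation (an exact computation would aggregate sums and squared sums over all pixels), but it is usually close enough for normalization.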
Example #4
class MultiwayDataset:
    def __init__(self, pairs, tokenizer, dictionary):
        self.datasets = [
            ParallelDataset(fst, snd, tokenizer, dictionary)
            for fst, snd in pairs
        ]
        self.concatenated = ConcatDataset(self.datasets)
        self.lengths = list(
            itertools.chain(*[dataset.lengths for dataset in self.datasets]))

        self.indices = np.argsort(self.lengths)

    def __getitem__(self, idx):
        # idy = self.indices[idx]
        # print(idy, idx, self.lengths[idx])
        return self.concatenated[idx]

    def __len__(self):
        return len(self.concatenated)
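
MultiwayDataset simply delegates indexing to the wrapped ConcatDataset, which maps a global index to the right sub-dataset via its cumulative sizes (the unused self.indices, judging by the commented-out lines, was presumably meant for length-sorted sampling). A small sketch of that index mapping with toy datasets:

import bisect
import torch
from torch.utils.data import ConcatDataset, TensorDataset

ds_a = TensorDataset(torch.arange(3))    # global indices 0..2
ds_b = TensorDataset(torch.arange(5))    # global indices 3..7
concat = ConcatDataset([ds_a, ds_b])

idx = 4
# ConcatDataset stores cumulative sizes ([3, 8] here) and bisects into them
which = bisect.bisect_right(concat.cumulative_sizes, idx)                  # -> 1 (second dataset)
local = idx - concat.cumulative_sizes[which - 1] if which > 0 else idx     # -> 1
assert torch.equal(concat[idx][0], concat.datasets[which][local][0])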
Example #5
def main():
    parser = argparse.ArgumentParser(description='Commonsense Dataset Dev')

    # Experiment params
    parser.add_argument('--mode', type=str, help='train or test mode', required=True, choices=['train', 'test'])
    parser.add_argument('--expt_dir', type=str, help='root directory to save model & summaries')
    parser.add_argument('--expt_name', type=str, help='expt_dir/expt_name: organize experiments')
    parser.add_argument('--run_name', type=str, help='expt_dir/expt_name/run_name: organize training runs')
    parser.add_argument('--test_file', type=str, default='test',
                        help='The file containing test data to evaluate in test mode.')

    # Model params
    parser.add_argument('--model', type=str, help='transformer model (e.g. roberta-base)', required=True)
    parser.add_argument('--num_layers', type=int,
                        help='Number of hidden layers in transformers (default number if not provided)', default=-1)
    parser.add_argument('--seq_len', type=int, help='tokenized input sequence length', default=256)
    parser.add_argument('--num_cls', type=int, help='model number of classes', default=2)
    parser.add_argument('--ckpt', type=str, help='path to model checkpoint .pth file')

    # Data params
    parser.add_argument('--pred_file', type=str, help='address of prediction csv file, for "test" mode',
                        default='results.csv')
    parser.add_argument('--dataset', type=str, default='com2sense')
    # Training params
    parser.add_argument('--lr', type=float, help='learning rate', default=1e-5)
    parser.add_argument('--epochs', type=int, help='number of epochs', default=100)
    parser.add_argument('--batch_size', type=int, help='batch size', default=8)
    parser.add_argument('--acc_step', type=int, help='gradient accumulation steps', default=1)
    parser.add_argument('--log_interval', type=int, help='interval size for logging training summaries', default=100)
    parser.add_argument('--save_interval', type=int, help='save model after `n` weight update steps', default=30000)
    parser.add_argument('--val_size', type=int, help='validation set size for evaluating metrics; '
                                                     'must be even to compute pairwise accuracy', default=2048)

    # GPU params
    parser.add_argument('--gpu_ids', type=str, help='GPU IDs (0,1,2,..) separated by comma', default='0')
    parser.add_argument('-data_parallel',
                        help='Whether to use nn.DataParallel (currently available for BERT-based models)',
                        action='store_true')
    parser.add_argument('--use_amp', type=str2bool, help='Automatic-Mixed Precision (T/F)', default='T')
    parser.add_argument('-cpu', help='use cpu only (for test)', action='store_true')

    # Misc params
    parser.add_argument('--num_workers', type=int, help='number of worker threads for Dataloader', default=1)

    # Parse Args
    args = parser.parse_args()

    # Dataset list
    dataset_names = csv2list(args.dataset)
    print()

    # Multi-GPU
    device_ids = csv2list(args.gpu_ids, int)
    print('Selected GPUs: {}'.format(device_ids))

    # Device for loading dataset (batches)
    device = torch.device(device_ids[0])
    if args.cpu:
        device = torch.device('cpu')

    # Text-to-Text
    text2text = ('t5' in args.model)
    uniqa = ('unified' in args.model)

    # args.use_amp has already been converted to a bool by str2bool
    assert not (text2text and args.use_amp), 'use_amp should be F when using T5-based models.'
    # Train params
    n_epochs = args.epochs
    batch_size = args.batch_size
    lr = args.lr
    accumulation_steps = args.acc_step
    # TODO: Verify the grad-accum code (loss averaging seems slightly incorrect)

    # Train
    if args.mode == 'train':
        # Ensure CUDA available for training
        assert torch.cuda.is_available(), 'No CUDA device for training!'

        # Setup train log directory
        log_dir = os.path.join(args.expt_dir, args.expt_name, args.run_name)

        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

        # TensorBoard summaries setup  -->  /expt_dir/expt_name/run_name/
        writer = SummaryWriter(log_dir)

        # Train log file
        log_file = setup_logger(parser, log_dir)

        print('Training Log Directory: {}\n'.format(log_dir))

        # Dataset & Dataloader
        dataset = BaseDataset('train', tokenizer=args.model, max_seq_len=args.seq_len, text2text=text2text, uniqa=uniqa)
        train_datasets = ConcatDataset([dataset])

        dataset = BaseDataset('dev', tokenizer=args.model, max_seq_len=args.seq_len, text2text=text2text, uniqa=uniqa)
        val_datasets = ConcatDataset([dataset])

        train_loader = DataLoader(train_datasets, batch_size, shuffle=True, drop_last=True,
                                  num_workers=args.num_workers)
        val_loader = DataLoader(val_datasets, batch_size, shuffle=True, drop_last=True, num_workers=args.num_workers)

        # In multi-dataset setups, also track dataset-specific loaders for validation metrics
        val_dataloaders = []
        if len(dataset_names) > 1:
            for val_dset in val_datasets.datasets:
                loader = DataLoader(val_dset, batch_size, shuffle=True, drop_last=True, num_workers=args.num_workers)

                val_dataloaders.append(loader)

        # Tokenizer
        tokenizer = dataset.get_tokenizer()

        # Split sizes
        train_size = len(train_datasets)
        val_size = len(val_datasets)
        log_msg = 'Train: {} \nValidation: {}\n\n'.format(train_size, val_size)

        # Min of the total & subset size
        val_used_size = min(val_size, args.val_size)
        log_msg += 'Validation Accuracy is computed using {} samples. See --val_size\n'.format(val_used_size)

        log_msg += 'No. of Classes: {}\n'.format(args.num_cls)
        print_log(log_msg, log_file)

        # Build Model
        model = Transformer(args.model, args.num_cls, text2text, device_ids, num_layers=args.num_layers)
        if args.data_parallel and not args.ckpt:
            model = nn.DataParallel(model, device_ids=device_ids)
            device = torch.device(f'cuda:{model.device_ids[0]}')

        if not text2text:
            model.to(device)

        model.train()

        # Loss & Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr)
        optimizer.zero_grad()

        scaler = GradScaler(enabled=args.use_amp)

        # Step & Epoch
        start_epoch = 1
        curr_step = 1
        best_val_acc = 0.0

        # Load model checkpoint file (if specified)
        if args.ckpt:
            checkpoint = torch.load(args.ckpt, map_location=device)

            # Load model & optimizer
            model.load_state_dict(checkpoint['model_state_dict'])
            if args.data_parallel:
                model = nn.DataParallel(model, device_ids=device_ids)
                device = torch.device(f'cuda:{model.device_ids[0]}')
            model.to(device)

            curr_step = checkpoint['curr_step']
            start_epoch = checkpoint['epoch']
            prev_loss = checkpoint['loss']

            log_msg = 'Resuming Training...\n'
            log_msg += 'Model successfully loaded from {}\n'.format(args.ckpt)
            log_msg += 'Training loss: {:2f} (from ckpt)\n'.format(prev_loss)

            print_log(log_msg, log_file)

        steps_per_epoch = len(train_loader)
        start_time = time()

        for epoch in range(start_epoch, start_epoch + n_epochs):
            for batch in tqdm(train_loader):
                # Load batch to device
                batch = {k: v.to(device) for k, v in batch.items()}

                with autocast(args.use_amp):
                    if text2text:
                        # Forward + Loss
                        output = model(batch)
                        loss = output[0]

                    else:
                        # Forward Pass
                        label_logits = model(batch)
                        label_gt = batch['label']

                        # Compute Loss
                        loss = criterion(label_logits, label_gt)

                if args.data_parallel:
                    loss = loss.mean()
                # Backward Pass
                loss /= accumulation_steps
                scaler.scale(loss).backward()

                if curr_step % accumulation_steps == 0:
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()

                # Print Results - Loss value & Validation Accuracy
                if curr_step % args.log_interval == 0:
                    # Validation set accuracy
                    if val_datasets:
                        val_metrics = compute_eval_metrics(model, val_loader, device, val_used_size, tokenizer,
                                                           text2text, parallel=args.data_parallel)

                        # Reset the mode to training
                        model.train()

                        log_msg = 'Validation Accuracy: {:.2f} %  || Validation Loss: {:.4f}'.format(
                            val_metrics['accuracy'], val_metrics['loss'])

                        print_log(log_msg, log_file)

                        # Add summaries to TensorBoard
                        writer.add_scalar('Val/Loss', val_metrics['loss'], curr_step)
                        writer.add_scalar('Val/Accuracy', val_metrics['accuracy'], curr_step)

                    # Add summaries to TensorBoard
                    writer.add_scalar('Train/Loss', loss.item(), curr_step)

                    # Compute elapsed & remaining time for training to complete
                    time_elapsed = (time() - start_time) / 3600

                    log_msg = 'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f} | time elapsed: {:.2f}h |'.format(
                        epoch, n_epochs, curr_step, steps_per_epoch, loss.item(), time_elapsed)

                    print_log(log_msg, log_file)

                # Save the model
                if curr_step % args.save_interval == 0:
                    path = os.path.join(log_dir, 'model_' + str(curr_step) + '.pth')

                    state_dict = {'model_state_dict': model.state_dict(),
                                  'curr_step': curr_step, 'loss': loss.item(),
                                  'epoch': epoch, 'val_accuracy': best_val_acc}

                    torch.save(state_dict, path)

                    log_msg = 'Saving the model at step {} to directory: {}'.format(curr_step, log_dir)
                    print_log(log_msg, log_file)

                curr_step += 1

            # Validation accuracy on the entire set
            if val_datasets:
                log_msg = '-------------------------------------------------------------------------\n'
                val_metrics = compute_eval_metrics(model, val_loader, device, val_size, tokenizer, text2text,
                                                   parallel=args.data_parallel)

                log_msg += '\nAfter {} epoch:\n'.format(epoch)
                log_msg += 'Validation Accuracy: {:.2f} %  || Validation Loss: {:.4f}\n'.format(
                    val_metrics['accuracy'], val_metrics['loss'])

                # For Multi-Dataset setup:
                if len(dataset_names) > 1:
                    # compute validation set metrics on each dataset independently
                    for loader in val_dataloaders:
                        metrics = compute_eval_metrics(model, loader, device, val_size, tokenizer, text2text,
                                                       parallel=args.data_parallel)

                        log_msg += '\n --> {}\n'.format(loader.dataset.get_classname())
                        log_msg += 'Validation Accuracy: {:.2f} %  || Validation Loss: {:.4f}\n'.format(
                            metrics['accuracy'], metrics['loss'])

                # Save best model after every epoch
                if val_metrics["accuracy"] > best_val_acc:
                    best_val_acc = val_metrics["accuracy"]

                    step = '{:.1f}k'.format(curr_step / 1000) if curr_step > 1000 else '{}'.format(curr_step)
                    filename = 'ep_{}_stp_{}_acc_{:.4f}_{}.pth'.format(
                        epoch, step, best_val_acc, args.model.replace('-', '_').replace('/', '_'))

                    path = os.path.join(log_dir, filename)
                    if args.data_parallel:
                        model_state_dict = model.module.state_dict()
                    else:
                        model_state_dict = model.state_dict()
                    state_dict = {'model_state_dict': model_state_dict,
                                  'curr_step': curr_step, 'loss': loss.item(),
                                  'epoch': epoch, 'val_accuracy': best_val_acc}

                    torch.save(state_dict, path)

                    log_msg += "\n** Best Performing Model: {:.2f} ** \nSaving weights at {}\n".format(best_val_acc,
                                                                                                       path)

                log_msg += '-------------------------------------------------------------------------\n\n'
                print_log(log_msg, log_file)

                # Reset the mode to training
                model.train()

        writer.close()
        log_file.close()

    elif args.mode == 'test':

        # Dataloader
        dataset = BaseDataset(args.test_file, tokenizer=args.model, max_seq_len=args.seq_len, text2text=text2text,
                              uniqa=uniqa)

        loader = DataLoader(dataset, batch_size, num_workers=args.num_workers)

        tokenizer = dataset.get_tokenizer()

        model = Transformer(args.model, args.num_cls, text2text, num_layers=args.num_layers)
        model.eval()
        model.to(device)

        # Load model weights
        if args.ckpt:
            checkpoint = torch.load(args.ckpt, map_location=device)
            model.load_state_dict(checkpoint['model_state_dict'])
        data_len = len(dataset)
        print('Total Samples: {}'.format(data_len))

        is_pairwise = 'com2sense' in dataset_names

        # Inference
        metrics = compute_eval_metrics(model, loader, device, data_len, tokenizer, text2text, is_pairwise=is_pairwise,
                                       is_test=True, parallel=args.data_parallel)

        df = pd.DataFrame(metrics['meta'])
        df.to_csv(args.pred_file)

        print(f'Results for model {args.model}')
        print(f'Results evaluated on file {args.test_file}')
        print('Sentence Accuracy: {:.4f}'.format(metrics['accuracy']))
        if is_pairwise:
            print('Pairwise Accuracy: {:.4f}'.format(metrics['pair_acc']))
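
The inner training loop of Example #5 combines automatic mixed precision (autocast plus GradScaler) with gradient accumulation. A condensed, self-contained sketch of just that pattern, with a toy linear model standing in for the Transformer wrapper:

import torch
import torch.nn as nn
from torch.cuda.amp import GradScaler, autocast

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
use_amp = torch.cuda.is_available()              # AMP only applies on CUDA

model = nn.Linear(16, 2).to(device)              # toy stand-in for Transformer(...)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
scaler = GradScaler(enabled=use_amp)

accumulation_steps = 4
optimizer.zero_grad()

for curr_step in range(1, 17):
    x = torch.randn(8, 16, device=device)
    y = torch.randint(0, 2, (8,), device=device)

    with autocast(enabled=use_amp):
        loss = criterion(model(x), y)            # forward pass under autocast

    loss = loss / accumulation_steps             # average the loss over accumulated steps
    scaler.scale(loss).backward()                # scaled backward pass

    if curr_step % accumulation_steps == 0:      # optimizer step every accumulation_steps
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

Dividing the loss by accumulation_steps keeps the accumulated gradient comparable to a single large-batch update, which is the behaviour the TODO near the top of Example #5 asks to verify.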
Example #6
val_transform = transforms.Compose([
    ToTensor(),
])

train_datasets = [
    LyftDataset(d, hood_path, top, bottom, train_transform,
                transforms.ToTensor(), False) for d in train_dirs
]
train_dataset = ConcatDataset(train_datasets)

val_datasets = [
    LyftDataset(d, hood_path, top, bottom, val_transform,
                transforms.ToTensor(), False) for d in val_dirs
]
val_dataset = ConcatDataset(val_datasets)
print("Train imgs:", train_dataset.__len__())
print("Val imgs:", val_dataset.__len__())

assert torch.cuda.is_available(), "Sorry, no CUDA device found"

train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch, shuffle=False)

device = torch.device("cuda")

train_loss = LyftLoss(bce_w=train_bce_w,
                      car_w=train_car_w,
                      other_w=train_other_w).to(device)
val_loss = LyftLoss(bce_w=val_bce_w, car_w=val_car_w,
                    other_w=val_other_w).to(device)
Example #7
    def __init__(self, args):
        self.args = args
        if args.mode == 'train':
            data_dirs = os.listdir(args.data_dir)
            data_transform = image_transforms(
                mode=args.mode,
                tensor_type=args.tensor_type,
                augment_parameters=args.augment_parameters,
                do_augmentation=args.do_augmentation)
            train_datasets = [
                KittiLoader(os.path.join(args.data_dir, data_dir),
                            True,
                            transform=data_transform) for data_dir in data_dirs
            ]
            train_dataset = ConcatDataset(train_datasets)
            self.n_img = len(train_dataset)
            print('Use a dataset with', self.n_img, 'images')
            self.train_loader = DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True)
            self.device = torch.device(
                ('cuda:0' if torch.cuda.is_available()
                 and args.tensor_type == 'torch.cuda.FloatTensor' else 'cpu'))
            self.loss_function = MonodepthLoss(
                n=4,
                SSIM_w=0.85,
                disp_gradient_w=0.1,
                lr_w=1,
                tensor_type=args.tensor_type).to(self.device)
            if args.model == 'resnet50_md':
                self.model = models_resnet.resnet50_md(3)
            elif args.model == 'resnet18_md':
                self.model = models_resnet.resnet18_md(3)
            self.model = self.model.to(self.device)
            self.optimizer = optim.Adam(self.model.parameters(),
                                        lr=args.learning_rate)
            if args.tensor_type == 'torch.cuda.FloatTensor':
                torch.cuda.synchronize()
        elif args.mode == 'test':
            self.output_directory = args.output_directory

            # loading data

            self.input_height = args.input_height
            self.input_width = args.input_width
            data_transform = image_transforms(mode=args.mode,
                                              tensor_type=args.tensor_type)

            test_dataset = ImageLoader(args.data_dir,
                                       False,
                                       transform=data_transform)
            self.num_test_examples = len(test_dataset)
            self.test_loader = DataLoader(test_dataset,
                                          batch_size=1,
                                          shuffle=False)

            # set up CPU device

            self.device = torch.device('cpu')

            # define model

            if args.model == 'resnet50_md':
                self.model = models_resnet.resnet50_md(3)
            elif args.model == 'resnet18_md':
                self.model = models_resnet.resnet18_md(3)
            self.model.load_state_dict(torch.load(args.model_path))
            self.model = self.model.to(self.device)