def open_rgby(path, filename, train=True, stats=None):
    """Read a multi-channel protein image stored as per-color grayscale PNGs.

    Loads ``<filename>_<color>.png`` for red, green and blue (the yellow
    channel is currently disabled), resizes each channel to 512x512, scales
    to [0, 1] and stacks the channels into an HxWxC float32 array.

    Parameters
    ----------
    path: directory containing the per-channel PNG files.
    filename: image id; '_<color>.png' is appended per channel.
    train: when True, apply random rotate / dihedral / lighting augmentation.
    stats: (mean, std) normalization statistics; currently ignored (see note).
    """
    # colors = ['red', 'green', 'blue', 'yellow']
    colors = ['red', 'green', 'blue']
    flags = cv2.IMREAD_GRAYSCALE
    img = [
        cv2.imread(os.path.join(path, filename + '_' + color + '.png'),
                   flags).astype(np.float32)
        for color in colors
    ]
    img = [cv2.resize(x, (512, 512)) / 255 for x in img]
    img = np.stack(img, axis=-1)
    ##### do not normalize
    # NOTE(review): stats is deliberately discarded here, so the Normalize
    # branch below is dead code kept only for easy re-enabling.
    stats = None
    if stats is not None:  # PEP 8: use "is not None", not "not ... is None"
        m, s = stats
        img = transforms.Normalize(img, m, s)
    if train:
        img = transforms.RandomRotate(img, 30)
        img = transforms.RandomDihedral(img)
        img = transforms.RandomLighting(img, 0.05, 0.05)
    return img
def main():
    """Entry point: build datasets, model, optimizer and scheduler, then train."""
    opts = parser.parse_args()
    start_step = 0
    run_name = f'{opts.name}_{hp.max_lr}_{hp.cycle_length}'

    # Joint (image + mask) augmentation pipelines: heavy for training,
    # deterministic for validation.
    train_tf = segtrans.JointCompose([
        segtrans.Resize(400),
        segtrans.RandomRotate(0, 90),
        segtrans.RandomCrop(256, 256),
        segtrans.ToTensor(),
        segtrans.Normalize(mean=hp.mean, std=hp.std),
    ])
    val_tf = segtrans.JointCompose([
        segtrans.PadToFactor(),
        segtrans.ToTensor(),
        segtrans.Normalize(mean=hp.mean, std=hp.std),
    ])

    train_dataset = DSBDataset(f'{opts.data}/train', transforms=train_tf)
    val_dataset = DSBDataset(f'{opts.data}/val', transforms=val_tf)

    model = Unet()
    if opts.checkpoint:
        # Resume: restore weights, the step counter and the original run name.
        saved = torch.load(opts.checkpoint)
        model.load_state_dict(saved['state'])
        start_step = saved['step']
        run_name = saved['exp_name']

    optimizer = Adam(model.parameters(), lr=hp.max_lr)
    if opts.find_lr:
        scheduler = LRFinderScheduler(optimizer)
    else:
        scheduler = SGDRScheduler(optimizer, min_lr=hp.min_lr, max_lr=hp.max_lr,
                                  cycle_length=hp.cycle_length,
                                  current_step=start_step)

    model.cuda(device=opts.device)
    train(model, optimizer, scheduler, train_dataset, val_dataset,
          n_epochs=opts.epochs, batch_size=opts.batch_size,
          exp_name=run_name, device=opts.device, step=start_step)
def train(model, criterion, optimizer, lr_scheduler, data_dir, lr):
    """Run the train/test loop, checkpointing on best test accuracy.

    Parameters
    ----------
    model: torch.nn.Module to optimize.
    criterion: loss function mapping (outputs, labels) to a scalar loss.
    optimizer: torch.optim optimizer over model.parameters().
    lr_scheduler: callable (optimizer, epoch, lr) -> optimizer, invoked at
        the start of each training phase.
    data_dir: root directory passed to make_datasets().
    lr: base learning rate forwarded to lr_scheduler.
    """
    time_at_start = time.time()
    best_acc = 0.0
    history = {
        "train_acc": [],
        "train_loss": [],
        "test_acc": [],
        "test_loss": []
    }
    datasets = make_datasets(data_dir, transforms=transforms.RandomRotate())
    dataloader = make_dataloaders(datasets)
    for epoch in range(NUM_EPOCHS):
        print("Epoch {}/{}".format(epoch, NUM_EPOCHS - 1))
        print("=" * 10)
        for phase in ["train", "test"]:
            if phase == "train":
                optimizer = lr_scheduler(optimizer, epoch, lr)
                model.train(True)
            else:
                model.train(False)
            print("Setting model to {} mode".format(phase))
            running_loss = 0.0
            running_corrects = 0
            len_data = len(datasets[phase])
            for index, data in enumerate(dataloader[phase]):
                inputs, labels, _ = data
                # FIX: torch.autograd.Variable is deprecated (a no-op since
                # torch 0.4); move the tensors to the GPU directly.
                if USE_GPU:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                # zero parameter gradients before the forward pass
                optimizer.zero_grad()
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                labels = labels.view(-1)
                loss = criterion(outputs, labels)
                if phase == "train":
                    loss.backward()
                    optimizer.step()
                # FIX: loss.data[0] raises IndexError on torch >= 0.5
                # (0-dim tensor indexing); .item() is the supported accessor.
                running_loss += loss.item()
                # .item() keeps running_corrects a plain int so epoch_acc
                # below is a JSON-serializable float, not a tensor.
                running_corrects += torch.sum(preds == labels.data).item()
                if VERBOSE:
                    print_model_stats(index, BATCH_SIZE, len_data, loss.item())
            print("\n")
            # epoch stats
            epoch_loss = running_loss / len_data
            epoch_acc = running_corrects / len_data
            history["{}_acc".format(phase)].append(epoch_acc)
            history["{}_loss".format(phase)].append(epoch_loss)
            print("{} Loss: {:.4f} | Acc: {:.4f}".format(
                phase, epoch_loss, epoch_acc))
            # checkpoint the best model based on validation accuracy
            if phase == "test" and epoch_acc > best_acc:
                best_acc = epoch_acc
                model_path = "{}_checkpoint".format(SAVE_PATH)
                print("checkpointing model at {}".format(model_path))
                torch.save(model.state_dict(), model_path)
        # write history to JSON file
        history_path = "{}_history".format(SAVE_PATH)
        with open(history_path, "w") as f:
            json.dump(history, f, indent=4)
    time_elapsed = time.time() - time_at_start
    print("Training complete in {:.0f}m {:.0f}s".format(
        time_elapsed // 60, time_elapsed % 60))
    print("Best validation accuracy: {:.4f}".format(best_acc))
def train_model(model, criterion, optimizer, lr_scheduler):
    """Train *model* on the CSV dataset, then print per-cell test predictions.

    Parameters
    ----------
    model: torch.nn.Module to optimize.
    criterion: loss function mapping (outputs, labels) to a scalar loss.
    optimizer: torch.optim optimizer over model.parameters().
    lr_scheduler: callable (optimizer, epoch) -> optimizer, applied each epoch.
    """
    history = {"train_acc": [], "train_loss": [], "test_acc": [], "test_loss": []}
    transform = transforms.RandomRotate()
    datasets = {}
    train_data = dataset.CSVDataset(
        data_dir=DATA_DIR, csv=TRAIN_CSV, complete_csv=DATAFRAME,
        transforms=transform
    )
    train_data.equalise_groups("MoA", under_sample=False)
    datasets["train"] = train_data
    datasets["test"] = dataset.CSVDataset(
        data_dir=DATA_DIR, csv=TEST_CSV, complete_csv=DATAFRAME
    )
    dataloader = {}
    dataloader["train"] = torch.utils.data.DataLoader(
        datasets["train"], batch_size=BATCH_SIZE, shuffle=True,
        num_workers=NUM_WORKERS, pin_memory=True
    )
    dataloader["test"] = torch.utils.data.DataLoader(
        datasets["test"], batch_size=BATCH_SIZE, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=True
    )
    for epoch in range(NUM_EPOCHS):
        print("Epoch {}/{}".format(epoch, NUM_EPOCHS-1))
        print("="*10)
        # each epoch has training and validation phases
        optimizer = lr_scheduler(optimizer, epoch)
        model.train(True)
        running_loss = 0.0
        running_corrects = 0
        len_data = len(datasets["train"])
        for data in tqdm(dataloader["train"]):
            # ignore parent_img labels during training, these are only needed in testing
            inputs, labels, _ = data
            # FIX: torch.autograd.Variable is deprecated (a no-op since
            # torch 0.4); move the tensors to the GPU directly.
            if USE_GPU:
                inputs = inputs.cuda()
                labels = labels.cuda()
            # zero the parameter gradients before the forward pass
            optimizer.zero_grad()
            # forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs.data, 1)
            labels = labels.view(-1)
            loss = criterion(outputs, labels)
            # backprop if in the training phase
            loss.backward()
            optimizer.step()
            # FIX: loss.data[0] raises on torch >= 0.5 (0-dim tensor);
            # .item() also keeps running_corrects a JSON-serializable int.
            running_loss += loss.item()
            running_corrects += torch.sum(preds == labels.data).item()
        print("\n")
        # epoch stats for train and validation phases
        epoch_loss = running_loss / len_data
        epoch_acc = running_corrects / len_data
        history["train_acc"].append(epoch_acc)
        history["train_loss"].append(epoch_loss)
        print("Loss: {:.4f} | Acc: {:.4f}".format(epoch_loss, epoch_acc))
        # write history dict as a JSON file at each epoch
        history_path = "{}_history".format(SAVE_PATH)
        with open(history_path, "w") as f:
            json.dump(history, f, indent=4)
    # Convert model to test mode and make predictions on test set
    # these need to be recorded so that the aggregate prediction
    # on all the cells in an image can be calculated.
    #
    # This can be done by grouping on the 'img_id' column in the .csv file
    print("\n")
    print("=" * 10)
    print("Testing")
    print("=" * 10)
    model.eval()
    parent_imgs = []
    predictions_list = []
    actual_vals = []
    # no_grad: inference only — avoids building autograd graphs.
    with torch.no_grad():
        for data in tqdm(dataloader["test"]):
            test_inputs, test_labels, parent_img = data
            test_labels = list(test_labels.cpu().numpy())
            if USE_GPU:
                test_inputs = test_inputs.cuda()
            test_outputs = model(test_inputs)
            _, predictions = torch.max(test_outputs.data, 1)
            predictions = list(predictions.cpu().numpy())
            # BUG FIX: extend, not append — appending the whole batch of
            # parent ids left parent_imgs with one entry per *batch* while
            # predictions/actuals have one entry per *item*, misaligning
            # the zip below.
            parent_imgs.extend(parent_img)
            predictions_list.extend(predictions)
            actual_vals.extend(test_labels)
    for i, j, k in zip(parent_imgs, predictions_list, actual_vals):
        print(i, j, k)
# NOTE(review): fragment — `return image, label` below is the tail of a
# Dataset.__getitem__ whose def (and the enclosing class header) lie
# outside this view.
        return image, label

    def __len__(self):
        """Return the dataset size, checking the backing arrays stay aligned."""
        # All three backing collections must have one entry per sample.
        assert self.trainDataSize == self.maskDataSize
        assert self.trainDataSize == self.dataBoxSize
        return self.trainDataSize


if __name__ == '__main__':
    from torch.utils.data import DataLoader

    # Smoke-test the dataset: build the augmentation pipeline and iterate.
    transforms = [
        # Transforms.RandomCrop(2300, 2300),
        Transforms.RondomFlip(),
        Transforms.RandomRotate(15),
        Transforms.Log(0.5),
        Transforms.Blur(0.2),
        Transforms.ToTensor(),
        Transforms.ToGray()
    ]
    dataset = UNetDataset('./data/train', './data/train_cleaned',
                          transform=transforms)
    dataLoader = DataLoader(dataset=dataset, batch_size=32, shuffle=True,
                            num_workers=0)
    for index, (batch_x, batch_y) in enumerate(dataLoader):
        # NOTE(review): truncated in this view — the print call's argument
        # list continues past the visible source.
        print(batch_x.size(),
def train():
    """Train the 1-in/2-out UNet on ./data/train, checkpointing every 10 epochs.

    Resumes from `weight_with_optimizer` when that checkpoint file exists.
    Uses the module-level BATCH_SIZE, LR, EPOCH, device, weight and
    weight_with_optimizer settings.
    """
    # dataset: grayscale, flip/rotate/crop/log augmentation, then tensor
    transforms = [
        Transforms.ToGray(),
        Transforms.RondomFlip(),
        Transforms.RandomRotate(15),
        Transforms.RandomCrop(48, 48),
        Transforms.Log(0.5),
        # Transforms.EqualizeHist(0.5),
        # Transforms.Blur(0.2),
        Transforms.ToTensor()
    ]
    dataset = UNetDataset('./data/train/', './data/train_cleaned/',
                          transform=transforms)
    dataLoader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE,
                            shuffle=True, num_workers=0)
    # init model
    net = UNet(1, 2).to(device)
    optimizer = optim.Adam(net.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss().to(device)
    # load weight
    if os.path.exists(weight_with_optimizer):
        checkpoint = torch.load(weight_with_optimizer)
        net.load_state_dict(checkpoint['net'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('load weight')
    # train
    for epoch in range(EPOCH):
        for step, (batch_x, batch_y) in enumerate(dataLoader):
            batch_x = batch_x.to(device)
            # squeeze the channel dim: CrossEntropyLoss wants (N, H, W) targets
            batch_y = batch_y.squeeze(1).to(device)
            output = net(batch_x)
            loss = loss_func(output, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # FIX: .item() instead of deprecated loss.data.cpu(); this prints
        # the final batch's loss for the epoch.
        print('epoch: %d | loss: %.4f' % (epoch, loss.item()))
        # save weight every 10 epochs, with and without optimizer state
        if (epoch + 1) % 10 == 0:
            torch.save(
                {
                    'net': net.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, weight_with_optimizer)
            torch.save({'net': net.state_dict()}, weight)
            print('saved')
def get_dataloaders(data_location, labels_file, modalities, wsi_patch_size=None,
                    n_wsi_patches=None, batch_size=None, exclude_patients=None,
                    return_patient_id=False):
    """Instantiate PyTorch DataLoaders for the requested data modalities.

    Parameters
    ----------
    data_location: root directory holding one sub-directory per modality.
    labels_file: labels file passed to get_label_map() per split.
    modalities: iterable of modality names; subset of
        {'clinical', 'wsi', 'mRNA', 'miRNA', 'DNAm', 'CNV'}.
    wsi_patch_size: side length of the center crop applied to WSI patches.
    n_wsi_patches: number of WSI patches per patient; also used to pick a
        default batch size when batch_size is None.
    batch_size: training batch size; defaults to 32 when multiple WSI
        patches are used, otherwise 128. Val/test loaders use 2x this.
    exclude_patients: optional collection of patient ids to drop.
    return_patient_id: when True, datasets also yield the patient id.

    Returns
    -------
    Dict of PyTorch DataLoaders keyed by 'train', 'val' and 'test'.
    """
    data_dirs = {
        'clinical': os.path.join(data_location, 'Clinical'),
        'wsi': os.path.join(data_location, 'WSI'),
        'mRNA': os.path.join(data_location, 'RNA-seq'),
        'miRNA': os.path.join(data_location, 'miRNA-seq'),
        'DNAm': os.path.join(data_location, 'DNAm/5k'),
        'CNV': os.path.join(data_location, 'CNV'),
    }
    data_dirs = {mod: data_dirs[mod] for mod in modalities}

    if batch_size is None:
        # BUG FIX: n_wsi_patches defaults to None, and `None > 1` raises
        # TypeError on Python 3 — guard the comparison explicitly.
        if 'wsi' in data_dirs and n_wsi_patches is not None and n_wsi_patches > 1:
            batch_size = 2**5
        else:
            batch_size = 2**7

    patient_labels = {'train': get_label_map(labels_file, 'train'),
                      'val': get_label_map(labels_file, 'val'),
                      'test': get_label_map(labels_file, 'test')}

    if 'wsi' in data_dirs:
        # Color jitter + random rotate/flip augmentation for training only;
        # validation and test get the same deterministic crop pipeline.
        eval_transform = torchvision.transforms.Compose([
            patch_transforms.ToPIL(),
            torchvision.transforms.CenterCrop(wsi_patch_size),
            patch_transforms.ToNumpy(),
            patch_transforms.ToTensor(),
        ])
        transforms = {
            'train': torchvision.transforms.Compose([
                patch_transforms.ToPIL(),
                torchvision.transforms.CenterCrop(wsi_patch_size),
                torchvision.transforms.ColorJitter(
                    brightness=64/255, contrast=0.5, saturation=0.25,
                    hue=0.04),
                patch_transforms.ToNumpy(),
                patch_transforms.RandomRotate(),
                patch_transforms.RandomFlipUpDown(),
                patch_transforms.ToTensor(),
            ]),
            # No data augmentation for validation
            'val': eval_transform,
            'test': eval_transform}
    else:
        transforms = {'train': None, 'val': None, 'test': None}

    datasets = {x: dataset.MultimodalDataset(
        label_map=patient_labels[x],
        data_dirs=data_dirs,
        n_patches=n_wsi_patches,
        patch_size=wsi_patch_size,
        transform=transforms[x],
        exclude_patients=exclude_patients,
        return_patient_id=return_patient_id)
        for x in ['train', 'val', 'test']}

    print('Data modalities:')
    for mod in modalities:
        print('  ', mod)
    print()
    print('Dataset sizes (# patients):')
    for x in datasets.keys():
        print(f'   {x}: {len(datasets[x])}')
    print()
    print('Batch size:', batch_size)

    # Use "drop_last=True" to drop the last incomplete batch
    # to avoid undefined loss values due to lack of sufficient
    # orderable observation pairs caused by data censorship
    # When running all data with batch = 64:
    #     8880 % 64 = 48
    # When running 20 cancer data with batch = 64:
    #     7369 % 64 = 9
    dataloaders = {'train': torch.utils.data.DataLoader(
                       datasets['train'], batch_size=batch_size,
                       shuffle=True, num_workers=4, drop_last=True),
                   'val': torch.utils.data.DataLoader(
                       datasets['val'], batch_size=batch_size * 2,
                       shuffle=False, num_workers=4, drop_last=True),
                   'test': torch.utils.data.DataLoader(
                       datasets['test'], batch_size=batch_size * 2,
                       shuffle=False, num_workers=4, drop_last=True)}
    return dataloaders
def train():
    """Train the UNet with per-epoch validation and TensorBoard logging.

    Checkpoints the "best" model whenever a single training batch improves
    on the best loss seen so far, and a periodic full checkpoint every 10
    epochs. Resumes from config.weight_with_optimizer when present.
    """
    # NOTE(review): this same random-augmentation pipeline is applied to
    # the validation set below, which makes the validation loss
    # stochastic — confirm that is intended.
    transforms = [
        Transforms.RondomFlip(),
        Transforms.RandomRotate(15),
        Transforms.Log(0.5),
        Transforms.Blur(0.2),
        Transforms.ToGray(),
        Transforms.ToTensor()
    ]
    train_dataset = UNetDataset('./data/train/', './data/train_cleaned/',
                                transform=transforms)
    train_dataLoader = DataLoader(dataset=train_dataset,
                                  batch_size=config.BATCH_SIZE,
                                  shuffle=True, num_workers=0)
    valid_dataset = UNetDataset('./data/valid/', './data/valid_cleaned/',
                                transform=transforms)
    valid_dataLoader = DataLoader(dataset=valid_dataset,
                                  batch_size=config.BATCH_SIZE,
                                  shuffle=True, num_workers=0)
    net = UNet(n_channels=config.n_channels,
               n_classes=config.n_classes).to(config.device)
    writer = SummaryWriter()
    optimizer = optim.Adam(net.parameters(), lr=config.LR)
    # multi-class -> cross-entropy over logits; binary -> BCE with logits
    if config.n_classes > 1:
        loss_func = nn.CrossEntropyLoss().to(config.device)
    else:
        loss_func = nn.BCEWithLogitsLoss().to(config.device)
    best_loss = float('inf')
    if os.path.exists(config.weight_with_optimizer):
        checkpoint = torch.load(config.weight_with_optimizer,
                                map_location='cpu')
        net.load_state_dict(checkpoint['net'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('load weight')
    for epoch in range(config.EPOCH):
        train_loss = 0
        net.train()
        for step, (batch_x, batch_y) in enumerate(train_dataLoader):
            batch_x = batch_x.to(device=config.device)
            batch_y = batch_y.squeeze(1).to(device=config.device)
            output = net(batch_x)
            loss = loss_func(output, batch_y)
            train_loss += loss.item()
            # BUG FIX: compare and store the Python float, not the loss
            # tensor — keeping the tensor in best_loss pinned its whole
            # autograd graph in memory between batches.
            # NOTE(review): "best" is judged on a single batch's loss.
            if loss.item() < best_loss:
                best_loss = loss.item()
                torch.save(
                    {
                        'net': net.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }, config.best_model_with_optimizer)
                torch.save({'net': net.state_dict()}, config.best_model)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        net.eval()
        eval_loss = 0
        # FIX: validate under no_grad — no autograd graphs are built,
        # which cuts memory use; loss values are unchanged.
        with torch.no_grad():
            for step, (batch_x, batch_y) in enumerate(valid_dataLoader):
                batch_x = batch_x.to(device=config.device)
                batch_y = batch_y.squeeze(1).to(device=config.device)
                output = net(batch_x)
                valid_loss = loss_func(output, batch_y)
                eval_loss += valid_loss.item()
        writer.add_scalar("train_loss", train_loss, epoch)
        writer.add_scalar("eval_loss", eval_loss, epoch)
        print("*" * 80)
        print('epoch: %d | train loss: %.4f | valid loss: %.4f'
              % (epoch, train_loss, eval_loss))
        print("*" * 80)
        # periodic full checkpoint every 10 epochs
        if (epoch + 1) % 10 == 0:
            torch.save(
                {
                    'net': net.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, config.weight_with_optimizer)
            torch.save({'net': net.state_dict()}, config.weight)
            print('saved')
    writer.close()
# NOTE(review): fragment — the first lines below are the tail of a
# transform method whose signature (and enclosing class) start before
# this view; `weights`, `n`, `img`, `mask` and `lbl` are defined there.
        weights[foreground] = n * self.fg_weight / foreground.sum().item()
        mask = mask * weights
        img = torch.cat([img, self.mean[None]], dim=0)
        if self.flip:
            img, mask, lbl = random_flip(img, mask, lbl)
        return img, mask, lbl


# Aspect-preserving resize to 1024x768, pad by 88, then random rotation.
ROTATE_TRANS_1024 = tr.Compose([
    tr.AspectPreservingResizeTransform((1024, 768)),
    tr.Lift(T.Pad(88)),
    tr.RandomRotate(),
])

# Same resize + pad pipeline, without the rotation augmentation.
PAD_TRANS_1024 = tr.Compose([
    tr.AspectPreservingResizeTransform((1024, 768)),
    tr.Lift(T.Pad(88)),
])

RotatedISICDataset = rotated_dataset(ISICDataset)

if __name__ == '__main__':
    target_size = 1024, 768
    img_transform = T.Compose([T.ColorJitter(0.3, 0.3, 0.3, 0.),
                               T.ToTensor()])
    # NOTE(review): truncated in this view — the ISICDataset(...) argument
    # list continues past the visible source.
    d = ISICDataset('/home/jatentaki/Storage/jatentaki/Datasets/isic2018',
# NOTE(review): fragment — the first lines below close an `args`-style
# configuration dict whose opening lies before this view.
    'print_aux': True,
}

ckpt_path = './ckpt'
exp_name = 'model'

# Per-channel mean/std normalization constants — presumably computed on
# the CULane training imagery used below; verify against the dataset.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.3598, 0.3653, 0.3662], [0.2573, 0.2663, 0.2756])
])
mask_transform = extend_transforms.MaskToTensor()
# Joint (image + mask) spatial augmentation, parameterized by args.
train_joint_transform = extend_transforms.Compose([
    extend_transforms.RandomScale(),
    extend_transforms.RandomSizedRatio(760, 842, 274, 304),
    extend_transforms.RandomRotate(args['rotate_degree']),
    extend_transforms.RandomCrop(args['train_crop_size']),
])
train_set = culane.CULANE('train', joint_transform=train_joint_transform,
                          transform=img_transform,
                          mask_transform=mask_transform)
train_loader = DataLoader(train_set, batch_size=args['train_batch_size'],
                          num_workers=10, shuffle=True)
# Class-weighted 2D cross-entropy (background down-weighted to 0.4).
# NOTE(review): truncated in this view — the CrossEntropyLoss2d argument
# list continues past the visible source.
criterion = CrossEntropyLoss2d(weight=torch.Tensor([0.4, 1, 1, 1, 1]).cuda(),
                               size_average=True,
                               ignore_index=culane.ignore_label,
# NOTE(review): fragment — part of a script body (likely main()) that
# begins before this view; `args`, `writer`, `tr`, `T` and `loader` are
# bound elsewhere.
    print("Ignoring --epochs outside of training mode")
if args.no_jit and args.optimize:
    print("Ignoring --optimize in --no-jit setting")

writer.add_text('general', str(vars(args)))

transform = T.Compose([T.CenterCrop(644), T.ToTensor()])

# if we are not padding the convolutions, we have to pad the input
aug_pad = None if args.padding else tr.Lift(T.Pad(40))
test_global_transform = aug_pad
tr_global_transform = tr.Compose(
    [tr.RandomRotate(), tr.RandomFlip(), aug_pad])

train_data = loader.DriveDataset(args.data_path, training=True,
                                 bloat=args.bloat, from_=args.cut,
                                 img_transform=transform,
                                 mask_transform=transform,
                                 label_transform=transform,
                                 global_transform=tr_global_transform)
train_loader = DataLoader(train_data, batch_size=args.batch_size,
                          shuffle=True, num_workers=args.workers)

# NOTE(review): truncated in this view — the body of this conditional
# continues past the visible source.
if args.test_on_train: