def run_epoch(epoch_idx, action, loader, model, optimizer):
    """Run one training or validation epoch and log progress to TensorBoard.

    Args:
        epoch_idx: zero-based epoch number, used to build the global step
            for TensorBoard logging.
        action: an ``Action`` enum member; ``Action.TRAIN`` enables gradient
            computation and optimizer updates, anything else runs evaluation.
        loader: DataLoader whose batches are consumed by ``prepare_batch``.
        model: the network being trained / evaluated.
        optimizer: optimizer; stepped only when training.

    Side effects: writes scalars and images to the module-level ``writer``
    every 40 batches and prints the epoch mean loss.
    """
    is_training = action == Action.TRAIN
    epoch_losses = []
    # model.train(False) puts the network in eval mode for validation
    model.train(is_training)
    for batch_idx, batch in enumerate(tqdm(loader)):
        # NOTE(review): relies on module-level ``training_batch_size`` and
        # ``device`` — confirm both are defined before this is called.
        inputs, targets = prepare_batch(batch, training_batch_size, device)
        optimizer.zero_grad()
        with torch.set_grad_enabled(is_training):
            logits = forward(model, inputs)
            probabilities = F.softmax(logits, dim=CHANNELS_DIMENSION)
            batch_losses = get_dice_loss(probabilities, targets)
            batch_loss = batch_losses.mean()
            if is_training:
                batch_loss.backward()  # calculate gradients
                optimizer.step()       # update weights
            # Log to TensorBoard every 40 batches. The global step is hoisted
            # so it is computed once instead of once per logging call, and the
            # formerly duplicated train/val branches are merged — the emitted
            # tags/strings are unchanged.
            if batch_idx % 40 == 0:
                global_step = epoch_idx * len(loader) + batch_idx
                loss_tag = 'Loss/train' if is_training else 'Loss/valid'
                tag_suffix = 'Train' if is_training else 'Val'
                writer.add_scalar(loss_tag, batch_loss.item(), global_step)
                writer.flush()
                plot_2d_or_3d_image(
                    logits.argmax(dim=CHANNELS_DIMENSION, keepdim=True),
                    global_step, writer, index=0, tag=f"Output {tag_suffix}",
                )
                plot_2d_or_3d_image(
                    inputs, global_step, writer, index=0, tag=f"Input {tag_suffix}",
                )
            epoch_losses.append(batch_loss.detach().item())
    epoch_losses = np.array(epoch_losses)
    print(f'{action.value} mean loss: {epoch_losses.mean():0.3f}')
def main():
    """Generate synthetic 3D NIfTI data, train a MONAI UNet, and validate it.

    Creates 40 synthetic image/segmentation pairs in a temporary directory,
    trains a 3D UNet for 5 epochs with Dice loss, runs sliding-window
    validation every ``val_interval`` epochs, and checkpoints the best model.

    Side effects: writes NIfTI files to a temp dir (always removed on exit),
    writes TensorBoard event files, and saves ``best_metric_model.pth`` in
    the current working directory. Requires a CUDA device (``cuda:0``).
    """
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # create a temporary directory and 40 random image, mask pairs
    tempdir = tempfile.mkdtemp()
    print(f"generating synthetic data to {tempdir} (this may take a while)")
    # try/finally guarantees the synthetic data is removed even if data
    # generation or training raises (the original leaked tempdir on error)
    try:
        for i in range(40):
            im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1)
            n = nib.Nifti1Image(im, np.eye(4))
            nib.save(n, os.path.join(tempdir, f"im{i:d}.nii.gz"))
            n = nib.Nifti1Image(seg, np.eye(4))
            nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz"))
        images = sorted(glob(os.path.join(tempdir, "im*.nii.gz")))
        segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz")))

        # define transforms for image and segmentation
        train_imtrans = Compose(
            [
                ScaleIntensity(),
                AddChannel(),
                RandSpatialCrop((96, 96, 96), random_size=False),
                RandRotate90(prob=0.5, spatial_axes=(0, 2)),
                ToTensor(),
            ]
        )
        train_segtrans = Compose(
            [
                AddChannel(),
                RandSpatialCrop((96, 96, 96), random_size=False),
                RandRotate90(prob=0.5, spatial_axes=(0, 2)),
                ToTensor(),
            ]
        )
        val_imtrans = Compose([ScaleIntensity(), AddChannel(), ToTensor()])
        val_segtrans = Compose([AddChannel(), ToTensor()])

        # define nifti dataset, data loader; pull one batch as a sanity check
        check_ds = NiftiDataset(images, segs, transform=train_imtrans, seg_transform=train_segtrans)
        check_loader = DataLoader(check_ds, batch_size=10, num_workers=2, pin_memory=torch.cuda.is_available())
        im, seg = monai.utils.misc.first(check_loader)
        print(im.shape, seg.shape)

        # create a training data loader (first 20 pairs)
        train_ds = NiftiDataset(images[:20], segs[:20], transform=train_imtrans, seg_transform=train_segtrans)
        train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, num_workers=8, pin_memory=torch.cuda.is_available())
        # create a validation data loader (last 20 pairs)
        val_ds = NiftiDataset(images[-20:], segs[-20:], transform=val_imtrans, seg_transform=val_segtrans)
        val_loader = DataLoader(val_ds, batch_size=1, num_workers=4, pin_memory=torch.cuda.is_available())

        # create UNet, DiceLoss and Adam optimizer
        device = torch.device("cuda:0")
        model = monai.networks.nets.UNet(
            dimensions=3,
            in_channels=1,
            out_channels=1,
            channels=(16, 32, 64, 128, 256),
            strides=(2, 2, 2, 2),
            num_res_units=2,
        ).to(device)
        loss_function = monai.losses.DiceLoss(do_sigmoid=True)
        optimizer = torch.optim.Adam(model.parameters(), 1e-3)

        # start a typical PyTorch training
        val_interval = 2  # validate every 2nd epoch
        best_metric = -1
        best_metric_epoch = -1
        epoch_loss_values = list()
        metric_values = list()
        writer = SummaryWriter()
        for epoch in range(5):
            print("-" * 10)
            print(f"epoch {epoch + 1}/{5}")
            model.train()
            epoch_loss = 0
            step = 0
            for batch_data in train_loader:
                step += 1
                inputs, labels = batch_data[0].to(device), batch_data[1].to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = loss_function(outputs, labels)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
                epoch_len = len(train_ds) // train_loader.batch_size
                print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}")
                writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step)
            epoch_loss /= step
            epoch_loss_values.append(epoch_loss)
            print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}")

            if (epoch + 1) % val_interval == 0:
                model.eval()
                with torch.no_grad():
                    metric_sum = 0.0
                    metric_count = 0
                    val_images = None
                    val_labels = None
                    val_outputs = None
                    for val_data in val_loader:
                        val_images, val_labels = val_data[0].to(device), val_data[1].to(device)
                        roi_size = (96, 96, 96)
                        sw_batch_size = 4
                        val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model)
                        value = compute_meandice(
                            y_pred=val_outputs, y=val_labels, include_background=True, to_onehot_y=False, add_sigmoid=True
                        )
                        metric_count += len(value)
                        metric_sum += value.sum().item()
                    metric = metric_sum / metric_count
                    metric_values.append(metric)
                    if metric > best_metric:
                        best_metric = metric
                        best_metric_epoch = epoch + 1
                        torch.save(model.state_dict(), "best_metric_model.pth")
                        print("saved new best metric model")
                    print(
                        "current epoch: {} current mean dice: {:.4f} best mean dice: {:.4f} at epoch {}".format(
                            epoch + 1, metric, best_metric, best_metric_epoch
                        )
                    )
                    writer.add_scalar("val_mean_dice", metric, epoch + 1)
                    # plot the last model output as GIF image in TensorBoard with the corresponding image and label
                    plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag="image")
                    plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag="label")
                    plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag="output")
        print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}")
        writer.close()
    finally:
        shutil.rmtree(tempdir)
# NOTE(review): this entire fragment duplicates the tail of the validation
# logic inside main() (compare the metric/best-checkpoint/TensorBoard code
# there) but uses single quotes and .format() instead of f-strings. At the
# scope it appears here, names such as ``value``, ``metric_sum``,
# ``metric_count``, ``epoch``, ``model``, ``writer``, ``val_images`` and
# ``tempdir`` are not defined, so executing it would raise NameError.
# It looks like copy/paste residue — confirm it is unreachable/unused and
# delete it rather than maintain two diverging copies.
metric_sum += value.sum().item()
metric = metric_sum / metric_count
metric_values.append(metric)
if metric > best_metric:
    best_metric = metric
    best_metric_epoch = epoch + 1
    # checkpoint whenever the running-best mean Dice improves
    torch.save(model.state_dict(), 'best_metric_model.pth')
    print('saved new best metric model')
print(
    'current epoch: {} current mean dice: {:.4f} best mean dice: {:.4f} at epoch {}'
    .format(epoch + 1, metric, best_metric, best_metric_epoch))
writer.add_scalar('val_mean_dice', metric, epoch + 1)
# plot the last model output as GIF image in TensorBoard with the corresponding image and label
plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag='image')
plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag='label')
plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag='output')
shutil.rmtree(tempdir)
print('train completed, best_metric: {:.4f} at epoch: {}'.format(
    best_metric, best_metric_epoch))