def __init__(self, dir_dict, args):
    self.args = args
    self.augment = args['augment']
    self.train_ratio = float(args['train_ratio'])
    self.shuffle = args['shuffle']
    self.n_classes = int(args['n_classes'])
    self.n_channels = int(args['in_channels'])
    self.learning_rate = float(args['learning_rate'])
    self.epochs = int(args['epochs'])
    self.momentum_factor = float(args['momentum_factor'])
    self.weight_decay = float(args['weight_decay'])
    self.grid_clip_by_value = float(args['grad_clip_by_value'])
    self.save = args['save']
    self.save_freq = int(args['save_freq'])
    self.confidence_threshold = float(args['confidence_threshold'])
    self.lr_decay_step = int(args['lr_decay_step'])
    self.dir_dict = dir_dict

    # to GPU
    self.cuda_gpu = torch.cuda.is_available()

    # load dataset
    self.bulid_dataset()

    # build model
    if self.cuda_gpu:
        self.model = unet_model.UNet(self.n_channels, self.n_classes).cuda()
    else:
        self.model = unet_model.UNet(self.n_channels, self.n_classes)

    # build optimizer
    self.build_optimiezer()
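# A minimal sketch of the args dictionary the constructor above reads. The keys are
# taken directly from the lookups in __init__; the values are hypothetical
# placeholders, not the project's defaults.
example_args = {
    'augment': True,
    'train_ratio': 0.8,
    'shuffle': True,
    'n_classes': 2,
    'in_channels': 1,
    'learning_rate': 1e-3,
    'epochs': 100,
    'momentum_factor': 0.9,
    'weight_decay': 1e-4,
    'grad_clip_by_value': 1.0,
    'save': True,
    'save_freq': 10,
    'confidence_threshold': 0.5,
    'lr_decay_step': 30,
}
# e.g. Trainer({'train': 'data/train', 'val': 'data/val'}, example_args), where the
# Trainer class name and the dir_dict keys are assumptions used only for illustration.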
def __init__(self, args, train=True, chkpoint=None):
    def init_weights(m):
        if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_uniform_(m.weight.data)
            m.bias.data.fill_(0.01)

    # INITIALIZE HYPER PARAMS
    self.device = args.device
    self.ACTION_SPACE = args.action_space
    if train:
        self.steps = 0
        self.BATCH_SIZE = args.batch_size
        self.GAMMA = args.gamma
        self.EPS_START = args.eps_start
        self.EPS_END = args.eps_end
        self.EPS_DECAY = args.eps_decay
        self.TARGET_UPDATE = args.target_update

    # INITIALIZE THE MODELS
    # self.model = Model(self.ACTION_SPACE)
    self.model1 = unet_model.UNet(self.ACTION_SPACE, upsize=args.upsize)
    self.model2 = unet_model.UNet(self.ACTION_SPACE, upsize=args.upsize)
    self.model1.to(self.device)
    self.model2.to(self.device)
    # if chkpoint:
    #     print('Agent Loaded at checkpoint!')
    #     self.model.load_state_dict(chkpoint['agent'])
    self.opt1 = torch.optim.Adam(self.model1.parameters(), lr=1e-4)
    self.opt2 = torch.optim.Adam(self.model2.parameters(), lr=1e-4)
def main():
    num_epochs = 30
    dev = torch.device('cuda')
    model = unet_model.UNet(n_channels=3, n_classes=3, bilinear=True).to(device=dev)
    dataset = MyDataset('kodak', transform=transforms.Compose(
        [Resize((512, 768)), Normalize(), ToTensor()]))
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True,
                            num_workers=0, pin_memory=True)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    loss = np.zeros(num_epochs)
    psnr = np.zeros(num_epochs)
    for epoch in tqdm.tqdm(range(num_epochs)):
        loss[epoch], psnr[epoch] = train_loop(model, dataloader, optimizer, dev)
        print(loss[epoch], psnr[epoch])
        scheduler.step()

    torch.save(model.state_dict(), "model_weights.pt")
    data = np.vstack((np.arange(num_epochs), loss, psnr)).T
    np.savetxt("train_data.dat", data)
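# train_loop() is called above but not defined in this file. The sketch below shows
# one plausible shape for it, assuming an image-to-image reconstruction task with an
# MSE loss and PSNR computed on [0, 1]-ranged images; the actual implementation and
# the dataset's batch format may differ.
import math

import torch.nn.functional as F


def train_loop_sketch(model, dataloader, optimizer, dev):
    model.train()
    total_loss, total_psnr, n_batches = 0.0, 0.0, 0
    for inputs, targets in dataloader:          # assumes (input, target) image pairs
        inputs, targets = inputs.to(dev), targets.to(dev)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = F.mse_loss(outputs, targets)
        loss.backward()
        optimizer.step()
        mse = loss.item()
        total_loss += mse
        total_psnr += 10.0 * math.log10(1.0 / max(mse, 1e-12))  # PSNR with MAX=1
        n_batches += 1
    return total_loss / n_batches, total_psnr / n_batches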
def segmentation(file_dir):
    with tf.Graph().as_default() as g:
        # rawNameList, segNameList = unet_input.GetFileNameList(VAL_DIR)
        # TOTAL_VALID_IMAGES = len(rawNameList)
        raw_image = GetBatch(rawNameList)

        # Build computational graph
        seg_image = unet_model.UNet(raw_image)

        saver = tf.train.Saver()
        # summ_op = tf.summary.merge_all()
        # summ_writer = tf.summary.FileWriter(PARAMS.val_log_dir, g)
        while True:
            SegAndSaveImages(saver, raw_image, seg_image)
            break
def Evaluate():
    with tf.Graph().as_default() as g:
        # rawNameList, segNameList = unet_input.GetFileNameList(VAL_DIR)
        # TOTAL_VALID_IMAGES = len(rawNameList)
        raw_image, seg_image = unet_input.GetBatchFromFile_Valid(
            rawNameList, segNameList, VAL_BATCH_SIZE)

        # Build computational graph
        seg_model = unet_model.UNet(raw_image)
        # raw_image = tf.image.resize_bicubic(raw_image, [OUTPUT_SIZE, OUTPUT_SIZE])

        saver = tf.train.Saver()
        summ_op = tf.summary.merge_all()
        summ_writer = tf.summary.FileWriter(PARAMS.val_log_dir, g)
        while True:
            if PARAMS.eval_once:
                EvaluateOnceAndSaveImages(saver, summ_writer, summ_op,
                                          seg_image, raw_image, seg_model)
                break
            else:
                EvaluateOnce(saver, summ_writer, summ_op,
                             seg_image, raw_image, seg_model)
                time.sleep(VAL_INTERVAL_SECS)
def main(nepochs, lr):
    print(nepochs, lr)
    files_t = [f'/nobackup/sccsb/radar/201807{d:02}{h:02}{m:02}_nimrod_ng_radar_rainrate_composite_1km_UK'
               for m in range(0, 60, 5) for h in range(6, 9) for d in range(27, 31)]
    train_loader = common.prep_data_uk(files_t)

    files_v = [f'/nobackup/sccsb/radar/201807{d:02}{h:02}{m:02}_nimrod_ng_radar_rainrate_composite_1km_UK'
               for m in range(0, 60, 5) for h in range(10, 13) for d in range(27, 31)]
    val_loader = common.prep_data_uk(files_v)

    unet = model.UNet(n_channels=3, n_classes=1)
    trained_net = train_net(unet, train_loader, val_loader,
                            batch_size=100, n_epochs=nepochs, learning_rate=lr)
    torch.save(trained_net.state_dict(),
               'milesial_unet_{}ep_{}lr.pt'.format(str(nepochs), str(lr)))
}

transform = transforms.Compose([
    transforms.ToPILImage(mode=None),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

fx = 572.41140
px = 325.26110
fy = 573.57043
py = 242.04899

# Intrinsic Parameters of the Camera
intrinsic_matrix = np.array([[fx, 0, px], [0, fy, py], [0, 0, 1]])

correspondence_block = UNET.UNet(n_channels=3, out_channels_id=16,
                                 out_channels_uv=256, bilinear=True)
# load the best weights from the training loop
correspondence_block.load_state_dict(
    torch.load('correspondence_block.pt', map_location=torch.device('cpu')))

pose_refiner = Pose_Refiner()
# load the best weights from the training loop
pose_refiner.load_state_dict(
    torch.load('pose_refiner.pt', map_location=torch.device('cpu')))

correspondence_block.cuda()
pose_refiner.cuda()
pose_refiner.eval()
correspondence_block.eval()

list_all_images = load_obj(root_dir + "all_images_adr")
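# The intrinsic matrix above follows the standard pinhole camera model, so a 3D point
# expressed in the camera frame projects to pixel coordinates as in this small
# sanity-check sketch. The sample point is made up for illustration.
import numpy as np

point_cam = np.array([0.05, -0.02, 0.7])   # hypothetical (X, Y, Z) in metres
uvw = intrinsic_matrix @ point_cam         # homogeneous image coordinates
u, v = uvw[0] / uvw[2], uvw[1] / uvw[2]    # divide by depth to get pixel coordinates
print("projected pixel:", u, v)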
def create_refinement_inputs(root_dir, train_eval_dir, classes, intrinsic_matrix):
    correspondence_block = UNET.UNet(n_channels=3, out_channels_id=14,
                                     out_channels_uv=256, bilinear=True)
    correspondence_block.cuda()
    correspondence_block_filename = os.path.join(train_eval_dir,
                                                 'correspondence_block.pt')
    correspondence_block.load_state_dict(
        torch.load(correspondence_block_filename,
                   map_location=torch.device('cpu')))

    train_data = LineMODDataset(root_dir, classes=classes,
                                transform=transforms.Compose(
                                    [transforms.ToTensor()]))
    upsampled = nn.Upsample(size=[240, 320], mode='bilinear', align_corners=False)
    regex = re.compile(r'\d+')
    count = 0
    for i in range(len(train_data)):
        if i % 1000 == 0:
            print(str(i) + "/" + str(len(train_data)) + " finished!")

        img_adr, img, _, _, _ = train_data[i]
        label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
        idx = regex.findall(os.path.split(img_adr)[1])[0]
        adr_rendered = (root_dir + label + "/pose_refinement/rendered/color"
                        + str(idx) + ".png")
        adr_img = (root_dir + label + "/pose_refinement/real/color"
                   + str(idx) + ".png")

        # find the object in the image using the idmask
        img = img.view(1, img.shape[0], img.shape[1], img.shape[2])
        idmask_pred, _, _ = correspondence_block(img.cuda())
        idmask = torch.argmax(idmask_pred, dim=1).squeeze().cpu()
        coord_2d = (idmask == classes[label]).nonzero(as_tuple=True)
        if coord_2d[0].nelement() != 0:
            coord_2d = torch.cat(
                (coord_2d[0].view(coord_2d[0].shape[0], 1),
                 coord_2d[1].view(coord_2d[1].shape[0], 1)), 1)
            min_x = coord_2d[:, 0].min()
            max_x = coord_2d[:, 0].max()
            min_y = coord_2d[:, 1].min()
            max_y = coord_2d[:, 1].max()
            img = img.squeeze().transpose(1, 2).transpose(0, 2)
            obj_img = img[min_x:max_x + 1, min_y:max_y + 1, :]

            # saving in the correct format using upsampling
            obj_img = obj_img.transpose(0, 1).transpose(0, 2).unsqueeze(dim=0)
            obj_img = upsampled(obj_img)
            obj_img = obj_img.squeeze().transpose(0, 2).transpose(0, 1)
            # It appears obj_img occasionally has values slightly larger than 1.0.
            # So I'm assuming that we're only clamping by a slight amount here.
            # If that's not the case, we need to scale.
            obj_img = torch.clamp(obj_img, 0.0, 1.0)
            mpimg.imsave(adr_img, obj_img.squeeze().numpy())

            # create rendering for an object
            cropped_rendered_image = create_rendering(root_dir, intrinsic_matrix,
                                                      label, idx)
            rendered_img = torch.from_numpy(cropped_rendered_image)
            rendered_img = rendered_img.unsqueeze(dim=0)
            rendered_img = rendered_img.transpose(1, 3).transpose(2, 3)
            rendered_img = upsampled(rendered_img)
            rendered_img = rendered_img.squeeze().transpose(0, 2).transpose(0, 1)
            mpimg.imsave(adr_rendered, rendered_img.numpy())
        else:
            # object not present in idmask prediction
            count += 1
            mpimg.imsave(adr_rendered, np.zeros((240, 320)))
            mpimg.imsave(adr_img, np.zeros((240, 320)))
    print("Number of outliers: ", count)
def train_correspondence_block(root_dir, classes, epochs=10):
    train_data = LineMODDataset(root_dir, classes=classes,
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.ColorJitter(brightness=0, contrast=0,
                                                           saturation=0, hue=0)
                                ]))
    batch_size = 4
    num_workers = 0
    valid_size = 0.2

    # obtain training indices that will be used for validation
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders (combine dataset and sampler)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)

    # architecture for correspondence block - 13 objects + background = 14 channels for ID masks
    correspondence_block = UNET.UNet(n_channels=3, out_channels_id=14,
                                     out_channels_uv=256, bilinear=True)
    correspondence_block.cuda()

    # custom loss function and optimizer
    criterion_id = nn.CrossEntropyLoss()
    criterion_u = nn.CrossEntropyLoss()
    criterion_v = nn.CrossEntropyLoss()

    # specify optimizer
    optimizer = optim.Adam(correspondence_block.parameters(), lr=3e-4,
                           weight_decay=3e-5)

    # training loop
    # number of epochs to train the model
    n_epochs = epochs
    valid_loss_min = np.Inf  # track change in validation loss

    for epoch in range(1, n_epochs + 1):
        # keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        print("------ Epoch ", epoch, " ---------")

        ###################
        # train the model #
        ###################
        correspondence_block.train()
        for _, image, idmask, umask, vmask in train_loader:
            # move tensors to GPU if CUDA is available
            image, idmask, umask, vmask = image.cuda(), idmask.cuda(), umask.cuda(), vmask.cuda()
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            idmask_pred, umask_pred, vmask_pred = correspondence_block(image)
            # calculate the batch loss
            loss_id = criterion_id(idmask_pred, idmask)
            loss_u = criterion_u(umask_pred, umask)
            loss_v = criterion_v(vmask_pred, vmask)
            loss = loss_id + loss_u + loss_v
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update training loss
            train_loss += loss.item()

        ######################
        # validate the model #
        ######################
        correspondence_block.eval()
        for _, image, idmask, umask, vmask in valid_loader:
            # move tensors to GPU if CUDA is available
            image, idmask, umask, vmask = image.cuda(), idmask.cuda(), umask.cuda(), vmask.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            idmask_pred, umask_pred, vmask_pred = correspondence_block(image)
            # calculate the batch loss
            loss_id = criterion_id(idmask_pred, idmask)
            loss_u = criterion_u(umask_pred, umask)
            loss_v = criterion_v(vmask_pred, vmask)
            loss = loss_id + loss_u + loss_v
            # update average validation loss
            valid_loss += loss.item()

        # calculate average losses
        train_loss = train_loss / len(train_loader.sampler)
        valid_loss = valid_loss / len(valid_loader.sampler)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, train_loss, valid_loss))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
                valid_loss_min, valid_loss))
            torch.save(correspondence_block.state_dict(), 'correspondence_block.pt')
            valid_loss_min = valid_loss
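# A hedged usage sketch for train_correspondence_block() above. The dataset path and
# the class-name -> id-mask mapping are hypothetical placeholders; only the call
# signature is taken from the function definition.
if __name__ == '__main__':
    example_classes = {'ape': 1, 'can': 2, 'cat': 3}   # example mapping only
    train_correspondence_block(root_dir='LineMOD_Dataset/',
                               classes=example_classes,
                               epochs=10)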
def initial_pose_estimation(root_dir, train_eval_dir, classes, intrinsic_matrix):
    for c in classes:
        class_pred_pose_fname = os.path.join(train_eval_dir, c, "predicted_pose")
        if not os.path.exists(class_pred_pose_fname):
            os.makedirs(class_pred_pose_fname)

    # LineMOD Dataset
    train_data = LineMODDataset(root_dir, train_eval_dir, classes=classes,
                                transform=transforms.Compose(
                                    [transforms.ToTensor()]))

    # load the best correspondence block weights
    correspondence_block = UNET.UNet(n_channels=3, out_channels_id=14,
                                     out_channels_uv=256, bilinear=True)
    correspondence_block.cuda()
    correspondence_block_filename = os.path.join(train_eval_dir,
                                                 'correspondence_block.pt')
    correspondence_block.load_state_dict(
        torch.load(correspondence_block_filename,
                   map_location=torch.device('cpu')))

    # initial 6D pose prediction
    regex = re.compile(r'\d+')
    outliers = 0
    for i in range(len(train_data)):
        if i % 100 == 0:
            print(str(i) + "/" + str(len(train_data)) + " finished!")

        img_adr, img, idmask, _, _ = train_data[i]
        label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
        idx = regex.findall(os.path.split(img_adr)[1])[0]

        img = img.view(1, img.shape[0], img.shape[1], img.shape[2])
        idmask_pred, umask_pred, vmask_pred = correspondence_block(img.cuda())

        # convert the masks to 240,320 shape
        temp = torch.argmax(idmask_pred, dim=1).squeeze().cpu()
        upred = torch.argmax(umask_pred, dim=1).squeeze().cpu()
        vpred = torch.argmax(vmask_pred, dim=1).squeeze().cpu()
        coord_2d = (temp == classes[label]).nonzero(as_tuple=True)

        adr = os.path.join(
            train_eval_dir,
            label + "/predicted_pose/" + "info_" + str(idx) + ".txt")

        coord_2d = torch.cat(
            (coord_2d[0].view(coord_2d[0].shape[0], 1),
             coord_2d[1].view(coord_2d[1].shape[0], 1)), 1)
        uvalues = upred[coord_2d[:, 0], coord_2d[:, 1]]
        vvalues = vpred[coord_2d[:, 0], coord_2d[:, 1]]
        dct_keys = torch.cat((uvalues.view(-1, 1), vvalues.view(-1, 1)), 1)
        dct_keys = tuple(dct_keys.numpy())
        dct = load_obj(os.path.join(root_dir, label + "/UV-XYZ_mapping"))

        mapping_2d = []
        mapping_3d = []
        for count, (u, v) in enumerate(dct_keys):
            if (u, v) in dct:
                mapping_2d.append(np.array(coord_2d[count]))
                mapping_3d.append(dct[(u, v)])

        # Get the 6D pose from rotation and translation matrices
        # PnP needs at least 6 unique 2D-3D correspondences to run
        if len(mapping_2d) >= 4 or len(mapping_3d) >= 4:
            _, rvecs, tvecs, inliers = cv2.solvePnPRansac(
                np.array(mapping_3d, dtype=np.float32),
                np.array(mapping_2d, dtype=np.float32),
                intrinsic_matrix,
                distCoeffs=None,
                iterationsCount=150,
                reprojectionError=1.0,
                flags=cv2.SOLVEPNP_P3P)
            rot, _ = cv2.Rodrigues(rvecs, jacobian=None)
            rot[np.isnan(rot)] = 1
            tvecs[np.isnan(tvecs)] = 1
            tvecs = np.where(-100 < tvecs, tvecs, np.array([-100.]))
            tvecs = np.where(tvecs < 100, tvecs, np.array([100.]))
            rot_tra = np.append(rot, tvecs, axis=1)
            # save the predicted pose
            np.savetxt(adr, rot_tra)
        else:
            # save a pose full of zeros
            outliers += 1
            rot_tra = np.ones((3, 4))
            rot_tra[:, 3] = 0
            np.savetxt(adr, rot_tra)
    print("Number of instances where PnP couldn't be used: ", outliers)
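# A hedged sanity-check sketch for the PnP result above: re-project the 3D
# correspondences with the recovered rvecs/tvecs via cv2.projectPoints and measure
# the mean pixel error against the 2D detections. Variable names mirror the function
# above; the helper itself is illustrative and not part of the original pipeline.
import cv2
import numpy as np


def mean_reprojection_error(mapping_3d, mapping_2d, rvecs, tvecs, intrinsic_matrix):
    projected, _ = cv2.projectPoints(np.array(mapping_3d, dtype=np.float32),
                                     rvecs, tvecs, intrinsic_matrix, None)
    projected = projected.reshape(-1, 2)
    # compares using the same point ordering/convention that was fed to solvePnPRansac
    target = np.array(mapping_2d, dtype=np.float32)
    return float(np.linalg.norm(projected - target, axis=1).mean())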
def main(mriEncoderPath, usEncoderPath): numEpochs, batchSize = 12, 4 # Use GPU if available use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if use_cuda else "cpu") # create visdom logger instance logger = vl.VisdomLogger(port="65531") logger.deleteWindow(win="test") # Initialize the MRI US dataset and the dataset loader dir = os.path.dirname(__file__) modelPath = os.path.join(dir, '..', 'savedModels/transformation') trainDatasetDir = '../dataset/sintefHdf5Volumes/train' trainDataset = dataset.SintefDatasetValTest(rootDir=trainDatasetDir, mriFilePrefix='T1-Vol', usFilePrefix='US-DS') valDatasetDir = '../dataset/sintefHdf5Volumes/val' validationDataset = dataset.SintefDatasetValTest(rootDir=valDatasetDir, mriFilePrefix='T1-Vol', usFilePrefix='US-DS') dataloader = batchloader.DataLoader(trainDataset, batch_size=batchSize, shuffle=True, num_workers=4) #create transformation models and add them to optimizer mriForwardmodel = unetModel.UNet(n_classes=1, n_channels=1).to(device) usForwardmodel = unetModel.UNet(n_classes=1, n_channels=1).to(device) networkParams = list(mriForwardmodel.parameters()) + list( usForwardmodel.parameters()) optimizer = torch.optim.Adam(networkParams, lr=1e-4) #, weight_decay=1e-9) #Optimizer learning rate decay. scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98) #Load encoder models mriEncoderModel = AutoEncoder().to(device) usEncoderModel = AutoEncoder().to(device) mriEncoderModel.load_state_dict(torch.load(mriEncoderPath)) usEncoderModel.load_state_dict(torch.load(usEncoderPath)) mriEncoderModel.eval() usEncoderModel.eval() # create instance of loss class l2Loss = torch.nn.MSELoss() l1Loss = torch.nn.L1Loss() iteration = 0 for epochNum in range(numEpochs): for i_batch, sample_batched in enumerate(dataloader): mri = sample_batched['mri'].to(device) us = sample_batched['us'].to(device) rotatedMris = sample_batched['rotatedMris'].to(device) # perform optimization optimizer.zero_grad() # Compute the shared representation mriOut = mriForwardmodel(mri) usOut = usForwardmodel(us) rotatedMrisOut = mriForwardmodel( rotatedMris.view(rotatedMris.shape[0] * rotatedMris.shape[1], 1, rotatedMris.shape[2], rotatedMris.shape[3])) rotatedMrisOut = rotatedMrisOut.view(rotatedMris.shape[0], rotatedMris.shape[1], rotatedMris.shape[2], rotatedMris.shape[3]) # Compute the encoded Representations (_, mriOutEncoded, _, _) = mriEncoderModel(mriOut, get_features=True) (_, usOutEncoded, _, _) = usEncoderModel(usOut, get_features=True) (_, mriEncoded, _, _) = mriEncoderModel(mri, get_features=True) (_, usEncoded, _, _) = usEncoderModel(us, get_features=True) # Calculate Losses (totalLoss, mriUsOutDiffLoss, mriEnLoss, usEnLoss, regLoss) = calculateTotalLoss(mriOut, usOut, rotatedMrisOut, mriOutEncoded, usOutEncoded, mriEncoded, usEncoded, l2Loss, l1Loss) # perform backward pass totalLoss.backward() optimizer.step() # print Epoch number and Loss print("Epoch: ", epochNum, "iteration: ", i_batch, "Total loss: ", totalLoss.item(), "MriUSLoss: ", mriUsOutDiffLoss.item(), "MriEncoderLoss: ", mriEnLoss.item(), "UsEncoderLoss:", usEnLoss.item(), "RegistrationLoss", regLoss.item()) iteration = iteration + 1 logger.appendLine(X=np.array([iteration]), Y=np.array([totalLoss.item()]), win="test", name="Total Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([mriUsOutDiffLoss.item()]), win="test", name="MRI US Diff Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), 
Y=np.array([mriEnLoss.item()]), win="test", name="MRI Encoder Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([usEnLoss.item()]), win="test", name="US Encoder Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([regLoss.item()]), win="test", name="Registration Loss", xlabel="Iteration Number", ylabel="Loss") if iteration % 20 == 0: usValIn = torch.Tensor([]) mriValIn = torch.Tensor([]) rotatedMrisValIn = torch.Tensor([]) #switch model to eval mode for validation mriForwardmodel.eval() usForwardmodel.eval() # Show output on a validation image after some iterations valBatchSize = 5 valSetIds = np.random.randint(low=1, high=validationDataset.__len__(), size=valBatchSize) # Make a concatenated tensor of inputs for i in range(valBatchSize): valData = validationDataset[valSetIds[i]] usValIn = torch.cat((usValIn, valData['us'].unsqueeze(0)), 0) mriValIn = torch.cat( (mriValIn, valData['mri'].unsqueeze(0)), 0) rotatedMrisValIn = torch.cat( (rotatedMrisValIn, valData['rotatedMris'].unsqueeze(0)), 0) usValIn = usValIn.to(device) mriValIn = mriValIn.to(device) rotatedMrisValIn = rotatedMrisValIn.to(device) mriValOut = mriForwardmodel(mriValIn) usValOut = usForwardmodel(usValIn) rotatedMrisValOut = mriForwardmodel( rotatedMrisValIn.view( rotatedMrisValIn.shape[0] * rotatedMrisValIn.shape[1], 1, rotatedMrisValIn.shape[2], rotatedMrisValIn.shape[3])) rotatedMrisValOut = rotatedMrisValOut.view( rotatedMrisValIn.shape[0], rotatedMrisValIn.shape[1], rotatedMrisValIn.shape[2], rotatedMrisValIn.shape[3]) # Compute the encoded Representations (_, mriValOutEncoded, _, _) = mriEncoderModel(mriValOut, get_features=True) (_, usValOutEncoded, _, _) = usEncoderModel(usValOut, get_features=True) (_, mriValEncoded, _, _) = mriEncoderModel(mriValIn, get_features=True) (_, usValEncoded, _, _) = usEncoderModel(usValIn, get_features=True) #viewActivations(mriValIn, mriEncoderModel, logger, "MRI In Activations") #viewActivations(usValIn, usEncoderModel, logger, "US In Activations") #viewActivations(mriValOut, mriEncoderModel, logger, "MRI Out Activations") #viewActivations(usValOut, usEncoderModel, logger, "US Out Activations") """ print('') print("MRI running mean ", mriForwardmodel.batchNorm.running_mean) print('') print("US running mean ", usForwardmodel.batchNorm.running_mean) print('') print("MRI running var ", mriForwardmodel.batchNorm.running_var) print('') print("US running var ", usForwardmodel.batchNorm.running_var) print('') """ # find and plot loss on the validation batch also Calculate Losses (totalLossVal, mriUsValOutDiffLoss, _, _, _) = calculateTotalLoss(mriValOut, usValOut, rotatedMrisValOut, mriValOutEncoded, usValOutEncoded, mriValEncoded, usValEncoded, l2Loss, l1Loss) logger.appendLine(X=np.array([iteration]), Y=np.array([mriUsValOutDiffLoss.item()]), win="test", name="[VAL] MRI US Diff Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([totalLossVal.item()]), win="test", name="[VAL] Total Loss", xlabel="Iteration Number", ylabel="Loss") mriDisplayImages = torch.cat( (mriValIn, rangeNormalize(mriValOut)[0]), 0) usDisplayImages = torch.cat( (usValIn, rangeNormalize(usValOut)[0]), 0) mriTrainDisplayImages = torch.cat( (mri, rangeNormalize(mriOut)[0]), 0) usTrainDisplayImages = torch.cat( (us, rangeNormalize(usOut)[0]), 0) # plot images in visdom in a grid, MRI and US has different grids. 
logger.plotImages(images=mriDisplayImages, nrow=valBatchSize, win="mriGrid", caption='MRI Validation') logger.plotImages(images=usDisplayImages, nrow=valBatchSize, win="usGrid", caption='US Validation') logger.plotImages(images=mriTrainDisplayImages, nrow=batchSize, win="trainMRI", caption='MRI Train') logger.plotImages(images=usTrainDisplayImages, nrow=batchSize, win="trainUS", caption='US Train') #switch back to train mode usForwardmodel.train() mriForwardmodel.train() if epochNum % 1 == 0: print("Saved model ", iteration) torch.save( mriForwardmodel.state_dict(), os.path.join(modelPath, "mriForward" + str(epochNum) + ".pth")) torch.save( usForwardmodel.state_dict(), os.path.join(modelPath, "usForward" + str(epochNum) + ".pth")) #Learning rate decay after every Epoch optimizer.step() print("Training ended")
def train_correspondence_block(root_dir, train_eval_dir, classes, epochs=10, batch_size=4, \ accum_grad_batch_size=1, out_path_and_name=None, corr_transfer=None): train_data = LineMODDataset(root_dir, train_eval_dir, classes=classes, transform=transforms.Compose([ transforms.ToTensor(), transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0) ])) num_workers = 0 valid_size = 0.2 # obtain training indices that will be used for validation num_train = len(train_data) indices = list(range(num_train)) np.random.shuffle(indices) split = int(np.floor(valid_size * num_train)) train_idx, valid_idx = indices[split:], indices[:split] # define samplers for obtaining training and validation batches train_sampler = SubsetRandomSampler(train_idx) valid_sampler = SubsetRandomSampler(valid_idx) # prepare data loaders (combine dataset and sampler) train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers) valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers) # architecture for correspondence block - 13 objects + backgound = 14 channels for ID masks correspondence_block = UNET.UNet(n_channels=3, out_channels_id=14, out_channels_uv=256, bilinear=True) if corr_transfer: print("Initializing correspondence block from: %s" % corr_transfer) correspondence_block.load_state_dict( torch.load(corr_transfer, map_location=torch.device('cpu'))) correspondence_block.cuda() if 0: from torchsummary import summary summary(correspondence_block, input_size=(3, 240, 320)) sys.exit(1) # custom loss function and optimizer weight_classes = False if weight_classes: # Using weighted version for class mask as mentioned in the paper # However, not sure what the weighting is, so taking a guess # Note we don't need to normalize when using the default 'reduction' arg class_weights = np.ones(len(classes) + 1) # +1 for background class_weights[0] = 0.1 criterion_id = nn.CrossEntropyLoss( torch.tensor(class_weights, dtype=torch.float32).cuda()) else: criterion_id = nn.CrossEntropyLoss() criterion_u = nn.CrossEntropyLoss() criterion_v = nn.CrossEntropyLoss() # specify optimizer optimizer = optim.Adam(correspondence_block.parameters(), lr=3e-4, weight_decay=3e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, verbose=True) # number of epochs to train the model n_epochs = epochs # track change in validation loss valid_loss_min = np.Inf if accum_grad_batch_size != 1: print("Gradient accumulator batch size: %i" % accum_grad_batch_size) # training loop for epoch in range(1, n_epochs + 1): # keep track of training and validation loss train_loss = 0.0 valid_loss = 0.0 train_idmask_loss = train_umask_loss = train_vmask_loss = 0.0 valid_idmask_loss = valid_umask_loss = valid_vmask_loss = 0.0 print("------ Epoch ", epoch, " ---------") print("Training...") ################### # train the model # ################### batch_cnt = 0 correspondence_block.train() # clear the gradients of all optimized variables optimizer.zero_grad() for img_adr, image, idmask, umask, vmask in train_loader: assert image.shape[1] == correspondence_block.n_channels, \ f'Network has been defined with {correspondence_block.n_channels} input channels, ' \ f'but loaded images have {image.shape[1]} channels. Please check that ' \ 'the images are loaded correctly.' if batch_cnt % 100 == 0: print("Batch %i/%i finished!" 
% (batch_cnt, len(train_idx) / batch_size)) # move tensors to GPU if CUDA is available image, idmask, umask, vmask = image.cuda(), idmask.cuda( ), umask.cuda(), vmask.cuda() # forward pass: compute predicted outputs by passing inputs to the model idmask_pred, umask_pred, vmask_pred = correspondence_block(image) # Calculate the batch loss # Need to divide by accum_grad_batch_size to account for accumulated gradients loss_id = criterion_id(idmask_pred, idmask) / accum_grad_batch_size loss_u = criterion_u(umask_pred, umask) / accum_grad_batch_size loss_v = criterion_v(vmask_pred, vmask) / accum_grad_batch_size total_loss = loss_id + loss_u + loss_v # backward pass: compute gradient of the loss with respect to model parameters total_loss.backward() # update training loss # Note that .item() also releases the memory. DON'T accumulate the criterion obj train_idmask_loss += loss_id.item() train_umask_loss += loss_u.item() train_vmask_loss += loss_v.item() train_loss += total_loss.item() # Only update once every accum_grad_batch_size if (batch_cnt + 1) % accum_grad_batch_size == 0: # perform a single optimization step (parameter update) optimizer.step() # Reset gradients, for next accumulated batch optimizer.zero_grad() batch_cnt += 1 ###################### # validate the model # ###################### print("Validating...") correspondence_block.eval() optimizer.zero_grad() batch_cnt = 0 with torch.no_grad(): # This is critical to limit GPU memory use for img_adr, image, idmask, umask, vmask in valid_loader: if batch_cnt % 100 == 0: print("Batch %i/%i finished!" % (batch_cnt, len(valid_idx) / batch_size)) # move tensors to GPU if CUDA is available image, idmask, umask, vmask = image.cuda(), idmask.cuda( ), umask.cuda(), vmask.cuda() # forward pass: compute predicted outputs by passing inputs to the model idmask_pred, umask_pred, vmask_pred = correspondence_block( image) # calculate the batch loss loss_id = criterion_id(idmask_pred, idmask) loss_u = criterion_u(umask_pred, umask) loss_v = criterion_v(vmask_pred, vmask) total_loss = loss_id + loss_u + loss_v # update average validation loss valid_idmask_loss += loss_id.item() valid_umask_loss += loss_u.item() valid_vmask_loss += loss_v.item() valid_loss += total_loss.item() batch_cnt += 1 # Calculate average losses train_loss = train_loss / len(train_loader.sampler) train_idmask_loss = train_idmask_loss / len(train_loader.sampler) train_umask_loss = train_umask_loss / len(train_loader.sampler) train_vmask_loss = train_vmask_loss / len(train_loader.sampler) valid_loss = valid_loss / len(valid_loader.sampler) valid_idmask_loss = valid_idmask_loss / len(valid_loader.sampler) valid_umask_loss = valid_umask_loss / len(valid_loader.sampler) valid_vmask_loss = valid_vmask_loss / len(valid_loader.sampler) # print training/validation statistics print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'. format(epoch, train_loss, valid_loss)) print('Train IDMask loss: %.6f \tUMask loss: %.6f \tUMask loss: %.6f' % \ (train_idmask_loss, train_umask_loss, train_vmask_loss)) print('Valid IDMask loss: %.6f \tUMask loss: %.6f \tUMask loss: %.6f' % \ (valid_idmask_loss, valid_umask_loss, valid_vmask_loss)) scheduler.step(valid_loss) # TODO - monitor for train/val divergence and stop # save model if validation loss has decreased # Don't save model on the first epoch in case we're transfer learning and initialized to Inf if valid_loss <= valid_loss_min and (epoch > 1 or epochs == 1): print( 'Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...' 
.format(valid_loss_min, valid_loss)) if not out_path_and_name: correspondence_block_filename = os.path.join( train_eval_dir, 'correspondence_block.pt') else: correspondence_block_filename = out_path_and_name torch.save(correspondence_block.state_dict(), correspondence_block_filename) valid_loss_min = valid_loss if epoch == 1: valid_loss_min = valid_loss
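# Hedged usage sketch for the gradient-accumulation variant above: with batch_size=4
# and accum_grad_batch_size=8, the optimizer steps once every 8 mini-batches, i.e. an
# effective batch size of 32. All paths and the class mapping are hypothetical
# placeholders; only the keyword names come from the function signature.
example_classes = {'ape': 1, 'can': 2, 'cat': 3}   # example mapping only
train_correspondence_block('LineMOD_Dataset/', 'train_eval/', example_classes,
                           epochs=20, batch_size=4, accum_grad_batch_size=8,
                           corr_transfer='pretrained_correspondence_block.pt')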
def train_model(output_folder, tensorboard_dir, scratch_dir, batch_size, train_lmdb_filepath, test_lmdb_filepath, number_classes, balance_classes, learning_rate, test_every_n_steps, use_augmentation, augmentation_reflection, augmentation_rotation, augmentation_jitter, augmentation_noise, augmentation_scale, augmentation_blur_max_sigma): if not os.path.exists(output_folder): os.makedirs(output_folder) training_checkpoint_filepath = None # use all available devices mirrored_strategy = tf.distribute.MirroredStrategy() with mirrored_strategy.scope(): global_batch_size = batch_size * mirrored_strategy.num_replicas_in_sync # scale the number of I/O readers based on the GPU count reader_count = READER_COUNT * mirrored_strategy.num_replicas_in_sync print('Setting up test image reader') test_reader = imagereader.ImageReader(test_lmdb_filepath, use_augmentation=False, shuffle=False, num_workers=reader_count, balance_classes=False, number_classes=number_classes) print('Test Reader has {} images'.format( test_reader.get_image_count())) print('Setting up training image reader') train_reader = imagereader.ImageReader( train_lmdb_filepath, use_augmentation=use_augmentation, shuffle=True, num_workers=reader_count, balance_classes=balance_classes, number_classes=number_classes, augmentation_reflection=augmentation_reflection, augmentation_rotation=augmentation_rotation, augmentation_jitter=augmentation_jitter, augmentation_noise=augmentation_noise, augmentation_scale=augmentation_scale, augmentation_blur_max_sigma=augmentation_blur_max_sigma) print('Train Reader has {} images'.format( train_reader.get_image_count())) try: # if any errors happen we want to catch them and shut down the multiprocess readers print('Starting Readers') train_reader.startup() test_reader.startup() train_dataset = train_reader.get_tf_dataset() train_dataset = train_dataset.batch(global_batch_size).prefetch( reader_count) train_dataset = mirrored_strategy.experimental_distribute_dataset( train_dataset) test_dataset = test_reader.get_tf_dataset() test_dataset = test_dataset.batch(global_batch_size).prefetch( reader_count) test_dataset = mirrored_strategy.experimental_distribute_dataset( test_dataset) print('Creating model') model = unet_model.UNet(number_classes, global_batch_size, train_reader.get_image_size(), learning_rate) checkpoint = tf.train.Checkpoint(optimizer=model.get_optimizer(), model=model.get_keras_model()) # print the model summary to file with open(os.path.join(output_folder, 'model.txt'), 'w') as summary_fh: print_fn = lambda x: print(x, file=summary_fh) model.get_keras_model().summary(print_fn=print_fn) # train_epoch_size = train_reader.get_image_count()/batch_size train_epoch_size = test_every_n_steps test_epoch_size = test_reader.get_image_count() / batch_size test_loss = list() # Prepare the metrics. 
train_loss_metric = tf.keras.metrics.Mean('train_loss', dtype=tf.float32) train_acc_metric = tf.keras.metrics.CategoricalAccuracy( 'train_accuracy') test_loss_metric = tf.keras.metrics.Mean('test_loss', dtype=tf.float32) test_acc_metric = tf.keras.metrics.CategoricalAccuracy( 'test_accuracy') train_log_dir = os.path.join(tensorboard_dir, 'train') if not os.path.exists(train_log_dir): os.makedirs(train_log_dir) test_log_dir = os.path.join(tensorboard_dir, 'test') if not os.path.exists(test_log_dir): os.makedirs(test_log_dir) train_summary_writer = tf.summary.create_file_writer(train_log_dir) test_summary_writer = tf.summary.create_file_writer(test_log_dir) epoch = 0 print('Running Network') while True: # loop until early stopping print('---- Epoch: {} ----'.format(epoch)) # Iterate over the batches of the train dataset. for step, (batch_images, batch_labels) in enumerate(train_dataset): if step > train_epoch_size: break inputs = (batch_images, batch_labels, train_loss_metric, train_acc_metric) model.dist_train_step(mirrored_strategy, inputs) print('Train Epoch {}: Batch {}/{}: Loss {} Accuracy = {}'. format(epoch, step, train_epoch_size, train_loss_metric.result(), train_acc_metric.result())) with train_summary_writer.as_default(): tf.summary.scalar('loss', train_loss_metric.result(), step=int(epoch * train_epoch_size + step)) tf.summary.scalar('accuracy', train_acc_metric.result(), step=int(epoch * train_epoch_size + step)) train_loss_metric.reset_states() train_acc_metric.reset_states() # Iterate over the batches of the test dataset. epoch_test_loss = list() for step, (batch_images, batch_labels) in enumerate(test_dataset): if step > test_epoch_size: break inputs = (batch_images, batch_labels, test_loss_metric, test_acc_metric) loss_value = model.dist_test_step(mirrored_strategy, inputs) epoch_test_loss.append(loss_value.numpy()) # print('Test Epoch {}: Batch {}/{}: Loss {}'.format(epoch, step, test_epoch_size, loss_value)) test_loss.append(np.mean(epoch_test_loss)) print('Test Epoch: {}: Loss = {} Accuracy = {}'.format( epoch, test_loss_metric.result(), test_acc_metric.result())) with test_summary_writer.as_default(): tf.summary.scalar('loss', test_loss_metric.result(), step=int((epoch + 1) * train_epoch_size)) tf.summary.scalar('accuracy', test_acc_metric.result(), step=int((epoch + 1) * train_epoch_size)) test_loss_metric.reset_states() test_acc_metric.reset_states() with open(os.path.join(output_folder, 'test_loss.csv'), 'w') as csvfile: for i in range(len(test_loss)): csvfile.write(str(test_loss[i])) csvfile.write('\n') # determine if to record a new checkpoint based on best test loss if (len(test_loss) - 1) == np.argmin(test_loss): # save tf checkpoint print('Test loss improved: {}, saving checkpoint'.format( np.min(test_loss))) training_checkpoint_filepath = checkpoint.write( os.path.join(scratch_dir, "ckpt")) # determine early stopping print('Best Current Epoch Selection:') print('Test Loss:') print(test_loss) min_test_loss = np.min(test_loss) error_from_best = np.abs(test_loss - min_test_loss) error_from_best[error_from_best < CONVERGENCE_TOLERANCE] = 0 best_epoch = np.where(error_from_best == 0)[0][ 0] # unpack numpy array, select first time since that value has happened print('Best epoch: {}'.format(best_epoch)) if len(test_loss) - best_epoch > EARLY_STOPPING_COUNT: break # break the epoch loop epoch = epoch + 1 finally: # if any erros happened during training, shut down the disk readers print('Shutting down train_reader') train_reader.shutdown() print('Shutting down 
test_reader') test_reader.shutdown() if training_checkpoint_filepath is not None: # restore the checkpoint and generate a saved model model = unet_model.UNet(number_classes, global_batch_size, train_reader.get_image_size(), learning_rate) checkpoint = tf.train.Checkpoint(optimizer=model.get_optimizer(), model=model.get_keras_model()) checkpoint.restore(training_checkpoint_filepath) tf.saved_model.save(model.get_keras_model(), output_folder)
def train():
    global_step = tf.train.get_or_create_global_step()
    # global_step = 1
    # sometimes ending up on GPUs resulting in a slowdown.
    with tf.device('/cpu:0'):
        rawList, segList = unet_input.GetFileNameList(DATA_TRAIN)
        rawImageBatch, segImageBatch = unet_input.GetBatchFromFile_Train(
            rawList, segList, BATCH_SIZE)

    # Build a Graph that computes the predicted HR images from GIBBS RING CLEAR model.
    PredBatch = unet_model.UNet(rawImageBatch)

    # Calculate loss.
    TrainLoss = unet_model.loss(segImageBatch, PredBatch)

    # Get the training op for optimizing loss
    TrainOp = unet_model.optimize(TrainLoss, LearningRate, global_step)
    TrainMeanPSNR = unet_model.evaluation(segImageBatch, PredBatch)
    dice = unet_model.Dice(segImageBatch, PredBatch)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    # Build the summary operation from the last tower summaries.
    summ_op = tf.summary.merge_all()

    # Start running operations on the Graph. allow_soft_placement must be set to
    # True to build towers on GPU, as some of the ops do not have GPU implementations.
    init_step = 0
    config = tf.ConfigProto()
    config.log_device_placement = LogDevicePlacement  # whether to log device placement
    config.allow_soft_placement = AllowSoftPlacement  # if the requested device does not exist, let TF choose one automatically
    with tf.Session(config=config) as sess:
        if TrainFromExist:
            init_step = restore_model(sess, saver, ExistModelDir, global_step)
        else:
            print("Initializing Variables...")
            sess.run(tf.global_variables_initializer())

        # queue runners, multi threads and coordinator
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        print("Defining Summary Writer...")
        summary_writer = tf.summary.FileWriter(LogDir, sess.graph)

        min_loss = float('Inf')
        max_psnr = 0
        max_dice = 0
        try:
            print("Starting To Train...")
            for step in range(init_step, MAX_STEPS):
                # execute training once!
                # start_time = time.time()
                sess.run(TrainOp)
                # duration = time.time() - start_time
                if (step + 1) % LogFreq == 0:
                    # examples_per_second = BATCH_SIZE/duration
                    # seconds_per_batch = float(duration)
                    loss_value, PSNR_value, dice_value, batch_raw, image_labels, model_seg = sess.run(
                        [TrainLoss, TrainMeanPSNR, dice,
                         rawImageBatch, segImageBatch, PredBatch])
                    if min_loss > loss_value:
                        min_loss = loss_value
                    if max_psnr < PSNR_value:
                        max_psnr = PSNR_value
                    if max_dice < dice_value:
                        max_dice = dice_value
                    with open("Records/train_records.txt", "a") as file:
                        format_str = "%d\t%.6f\t%.6f\t%.6f\t%.6f\n"
                        file.write(format_str % (step + 1, loss_value, min_loss,
                                                 dice_value, max_dice))
                    print("%s ---- step %d:" % (datetime.now(), step + 1))
                    print("\tLOSS = %.6f\tmin_Loss = %.6f" % (loss_value, min_loss))
                    print("\tPSNR = %.4f\tmax_PSNR = %.4f" % (PSNR_value, max_psnr))
                    print("\tDICE = %.2f\tmax_DICE = %.2f" % (dice_value, max_dice))
                if ((step + 1) % 100 == 0) or ((step + 1) == MAX_STEPS):
                    summary_str = sess.run(summ_op)
                    summary_writer.add_summary(summary_str, step + 1)
                if (step == 0) or ((step + 1) % 200 == 0) or ((step + 1) == MAX_STEPS):
                    checkpoint_path = os.path.join(LogDir, 'model.ckpt')
                    print("saving checkpoint into %s-%d" % (checkpoint_path, step + 1))
                    saver.save(sess, checkpoint_path, global_step=step + 1)
        except Exception as e:
            coord.request_stop(e)
        finally:
            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
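# unet_model.Dice() is used above but not shown. For reference, the Dice score being
# logged is conventionally 2 * |A intersect B| / (|A| + |B|) over binary masks; the
# minimal NumPy sketch below assumes thresholded masks and is only illustrative of
# what the graph op likely computes.
import numpy as np


def dice_coefficient(seg_true, seg_pred, threshold=0.5, eps=1e-7):
    a = (np.asarray(seg_true) > threshold).astype(np.float32)
    b = (np.asarray(seg_pred) > threshold).astype(np.float32)
    intersection = np.sum(a * b)
    return (2.0 * intersection + eps) / (np.sum(a) + np.sum(b) + eps)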
def main(mriEncoderPath, usEncoderPath): numEpochs, batchSize = 50, 16 # Use GPU if available use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if use_cuda else "cpu") # create visdom logger instance logger = vl.VisdomLogger(port="65531") logger.deleteWindow(win="test") # Initialize the MRI US dataset and the dataset loader dir = os.path.dirname(__file__) modelPath = os.path.join(dir, '..', 'savedModels/transformation') trainDatasetDir = os.path.join(dir, '..', 'dataset/sintefHdf5/train') trainDataset = dataset.SintefDataset(rootDir=trainDatasetDir) valDatasetDir = os.path.join(dir, '..', 'dataset/sintefHdf5/val') validationDataset = dataset.SintefDataset(rootDir=valDatasetDir) dataloader = batchloader.DataLoader(trainDataset, batch_size=batchSize, shuffle=True, num_workers=0) # Initialize the MRI US dataset dir = os.path.dirname(__file__) testDatasetDir = '/media/alok/Data/deepLearning/datasets/sintefWolfgangAffine/tranRotMriVol/train' testDataset = TestDataset.SintefDatasetValTest(rootDir=testDatasetDir, mriFilePrefix='T1-Vol', usFilePrefix='US-DS') #create transformation models and add them to optimizer mriForwardmodel = models.UNet(n_classes=1, n_channels=1).to(device) usForwardmodel = models.UNet(n_classes=1, n_channels=1).to(device) usShallowModel = models.ShallowNet(n_classes=1, n_channels=1).to(device) mriShallowModel = models.ShallowNet(n_classes=1, n_channels=1).to(device) networkParams = list(mriForwardmodel.parameters()) + list( usForwardmodel.parameters()) + list( mriShallowModel.parameters()) + list(usShallowModel.parameters()) optimizer = torch.optim.Adam(networkParams, lr=1e-5, weight_decay=1e-9) #Optimizer learning rate decay. scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98) #Load encoder models mriEncoderModel = AutoEncoder().to(device) usEncoderModel = AutoEncoder().to(device) mriEncoderModel.load_state_dict(torch.load(mriEncoderPath)) usEncoderModel.load_state_dict(torch.load(usEncoderPath)) mriEncoderModel.eval() usEncoderModel.eval() # create instance of loss class l2Loss = torch.nn.MSELoss() l1Loss = torch.nn.L1Loss() iteration = 0 for epochNum in range(50): for i_batch, sample_batched in enumerate(dataloader): mri = sample_batched['mri'].to(device) us = sample_batched['us'].to(device) # perform optimization optimizer.zero_grad() # Compute the shared representation mriOut = mriForwardmodel(mri) usOut = usForwardmodel(us) #StdMean Normalization, later Sigmoid assures the range of the image is between 0-1 #mriOut = meanStdNormalize(mriOut, targetMean=0.0, targetStd=1) #usOut = meanStdNormalize(usOut, targetMean=0.0, targetStd=1) #mriOut = rangeNormalize(mriOut) #usOut = rangeNormalize(usOut) # Compute the encoded Representations (_, _, _, _, mriOutEncoded) = mriEncoderModel(mriShallowModel(mriOut), get_features=True) (_, _, _, _, usOutEncoded) = usEncoderModel(usShallowModel(usOut), get_features=True) (_, _, _, _, mriEncoded) = mriEncoderModel(mri, get_features=True) (_, _, _, _, usEncoded) = usEncoderModel(us, get_features=True) # Calculate Losses (totalLoss, mriUsOutDiffLoss, mriEnLoss, usEnLoss, sparsityLoss) = calculateTotalLoss(mriOut, usOut, mriOutEncoded, usOutEncoded, mriEncoded, usEncoded, l2Loss, l1Loss) # perform backward pass totalLoss.backward() optimizer.step() # print Epoch number and Loss print("Epoch: ", epochNum, "iteration: ", i_batch, "Total loss: ", totalLoss.item(), "MriUSLoss: ", mriUsOutDiffLoss.item(), "MriEncoderLoss: ", mriEnLoss.item(), "UsEncoderLoss:", usEnLoss.item()) iteration = iteration + 1 
logger.appendLine(X=np.array([iteration]), Y=np.array([totalLoss.item()]), win="test", name="Total Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([mriUsOutDiffLoss.item()]), win="test", name="MRI US Diff Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([mriEnLoss.item()]), win="test", name="MRI Encoder Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([usEnLoss.item()]), win="test", name="US Encoder Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([sparsityLoss.item()]), win="test", name="Sparsity Loss", xlabel="Iteration Number", ylabel="Loss") usValIn = torch.Tensor([]) mriValIn = torch.Tensor([]) if iteration % 20 == 0: #switch model to eval mode for validation mriForwardmodel.eval() usForwardmodel.eval() # Show output on a validation image after some iterations valBatchSize = 8 valSetIds = np.random.randint(low=1, high=validationDataset.__len__(), size=valBatchSize) # Make a concatenated tensor of inputs for i in range(valBatchSize): valData = validationDataset[valSetIds[i]] usValIn = torch.cat((usValIn, valData['us'].unsqueeze(0)), 0) mriValIn = torch.cat( (mriValIn, valData['mri'].unsqueeze(0)), 0) usValIn = usValIn.to(device) mriValIn = mriValIn.to(device) mriValOut = mriForwardmodel(mriValIn) usValOut = usForwardmodel(usValIn) #StdMean Normalization, later Sigmoid assures the range of the image is between 0-1 #mriValOut = meanStdNormalize(mriValOut, targetMean=0.0, targetStd=1) #usValOut = meanStdNormalize(usValOut, targetMean=0.0, targetStd=1) #mriValOut = rangeNormalize(mriValOut) #usValOut = rangeNormalize(usValOut) # Compute the encoded Representations (_, _, _, _, mriValOutEncoded) = mriEncoderModel( mriShallowModel(mriValOut), get_features=True) (_, _, _, _, usValOutEncoded) = usEncoderModel(usShallowModel(usValOut), get_features=True) (_, _, _, _, mriValEncoded) = mriEncoderModel(mriValIn, get_features=True) (_, _, _, _, usValEncoded) = usEncoderModel(usValIn, get_features=True) # find and plot loss on the validation batch also Calculate Losses (totalLossVal, mriUsValOutDiffLoss, _, _, _) = calculateTotalLoss(mriValOut, usValOut, mriValOutEncoded, usValOutEncoded, mriValEncoded, usValEncoded, l2Loss, l1Loss) logger.appendLine(X=np.array([iteration]), Y=np.array([mriUsValOutDiffLoss.item()]), win="test", name="[VAL] MRI US Diff Loss", xlabel="Iteration Number", ylabel="Loss") logger.appendLine(X=np.array([iteration]), Y=np.array([totalLossVal.item()]), win="test", name="[VAL] Total Loss", xlabel="Iteration Number", ylabel="Loss") mriDisplayImages = torch.cat((mriValIn, mriValOut), 0) usDisplayImages = torch.cat((usValIn, usValOut), 0) mriTrainDisplayImages = torch.cat((mri, mriOut), 0) usTrainDisplayImages = torch.cat((us, usOut), 0) # plot images in visdom in a grid, MRI and US has different grids. 
logger.plotImages(images=mriDisplayImages, nrow=valBatchSize, win="mriGrid", caption='MRI Validation') logger.plotImages(images=usDisplayImages, nrow=valBatchSize, win="usGrid", caption='US Validation') logger.plotImages(images=mriTrainDisplayImages, nrow=batchSize, win="trainMRI", caption='MRI Train') logger.plotImages(images=usTrainDisplayImages, nrow=batchSize, win="trainUS", caption='US Train') #switch back to train mode usForwardmodel.train() mriForwardmodel.train() if epochNum % 3 == 0: torch.save( mriForwardmodel.state_dict(), os.path.join(modelPath, "mriForward" + str(epochNum) + ".pth")) torch.save( usForwardmodel.state_dict(), os.path.join(modelPath, "usForward" + str(epochNum) + ".pth")) #Learning rate decay after every Epoch scheduler.step()