def train(data_dir, atlas_file, model_dir, gpu_id, lr, nb_epochs, prior_lambda, image_sigma, steps_per_epoch, batch_size, load_model_file, bidir, initial_epoch=0): """ model training function :param data_dir: folder with npz files for each subject. :param atlas_file: atlas filename. So far we support npz file with a 'vol' variable :param model_dir: model folder to save to :param gpu_id: integer specifying the gpu to use :param lr: learning rate :param nb_epochs: number of training iterations :param prior_lambda: the prior_lambda, the scalar in front of the smoothing laplacian, in MICCAI paper :param image_sigma: the image sigma in MICCAI paper :param steps_per_epoch: frequency with which to save models :param batch_size: Optional, default of 1. can be larger, depends on GPU memory and volume size :param load_model_file: optional h5 model file to initialize with :param bidir: logical whether to use bidirectional cost function """ # load atlas from provided files. The atlas we used is 160x192x224. atlas_vol = np.load(atlas_file)['vol'][np.newaxis, ..., np.newaxis] vol_size = atlas_vol.shape[1:-1] # prepare data files # for the CVPR and MICCAI papers, we have data arranged in train/validate/test folders # inside each folder is a /vols/ and a /asegs/ folder with the volumes # and segmentations. All of our papers use npz formated data. train_vol_names = glob.glob(os.path.join(data_dir, '*.npz')) random.shuffle(train_vol_names) # shuffle volume list assert len(train_vol_names) > 0, "Could not find any training data" # Diffeomorphic network architecture used in MICCAI 2018 paper nf_enc = [16, 32, 32, 32] nf_dec = [32, 32, 32, 32, 16, 3] # prepare model folder if not os.path.isdir(model_dir): os.mkdir(model_dir) # gpu handling gpu = '/gpu:%d' % 0 # gpu_id os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True set_session(tf.Session(config=config)) # prepare the model with tf.device(gpu): # the MICCAI201 model takes in [image_1, image_2] and outputs [warped_image_1, velocity_stats] # in these experiments, we use image_2 as atlas model = networks.miccai2018_net(vol_size, nf_enc, nf_dec, bidir=bidir) # load initial weights if load_model_file is not None and load_model_file != "": model.load_weights(load_model_file) # save first iteration model.save(os.path.join(model_dir, '%02d.h5' % initial_epoch)) # compile # note: best to supply vol_shape here than to let tf figure it out. flow_vol_shape = model.outputs[-1].shape[1:-1] loss_class = losses.Miccai2018(image_sigma, prior_lambda, flow_vol_shape=flow_vol_shape) if bidir: model_losses = [ loss_class.recon_loss, loss_class.recon_loss, loss_class.kl_loss ] loss_weights = [0.5, 0.5, 1] else: model_losses = [loss_class.recon_loss, loss_class.kl_loss] loss_weights = [1, 1] # data generator nb_gpus = len(gpu_id.split(',')) assert np.mod(batch_size, nb_gpus) == 0, \ 'batch_size should be a multiple of the nr. of gpus. ' + \ 'Got batch_size %d, %d gpus' % (batch_size, nb_gpus) train_example_gen = datagenerators.example_gen(train_vol_names, batch_size=batch_size) atlas_vol_bs = np.repeat(atlas_vol, batch_size, axis=0) miccai2018_gen = datagenerators.miccai2018_gen(train_example_gen, atlas_vol_bs, batch_size=batch_size, bidir=bidir) # prepare callbacks save_file_name = os.path.join(model_dir, '{epoch:02d}.h5') # fit generator with tf.device(gpu): # multi-gpu support if nb_gpus > 1: save_callback = nrn_gen.ModelCheckpointParallel(save_file_name) mg_model = multi_gpu_model(model, gpus=nb_gpus) # single gpu else: save_callback = ModelCheckpoint(save_file_name) mg_model = model mg_model.compile(optimizer=Adam(lr=lr), loss=model_losses, loss_weights=loss_weights) mg_model.fit_generator(miccai2018_gen, initial_epoch=initial_epoch, epochs=nb_epochs, callbacks=[save_callback], steps_per_epoch=steps_per_epoch, verbose=1)
def train(data_dir, model, model_name, gpu_id, lr, nb_epochs, reg_param, steps_per_epoch, batch_size, load_model_file, atlas_file, max_clip, distance, patch_size, use_ssc, use_gaussian_kernel, use_fixed_var, use_miccai, initial_epoch=0): """ model training function :param data_dir: folder with npz files for each subject. :param atlas_file: atlas filename. So far we support npz file with a 'vol' variable :param model: either vm1 or vm2 (based on CVPR 2018 paper) :param model_dir: the model directory to save to :param gpu_id: integer specifying the gpu to use :param lr: learning rate :param n_iterations: number of training iterations :param reg_param: the smoothness/reconstruction tradeoff parameter (lambda in CVPR paper) :param steps_per_epoch: frequency with which to save models :param batch_size: Optional, default of 1. can be larger, depends on GPU memory and volume size :param load_model_file: optional h5 model file to initialize with :param data_loss: data_loss: 'mse' or 'ncc """ # load atlas from provided files. The atlas we used is 160x192x224. atlas_vol = nib.load(atlas_file).get_data()[np.newaxis, ..., np.newaxis] atlas_vol = atlas_vol / np.max(atlas_vol) * max_clip # atlas_vol = nib.load('../data/t1_atlas.nii').get_data()[np.newaxis,...,np.newaxis] vol_size = atlas_vol.shape[1:-1] # prepare data files # for the CVPR and MICCAI papers, we have data arranged in train/validate/test folders # inside each folder is a /vols/ and a /asegs/ folder with the volumes # and segmentations. All of our papers use npz formated data. train_vol_names = glob.glob(os.path.join(data_dir, '*.npz')) random.shuffle(train_vol_names) # shuffle volume list assert len(train_vol_names) > 0, "Could not find any training data" # UNET filters for voxelmorph-1 and voxelmorph-2, # these are architectures presented in CVPR 2018 nf_enc = [16, 32, 32, 32] if model == 'vm1': nf_dec = [32, 32, 32, 32, 8, 8] elif model == 'vm2': nf_dec = [32, 32, 32, 32, 32, 16, 16] else: # 'vm2double': nf_enc = [f * 2 for f in nf_enc] nf_dec = [f * 2 for f in [32, 32, 32, 32, 32, 16, 16]] model_dir = "../models/" + model_name # prepare model folder if not os.path.isdir(model_dir): os.mkdir(model_dir) # GPU handling gpu = '/gpu:%d' % gpu_id os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) print(gpu) config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True set_session(tf.Session(config=config)) # prepare the model with tf.device(gpu): # prepare the model # in the CVPR layout, the model takes in [image_1, image_2] and outputs [warped_image_1, flow] # in the experiments, we use image_2 as atlas if use_miccai: print('miccai: therefore diffeomorphic') model = networks.miccai2018_net(vol_size, nf_enc, nf_dec) else: model = networks.cvpr2018_net(vol_size, nf_enc, nf_dec) # load initial weights if load_model_file is not None and load_model_file != '': print('loading', load_model_file) model.load_weights(load_model_file) # save first iteration model.save(os.path.join(model_dir, '%02d.h5' % initial_epoch)) # data generator # nb_gpus = len(gpu_id.split(',')) # assert np.mod(batch_size, nb_gpus) == 0, \ # 'batch_size should be a multiple of the nr. of gpus. ' + \ # 'Got batch_size %d, %d gpus' % (batch_size, nb_gpus) nb_gpus = 1 train_example_gen = datagenerators.example_gen(train_vol_names, batch_size=batch_size) atlas_vol_bs = np.repeat(atlas_vol, batch_size, axis=0) if use_miccai: data_gen = datagenerators.cvpr2018_gen(train_example_gen, atlas_vol_bs, batch_size=batch_size) else: data_gen = datagenerators.miccai2018_gen(train_example_gen, atlas_vol_bs, batch_size=batch_size) # prepare callbacks save_file_name = os.path.join(model_dir, '{epoch:02d}.h5') loss_function = losses.mind(distance, patch_size, use_ssc=use_ssc, use_gaussian_kernel=use_gaussian_kernel, use_fixed_var=use_fixed_var) # fit generator with tf.device(gpu): # multi-gpu support if nb_gpus > 1: save_callback = nrn_gen.ModelCheckpointParallel(save_file_name) mg_model = multi_gpu_model(model, gpus=nb_gpus) # single-gpu else: save_callback = ModelCheckpoint(save_file_name, verbose=1) mg_model = model # compile mg_model.compile(optimizer=Adam(lr=lr), loss=[loss_function, losses.Grad('l2').loss], loss_weights=[1.0, reg_param]) # fit mg_model.fit_generator(data_gen, initial_epoch=initial_epoch, epochs=nb_epochs, callbacks=[save_callback], steps_per_epoch=steps_per_epoch, verbose=1)