def prepare_data(self):
    """Create the prediction directory and split the plans into train / hold-out.

    Reads ``self.opt.primary_directory``, ``self.model_name`` and ``self.stage``.

    Sets:
        * ``self.prediction_dir`` -- ``<primary>/data/results/<model_name>/<stage>-predictions``;
          the directory is created if it does not already exist.
        * ``self.training_paths`` -- at most the first 150 entries returned by
          ``get_paths`` for the ``train-pats`` directory.
        * ``self.hold_out_paths`` -- every remaining entry.
    """
    # Define parent directory
    dataset_directory = '{}/data'.format(self.opt.primary_directory)
    training_data_dir = '{}/train-pats'.format(dataset_directory)

    # Path where any data generated by this code (e.g., predictions, models) are stored
    results_dir = '{}/results'.format(dataset_directory)
    model_results_path = '{}/{}'.format(results_dir, self.model_name)
    self.prediction_dir = '{}/{}-predictions'.format(model_results_path, self.stage)
    os.makedirs(self.prediction_dir, exist_ok=True)

    # Prepare the data directory
    plan_paths = get_paths(training_data_dir, ext='')  # gets the path of each plan's directory
    num_train_pats = np.minimum(150, len(plan_paths))  # number of plans that will be used to train model
    self.training_paths = plan_paths[:num_train_pats]  # list of training plans
    self.hold_out_paths = plan_paths[num_train_pats:]  # list of paths used for held out testing
def initialize_networks(self):
    """Resume from the newest saved generator, or define a new one.

    Every ``.h5`` path found in ``self.model_dir`` is inspected: the text between
    ``self.model_path_template`` and ``.h5`` is taken as the epoch number, and
    ``self.epoch_start`` is raised to the largest numeric value found.

    :return: ``(0, 0, 0, self.epoch_last)`` when ``self.epoch_start`` has already
        reached ``self.epoch_last`` (nothing is loaded or built on that path);
        otherwise ``None``, after ``self.generator`` was loaded from the matching
        ``.h5`` file or ``self.define_generator()`` was called.
    """
    # Find the last epoch among the existing model files, if there are any
    for saved_model_path in get_paths(self.model_dir, ext='h5'):
        epoch_text = saved_model_path.split(self.model_path_template)[-1].split('.h5')[0]
        if epoch_text.isdigit():
            self.epoch_start = max(self.epoch_start, int(epoch_text))

    # Training already reached the final epoch
    if self.epoch_start >= self.epoch_last:
        print('Model fully trained, loading model from epoch {}'.format(self.epoch_last))
        return 0, 0, 0, self.epoch_last

    if self.epoch_start >= 1:
        # A saved model exists: load the most recent one
        newest_model_path = '{}{}.h5'.format(self.model_path_template, self.epoch_start)
        self.generator = load_model(newest_model_path)
    else:
        # No saved model exists: define a new one
        self.define_generator()
def load_and_shape_data(self, path_to_load):
    """Load data stored as vectors and place it into arrays of the required shape.

    :param path_to_load: the path of the data to load. A path containing '.csv' is
        loaded as a single file and stored under ``self.mode_name``. Any other path
        is treated as a directory, from which only the files whose name (without
        directory or extension) appears in ``self.required_files`` or
        ``self.full_roi_list`` are loaded.
    :return: dictionary with one entry per key of ``self.required_files``. Each
        entry starts as ``np.zeros(self.required_files[key])`` and is then filled
        from the loaded data ('voxel_dimensions' is replaced by the loaded value).
    """
    raw = {}
    if '.csv' in path_to_load:
        raw[self.mode_name] = load_file(path_to_load)
    else:
        for file_path in get_paths(path_to_load, ext=''):
            # File name without its directory or file extension
            stem = file_path.split('/')[-1].split('.')[0]
            if stem in self.required_files or stem in self.full_roi_list:
                raw[stem] = load_file(file_path)

    # Every required entry begins as a zero array of its configured shape
    shaped_data = {name: np.zeros(self.required_files[name]) for name in self.required_files}

    for name in shaped_data:
        if name == 'structure_masks':
            # Merge the per-ROI masks into one array. np.put writes to flat positions;
            # presumably the last axis of this array indexes the ROI -- TODO confirm.
            for roi_idx, roi in enumerate(self.full_roi_list):
                if roi in raw:
                    flat_positions = self.num_rois * raw[roi] + roi_idx
                    np.put(shaped_data[name], flat_positions, 1)
        elif name == 'possible_dose_mask':
            # The loaded values are the flat positions that get set to 1
            np.put(shaped_data[name], raw[name], 1)
        elif name == 'voxel_dimensions':
            # Stored as loaded; the zero array is discarded
            shaped_data[name] = raw[name]
        else:
            # Entries loaded as an 'indices' / 'data' pair
            positions = np.array(raw[name]['indices'])
            values = np.array(raw[name]['data'])
            # NOTE(review): astype(np.int64) drops any fractional part of the values;
            # confirm that is intended for every key that reaches this branch.
            np.put(shaped_data[name], positions.astype(np.int64), values.astype(np.int64))
            if name == 'ct':
                # Cap the values at 4071
                shaped_data[name] = shaped_data[name].clip(None, 4071)
            # No scaling of 'dose' is applied here.

    return shaped_data
# Project directories
# TODO: Must define the path of where the data is stored,
# e.g. primary_directory = '/home/user_name/open-kbp' (directory where everything is stored).
primary_directory = '/Users/aaronbabier/Downloads/public_dat-2'

# Directories holding the given data
training_data_dir = '{}/train-pats'.format(primary_directory)
validation_data_dir = '{}/validation-pats-no-dose'.format(primary_directory)

# Anything generated by this code (e.g., predictions, models) is stored here
results_dir = '{}/results'.format(primary_directory)

# Model name and the number of epochs to train it for
prediction_name = 'baseline'
number_of_training_epochs = 1

# Split the plans: at most the first 50 are used for training, the rest are held out
plan_paths = get_paths(training_data_dir, ext='')  # path of each plan's directory
num_train_pats = np.minimum(50, len(plan_paths))
training_paths = plan_paths[:num_train_pats]
hold_out_paths = plan_paths[num_train_pats:]

# Train a model
data_loader_train = DataLoader(training_paths)
dose_prediction_model_train = PredictionModel(
    data_loader_train,
    results_dir,
    model_name=prediction_name,
)
dose_prediction_model_train.train_model(
    epochs=number_of_training_epochs,
    save_frequency=1,
    keep_model_history=1,
)
import numpy as np
from tqdm import tqdm
from torch.autograd import Variable
from src.models import networks
from src.options.train_options import TrainOptions
from torchsummary import summary
import src.models.medicalzoo.medzoo as medzoo
from torchvision import transforms

# Data locations, all derived from the local project root
primary_directory = '/Users/mkazi/Google Drive/KBP_Challenge'
dataset_dir = '{}/data'.format(primary_directory)
training_data_dir = '{}/train-pats'.format(dataset_dir)
# Alternative data source: '{}/validation-pats-no-dose'.format(dataset_dir)

# At most the first 100 plans are used to train the model
plan_paths = get_paths(training_data_dir, ext='')  # path of each plan's directory
num_train_pats = np.minimum(100, len(plan_paths))
training_paths = plan_paths[:num_train_pats]

# Command-line style options, one option per row
# (presumably parsed by TrainOptions; the call is not shown here)
args = [
    '--batchSize', '2',
    '--primary_directory', primary_directory,
    '--which_model_netG', 'pix2pixhd',
    '--which_model_netD', 'multiscale',
    '--n_layers_D', '3',
    '--num_D', '3',
    '--resnet_depth', '10',
    '--which_direction', 'AtoB',
    '--input_nc', '1',
    '--lambda_A', '100',
    '--lr_policy', 'plateau',
    '--epoch_count', '200',
    '--load_epoch', '-1',
    '--lr_G', '0.01',
    '--lr_max', '0.01',
    '--lr_step_size', '25',
    '--loss_function', 'smoothed_L1',
    '--init_type', 'xavier',
    '--no_scaling',
    '--no_normalization',
    '--patience', '5',
    '--n_critic', '5',
    '--weight_cliping_limit', '0.01'
]
def prepare_data(self):
    """Collect the plan paths for the training, validation and test data.

    Sets ``self.plan_paths``, ``self.validation_data_paths`` and
    ``self.test_data_paths`` from the directories named in ``self.hparams``, and
    sets ``self.num_train_pats`` to 200.

    NOTE(review): if this class is a PyTorch Lightning module, assigning state in
    ``prepare_data`` is discouraged by the Lightning docs -- confirm.
    """
    hparams = self.hparams
    self.plan_paths = get_paths(hparams.training_data_dir, ext='')
    # Number of plans that will be used to train the model. Fixed at 200; it is
    # not derived from hparams.num_train_pats or from len(self.plan_paths).
    self.num_train_pats = 200
    self.validation_data_paths = get_paths(hparams.validation_data_dir, ext='')
    self.test_data_paths = get_paths(hparams.test_data_dir, ext='')
# Directory where everything is stored (e.g. '/home/user_name/open-kbp')
primary_directory = '/Users/aaronbabier/Documents/GitHub/open-kbp-post'

# Define directory where given data is stored
main_data_dir = '{}/provided-data'.format(primary_directory)
training_data_dir = '{}/train-pats'.format(main_data_dir)
validation_data_dir = '{}/validation-pats'.format(main_data_dir)
testing_data_dir = '{}/test-pats'.format(main_data_dir)

# Define hold out set
test_time = False  # Only change this to True when the model has been fully tuned on the validation set

# Path where any data generated by this code (e.g., predictions, models) are stored
results_dir = '{}/results'.format(primary_directory)

# Name model to train and number of epochs to train it for
prediction_name = 'baseline'
number_of_training_epochs = 1  # This should probably be increased to 100-200 after your dry run

# Prepare the data directory
training_plan_paths = get_paths(training_data_dir, ext='')  # gets the path of each plan's directory

# Train a model
data_loader_train = DataLoader(training_plan_paths)
dose_prediction_model_train = PredictionModel(data_loader_train, results_dir, model_name=prediction_name)
dose_prediction_model_train.train_model(epochs=number_of_training_epochs, save_frequency=1, keep_model_history=1)

# Define hold out set
# Plain truthiness test instead of an identity comparison against the False singleton
if not test_time:
    hold_out_plan_paths = get_paths(validation_data_dir, ext='')  # list of paths used for held out validation
from torch.utils.data import DataLoader
from provided_code.dose_evaluation_class import EvaluateDose
import numpy as np

# Root of the project data on the local machine
primary_directory = '/Users/mkazi/Google Drive/KBP_Challenge'

# Command-line style options, parsed by TrainOptions below
args = [
    '--which_model_netG', 'unet_128_3d',
    '--which_direction', 'AtoB',
    '--batchSize', '1',
    '--input_nc', '1',
    '--lambda_A', '100'
]
opt = TrainOptions().parse(args)

dataset_dir = '{}/data'.format(primary_directory)
training_data_dir = '{}/train-pats'.format(dataset_dir)

# Everything after the first (at most) 100 plans is held out for testing
plan_paths = get_paths(training_data_dir, ext='')  # path of each plan's directory
num_train_pats = np.minimum(100, len(plan_paths))
hold_out_paths = plan_paths[num_train_pats:]
data_loader_hold_out_eval = KBPDataset(hold_out_paths, mode_name='evaluation')

# Predictions are read from the csv files in the prediction directory
prediction_dir = '{}/data/results/pix2pix_default/hold-out-tests-predictions'.format(
    primary_directory)
prediction_paths = get_paths(prediction_dir, ext='csv')
hold_out_prediction_loader = KBPDataset(prediction_paths, mode_name='predicted_dose')

# Pair the held-out data with the predictions
dose_evaluator = EvaluateDose(data_loader_hold_out_eval, hold_out_prediction_loader)