def main():
    """Preprocess mid-ventricle MRI slices per patient and save a sample crop.

    Reads dataset paths from config.yml, picks the SAX slices whose physical
    location is closest to the middle of the stack, preprocesses each time
    series, detects ROI circles, crops around each detected center, and
    writes one example crop to examples/ as a PNG.
    """
    print("Creating data iterator...")
    with open("config.yml", 'r') as ymlfile:
        # safe_load: yaml.load without an explicit Loader is unsafe and is a
        # TypeError under PyYAML >= 6.
        cfg = yaml.safe_load(ymlfile)
    train_dir = cfg['dataset_paths']['train_data']
    train_labels = cfg['dataset_paths']['train_labels']
    mri_iter = MRIDataIterator(train_dir, train_labels)
    for patient_index, patient_slices in mri_iter.frames.items():
        # Map each slice's physical location (mm, from the DICOM header) to
        # its index within patient_slices.
        slices_locations_to_names = {}
        for i, sax_set in enumerate(patient_slices):
            location = int(dicom.read_file(sax_set[0]).SliceLocation)
            slices_locations_to_names[location] = i
        median_array = sorted(slices_locations_to_names)
        # Keep only slice locations within 25 mm of the stack midpoint.
        values_closest_to_middle = []
        if len(median_array) > 1:
            middle_value = (median_array[-1] + median_array[0]) / 2
            for val in median_array:
                if abs(val - middle_value) < 25:
                    values_closest_to_middle.append(val)
        else:
            values_closest_to_middle.append(median_array[0])
        values = []
        for proposed_median_value in values_closest_to_middle:
            median_index = slices_locations_to_names[proposed_median_value]
            sax_set = patient_slices[median_index]
            time_series = []
            for path in sax_set:
                f = dicom.read_file(path)
                # Normalize pixel values to [0, 1] before preprocessing.
                img = mri_iter.preproc(
                    f.pixel_array.astype(np.float32) / np.max(f.pixel_array),
                    64, f.PixelSpacing, True, False)
                time_series.append(img)
            values.append(time_series)
        data_array = np.array(values)
        rois, circles = calc_rois(data_array)
        new_set = []
        # BUG FIX: the original never advanced the circle index, so every
        # slice was cropped around circles[0]; pair each slice with its own
        # detected circle instead.
        for i, sax_set in enumerate(data_array):
            center_point, radius = circles[i]
            new_time_series = []
            for img in sax_set:
                # make it square: 80x80 window around the detected center
                crop_img = img[center_point[0] - 40:center_point[0] + 40,
                               center_point[1] - 60:center_point[1] + 20]
                new_time_series.append(crop_img)
            new_set.append(new_time_series)
        new_data_array = np.array(new_set)
        im = Image.fromarray(new_data_array[0][0]).convert('RGB')
        im.save('examples/' + randword() + '.png')
class TestDataUtilities(unittest.TestCase):
    """Visual sanity checks for the MRI data utilities."""

    def setUp(self):
        # TODO: this is super inefficient and dumb, rewrite so I don't read
        # in entire space of all sax images
        self.mriIter = MRIDataIterator(
            "/Users/Breakend/Documents/datasets/sciencebowl2015/train",
            "/Users/Breakend/Documents/datasets/sciencebowl2015/train.csv")

    def test_preprocessing(self):
        """Visual test: sample a random image, preprocess it, view the result."""
        sample_path = self.mriIter.frames[randint(1, 599)][0][0]
        dcm = dicom.read_file(sample_path)
        plt.figure(figsize=(10, 3.6))
        plt.subplot(131)
        plt.imshow(dcm.pixel_array)
        # Normalize to [0, 1] before handing off to the preprocessor.
        normalized = dcm.pixel_array.astype(np.float32) / np.max(dcm.pixel_array)
        processed = self.mriIter.preproc(normalized, 64, dcm.PixelSpacing)
        plt.subplot(132)
        plt.imshow(processed)
        plt.axis('off')
        plt.subplots_adjust(wspace=0, hspace=0., top=0.99, bottom=0.01,
                            left=0.05, right=0.99)
        plt.show()
def main(num_epochs=30):
    """Train the systole and diastole networks over the MRI dataset.

    Loads config and any previously saved weights, then for each epoch runs a
    training pass (real + augmented data) followed by a validation pass that
    accumulates a CRPS-style score, and finally checkpoints both networks.

    :param num_epochs: number of full passes over the training data.
    """
    print("Creating data iterator...")
    with open("config.yml", 'r') as ymlfile:
        # safe_load: yaml.load without an explicit Loader is unsafe and is a
        # TypeError under PyYAML >= 6.
        cfg = yaml.safe_load(ymlfile)
    train_dir = cfg['dataset_paths']['train_data']
    train_labels = cfg['dataset_paths']['train_labels']
    batch_size = 5
    mriIter = MRIDataIterator(train_dir, train_labels)
    network, train_fn, val_fn = compose_functions("systole", batch_size)  # systole
    network_dia, train_fn_dia, val_fn_dia = compose_functions(
        "diastole", batch_size)
    # Resume from checkpoints when present.
    if os.path.exists('model-sys.npz'):
        with np.load('model-sys.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)
    if os.path.exists('model-dia.npz'):
        with np.load('model-dia.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network_dia, param_values)
    # Finally, launch the training loop.
    print("Starting training...")
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err_sys = 0
        train_err_dia = 0
        train_batches = 0
        training_index = 1
        validation_index = mriIter.last_training_index + 1
        start_time = time.time()
        while mriIter.has_more_training_data(training_index + batch_size):
            gc.collect()
            print("Training index %s" % training_index)
            inputs, systole, diastole, metadata = mriIter.get_median_bucket_data(
                training_index, batch_size, return_gender_age=True)
            train_err_sys += train_fn(inputs, systole, metadata)
            train_err_dia += train_fn_dia(inputs, diastole, metadata)
            train_batches += batch_size
            training_index += batch_size
        # Pad the epoch out to 500 samples with augmented data.
        augmented_training_index = training_index
        while augmented_training_index < 500:
            gc.collect()
            print("Augmented training index: %s" % augmented_training_index)
            inputs, systole, diastole, metadata = mriIter.get_augmented_data(
                augmented_training_index, training_index - batch_size,
                return_gender_age=True)
            train_err_sys += train_fn(inputs, systole, metadata)
            train_err_dia += train_fn_dia(inputs, diastole, metadata)
            augmented_training_index += batch_size
        # And a full pass over the validation data:
        val_err_sys = 0
        val_acc_sys = 0
        val_err_dia = 0
        val_acc_dia = 0
        val_batches = 0
        while mriIter.has_more_data(validation_index):
            gc.collect()
            print("Validation index %s" % validation_index)
            inputs, systole, diastole, metadata = mriIter.get_median_bucket_data(
                validation_index, batch_size, return_gender_age=True)
            err, prediction = val_fn(inputs, systole, metadata)
            for y, prob_set in enumerate(prediction):
                # CRPS term: squared distance between the predicted CDF and
                # the Heaviside step at the true volume.
                prob_dist = np.cumsum(prob_set)
                v = np.array(range(prediction.shape[1]))
                heavy = (v >= systole[y])
                sq_dists = (prob_dist - heavy)**2
                val_err_sys += err
                val_acc_sys += (sum(sq_dists) / 600.)
            # BUG FIX: the diastole validation function was being fed the
            # systole targets; pass diastole instead.
            err, prediction = val_fn_dia(inputs, diastole, metadata)
            for y, prob_set in enumerate(prediction):
                prob_dist = np.cumsum(prob_set)
                v = np.array(range(prediction.shape[1]))
                heavy = (v >= diastole[y])
                sq_dists = (prob_dist - heavy)**2
                val_err_dia += err
                val_acc_dia += (sum(sq_dists) / 600.)
            val_batches += batch_size
            validation_index += batch_size
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("Validation Sum Sqrts Systolic: {}".format(val_acc_sys))
        print("Validation Sum Sqrts Diastolic: {}".format(val_acc_dia))
        print("Train Err Systole: {}".format(train_err_sys))
        print("Train Err Diastole: {}".format(train_err_dia))
        print("CRPS:\t\t{:.6f} %".format(
            (val_acc_sys + val_acc_dia) / (val_batches) * .5))
        # Optionally, you could now dump the network weights to a file like this:
        np.savez('model-sys.npz', *lasagne.layers.get_all_param_values(network))
        np.savez('model-dia.npz',
                 *lasagne.layers.get_all_param_values(network_dia))
def setUp(self):
    # TODO: this is super inefficient and dumb, rewrite so I don't read in
    # entire space of all sax images
    train_path = "/Users/Breakend/Documents/datasets/sciencebowl2015/train"
    labels_path = "/Users/Breakend/Documents/datasets/sciencebowl2015/train.csv"
    self.mriIter = MRIDataIterator(train_path, labels_path)
def compose_prediction_functions(scope):
    """Build a CNN and compile a Theano function returning its raw predictions.

    :param scope: name prefix so systole/diastole graphs get distinct inputs.
    :returns: (network, prediction_fn) tuple.
    """
    input_var = T.tensor4(scope + 'inputs')
    metadata_var = T.matrix(scope + 'metadatainputs')
    network = build_cnn(input_var, 20, metadata_var)
    prediction = lasagne.layers.get_output(network)
    prediction_fn = theano.function([input_var, metadata_var], prediction)
    return network, prediction_fn


with open("config.yml", 'r') as ymlfile:
    # safe_load: yaml.load without an explicit Loader is unsafe and is a
    # TypeError under PyYAML >= 6.
    cfg = yaml.safe_load(ymlfile)
validation_dir = cfg['dataset_paths']['validation_data']
sample_submission_path = cfg['dataset_paths']['sample_submission']
mriIter = MRIDataIterator(validation_dir)
systolic_network, systolic_prediction_fn = compose_prediction_functions('sys')
diastolic_network, diastolic_prediction_fn = compose_prediction_functions('dia')
# Restore saved weights when checkpoints exist.
if os.path.exists('model-sys.npz'):
    with np.load('model-sys.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(systolic_network, param_values)
if os.path.exists('model-dia.npz'):
    with np.load('model-dia.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(diastolic_network, param_values)
# TODO: Abstract data retrieval so it applies to validation (i.e. get bounds
# from number of folders in dataset path)
def main():
    """Preprocess mid-ventricle MRI slices per patient and save a sample crop.

    Reads dataset paths from config.yml, picks the SAX slices whose physical
    location is closest to the middle of the stack, preprocesses each time
    series, detects ROI circles, crops around each detected center, and
    writes one example crop to examples/ as a PNG.
    """
    print("Creating data iterator...")
    with open("config.yml", 'r') as ymlfile:
        # safe_load: yaml.load without an explicit Loader is unsafe and is a
        # TypeError under PyYAML >= 6.
        cfg = yaml.safe_load(ymlfile)
    train_dir = cfg['dataset_paths']['train_data']
    train_labels = cfg['dataset_paths']['train_labels']
    mri_iter = MRIDataIterator(train_dir, train_labels)
    for patient_index, patient_slices in mri_iter.frames.items():
        # Map each slice's physical location (mm, from the DICOM header) to
        # its index within patient_slices.
        slices_locations_to_names = {}
        for i, sax_set in enumerate(patient_slices):
            location = int(dicom.read_file(sax_set[0]).SliceLocation)
            slices_locations_to_names[location] = i
        median_array = sorted(slices_locations_to_names)
        # Keep only slice locations within 25 mm of the stack midpoint.
        values_closest_to_middle = []
        if len(median_array) > 1:
            middle_value = (median_array[-1] + median_array[0]) / 2
            for val in median_array:
                if abs(val - middle_value) < 25:
                    values_closest_to_middle.append(val)
        else:
            values_closest_to_middle.append(median_array[0])
        values = []
        for proposed_median_value in values_closest_to_middle:
            median_index = slices_locations_to_names[proposed_median_value]
            sax_set = patient_slices[median_index]
            time_series = []
            for path in sax_set:
                f = dicom.read_file(path)
                # Normalize pixel values to [0, 1] before preprocessing.
                img = mri_iter.preproc(
                    f.pixel_array.astype(np.float32) / np.max(f.pixel_array),
                    64, f.PixelSpacing, True, False)
                time_series.append(img)
            values.append(time_series)
        data_array = np.array(values)
        rois, circles = calc_rois(data_array)
        new_set = []
        # BUG FIX: the original never advanced the circle index, so every
        # slice was cropped around circles[0]; pair each slice with its own
        # detected circle instead.
        for i, sax_set in enumerate(data_array):
            center_point, radius = circles[i]
            new_time_series = []
            for img in sax_set:
                # make it square: 80x80 window around the detected center
                crop_img = img[center_point[0] - 40:center_point[0] + 40,
                               center_point[1] - 60:center_point[1] + 20]
                new_time_series.append(crop_img)
            new_set.append(new_time_series)
        new_data_array = np.array(new_set)
        im = Image.fromarray(new_data_array[0][0]).convert('RGB')
        im.save('examples/' + randword() + '.png')
def compose_prediction_functions(scope):
    """Build a CNN and compile a Theano function returning its raw predictions.

    :param scope: name prefix so systole/diastole graphs get distinct inputs.
    :returns: (network, prediction_fn) tuple.
    """
    input_var = T.tensor4(scope + 'inputs')
    metadata_var = T.matrix(scope + 'metadatainputs')
    network = build_cnn(input_var, 20, metadata_var)
    prediction = lasagne.layers.get_output(network)
    prediction_fn = theano.function([input_var, metadata_var], prediction)
    return network, prediction_fn


with open("config.yml", 'r') as ymlfile:
    # safe_load: yaml.load without an explicit Loader is unsafe and is a
    # TypeError under PyYAML >= 6.
    cfg = yaml.safe_load(ymlfile)
validation_dir = cfg['dataset_paths']['validation_data']
sample_submission_path = cfg['dataset_paths']['sample_submission']
mriIter = MRIDataIterator(validation_dir)
systolic_network, systolic_prediction_fn = compose_prediction_functions('sys')
diastolic_network, diastolic_prediction_fn = compose_prediction_functions('dia')
# Restore saved weights when checkpoints exist.
if os.path.exists('model-sys.npz'):
    with np.load('model-sys.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(systolic_network, param_values)
if os.path.exists('model-dia.npz'):
    with np.load('model-dia.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(diastolic_network, param_values)
# TODO: Abstract data retrieval so it applies to validation (i.e. get bounds
# from number of folders in dataset path)
# Validation patients start at 501 in this dataset.
index = 501
def main(num_epochs=30):
    """Train the systole and diastole networks over the MRI dataset.

    Loads config and any previously saved weights, then for each epoch runs a
    training pass (real + augmented data) followed by a validation pass that
    accumulates a CRPS-style score, and finally checkpoints both networks.

    :param num_epochs: number of full passes over the training data.
    """
    print("Creating data iterator...")
    with open("config.yml", 'r') as ymlfile:
        # safe_load: yaml.load without an explicit Loader is unsafe and is a
        # TypeError under PyYAML >= 6.
        cfg = yaml.safe_load(ymlfile)
    train_dir = cfg['dataset_paths']['train_data']
    train_labels = cfg['dataset_paths']['train_labels']
    batch_size = 5
    mriIter = MRIDataIterator(train_dir, train_labels)
    network, train_fn, val_fn = compose_functions("systole", batch_size)  # systole
    network_dia, train_fn_dia, val_fn_dia = compose_functions(
        "diastole", batch_size)
    # Resume from checkpoints when present.
    if os.path.exists('model-sys.npz'):
        with np.load('model-sys.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)
    if os.path.exists('model-dia.npz'):
        with np.load('model-dia.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network_dia, param_values)
    # Finally, launch the training loop.
    print("Starting training...")
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err_sys = 0
        train_err_dia = 0
        train_batches = 0
        training_index = 1
        validation_index = mriIter.last_training_index + 1
        start_time = time.time()
        while mriIter.has_more_training_data(training_index + batch_size):
            gc.collect()
            print("Training index %s" % training_index)
            inputs, systole, diastole, metadata = mriIter.get_median_bucket_data(
                training_index, batch_size, return_gender_age=True)
            train_err_sys += train_fn(inputs, systole, metadata)
            train_err_dia += train_fn_dia(inputs, diastole, metadata)
            train_batches += batch_size
            training_index += batch_size
        # Pad the epoch out to 500 samples with augmented data.
        augmented_training_index = training_index
        while augmented_training_index < 500:
            gc.collect()
            print("Augmented training index: %s" % augmented_training_index)
            inputs, systole, diastole, metadata = mriIter.get_augmented_data(
                augmented_training_index, training_index - batch_size,
                return_gender_age=True)
            train_err_sys += train_fn(inputs, systole, metadata)
            train_err_dia += train_fn_dia(inputs, diastole, metadata)
            augmented_training_index += batch_size
        # And a full pass over the validation data:
        val_err_sys = 0
        val_acc_sys = 0
        val_err_dia = 0
        val_acc_dia = 0
        val_batches = 0
        while mriIter.has_more_data(validation_index):
            gc.collect()
            print("Validation index %s" % validation_index)
            inputs, systole, diastole, metadata = mriIter.get_median_bucket_data(
                validation_index, batch_size, return_gender_age=True)
            err, prediction = val_fn(inputs, systole, metadata)
            for y, prob_set in enumerate(prediction):
                # CRPS term: squared distance between the predicted CDF and
                # the Heaviside step at the true volume.
                prob_dist = np.cumsum(prob_set)
                v = np.array(range(prediction.shape[1]))
                heavy = (v >= systole[y])
                sq_dists = (prob_dist - heavy)**2
                val_err_sys += err
                val_acc_sys += (sum(sq_dists) / 600.)
            # BUG FIX: the diastole validation function was being fed the
            # systole targets; pass diastole instead.
            err, prediction = val_fn_dia(inputs, diastole, metadata)
            for y, prob_set in enumerate(prediction):
                prob_dist = np.cumsum(prob_set)
                v = np.array(range(prediction.shape[1]))
                heavy = (v >= diastole[y])
                sq_dists = (prob_dist - heavy)**2
                val_err_dia += err
                val_acc_dia += (sum(sq_dists) / 600.)
            val_batches += batch_size
            validation_index += batch_size
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("Validation Sum Sqrts Systolic: {}".format(val_acc_sys))
        print("Validation Sum Sqrts Diastolic: {}".format(val_acc_dia))
        print("Train Err Systole: {}".format(train_err_sys))
        print("Train Err Diastole: {}".format(train_err_dia))
        print("CRPS:\t\t{:.6f} %".format(
            (val_acc_sys + val_acc_dia) / (val_batches) * .5))
        # Optionally, you could now dump the network weights to a file like this:
        np.savez('model-sys.npz', *lasagne.layers.get_all_param_values(network))
        np.savez('model-dia.npz',
                 *lasagne.layers.get_all_param_values(network_dia))