def test_model_save_and_load():
    """Round-trip a one-layer Sequential model through save_model / load_model."""
    network = Sequential()
    network.add(Dense(10, activation='relu', input_shape=(5, 1)))

    # The scratch directory (and anything saved into it) is removed on exit.
    with TemporaryDirectory() as scratch_dir:
        model_path = os.path.join(scratch_dir, 'test_model')
        Algorithm.save_model(network, model_path)
        # Smoke check only: loading must not raise; the result is not inspected.
        Algorithm.load_model(model_path)
def test__preprocess_one_dicom():
    """Preprocess the first DICOM of the cohort, check its shape and display it."""
    algorithm = Algorithm()
    first_dicom = Cohort(ShaipWorkspace()).dicoms[0]

    preprocessed = algorithm._preprocess_one_dicom(first_dicom)
    assert preprocessed.shape == Algorithm.imshape

    # Visual check of the preprocessed image.
    plt.imshow(preprocessed)
    plt.colorbar()
    plt.show()
def test_data_scaling():
    """data_scaling must stack the images into a float32 array with a trailing axis of 1."""
    algorithm = Algorithm()
    rows, cols = 64, 128
    single_image = np.random.uniform(size=(rows, cols), high=2000, low=-300)
    count = 3
    # Test set of just 3 images (the same array object repeated).
    batch = [single_image for _ in range(count)]

    scaled = algorithm.data_scaling(batch)

    assert scaled.shape == (count, rows, cols, 1)
    assert scaled.dtype == np.float32
def main(self, argv):
    """ Main Experiment entry point.

    argv is the full argument list, so argv[0] is the program name.
    In production call as main(sys.argv).

    The train, predict and evaluate phases each run only when the matching
    command-line flag was given (see command_line).  When training is
    skipped, a previously saved model is loaded instead.
    """
    np.random.seed(42)  # fixed seed for numpy's global random generator
    self.setup_logging()
    self.command_line(argv)

    start_time = time.time()
    logging.info("Starting Kaggle-CTMI Experiment\n")

    logging.info("Finding data and groundtruth...")
    cohort = Cohort(self.shaip)
    train_cohort, test_cohort = cohort.split_cohort_train_test(0.3)
    logging.info("Found %d datasets", cohort.size)

    # Join rather than concatenate, so the path is right whether or not
    # models_dir carries a trailing separator (same approach as setup_logging).
    model_path = os.path.join(self.shaip.models_dir, 'model')

    if self.args.train:
        logging.info("Training on %d datasets...", train_cohort.size)
        model = self.algorithm.train(train_cohort)
        Algorithm.save_model(model, model_path)
    else:
        logging.info("Skipping training, model saved from earlier run")
        model = self.algorithm.load_model(model_path)

    if self.args.predict:
        logging.info("Prediction on %d datasets...", test_cohort.size)
        test_predictions = self.algorithm.predict(model, test_cohort)
    else:
        logging.info(
            "Skipping prediction, using predictions from earlier run")
        # TODO: need to sort out caching of predictions
        # Until then the results stage receives None for the predictions.
        test_predictions = None

    if self.args.evaluate:
        logging.info(
            "Generating results to ShaipWorkspace/outputs/results/index.html..."
        )
        self.results.show_results(train_cohort, test_cohort,
                                  self.algorithm.history, test_predictions)

    logging.info("Kaggle-CTMI Experiment done in %4.1f seconds.\n",
                 (time.time() - start_time))
def test_preprocessed_cohort_with_cache():
    """Preprocess the cohort twice with a cache directory.

    The second pass is expected to be served from the cache: it must return
    the same images as the first pass and be at least five times faster.
    """
    with TemporaryDirectory() as cache_dir:
        algorithm = Algorithm(cache_dir)
        cohort = Cohort(ShaipWorkspace())

        start1 = time.time()
        ppimages1 = algorithm.preprocessed_images(cohort)
        elapsed1 = time.time() - start1
        print("\nTime for first pass = %6.4f" % elapsed1)
        assert len(ppimages1) == cohort.size

        # And again, this time we should use the cache
        start2 = time.time()
        ppimages2 = algorithm.preprocessed_images(cohort)
        elapsed2 = time.time() - start2
        print("Time for second pass = %6.4f\n" % elapsed2)
        assert len(ppimages2) == cohort.size

        # The comparison result used to be discarded; assert it so that a
        # cache returning different images fails the test.  Comparing image
        # by image works for both a list of arrays and a stacked array.
        assert all(np.array_equal(first, second)
                   for first, second in zip(ppimages1, ppimages2))

        # We expect the second pass to be much faster
        assert elapsed2 < elapsed1 / 5
def __init__(self, shaip_root_dir):
    """Create the workspace rooted at shaip_root_dir, check it, and build
    the Algorithm and Results objects from its cache and results directories."""
    workspace = ShaipWorkspace(shaip_root_dir)
    self.shaip = workspace
    workspace.check()

    self.algorithm = Algorithm(workspace.cache_dir)
    self.results = Results(workspace.results_dir)

    # Filled in later by command_line().
    self.args = None
class Experiment(object):
    """ This is the top-level class, orchestrating train/test split of the cohort,
    training and evaluation.  However the details are all elsewhere"""

    def __init__(self, shaip_root_dir):
        """Create the workspace rooted at shaip_root_dir, check it, and build
        the Algorithm and Results objects from its cache and results directories."""
        self.shaip = ShaipWorkspace(shaip_root_dir)
        self.shaip.check()
        self.algorithm = Algorithm(self.shaip.cache_dir)
        self.results = Results(self.shaip.results_dir)
        self.args = None  # set by command_line()

    def command_line(self, argv):
        """Parse argv (argv[0] is the program name) and store the result in self.args.

        If none of the train / predict / evaluate flags is given, the help
        text is printed and the process exits with status 0.
        """
        parser = argparse.ArgumentParser(
            prog='experiment.py',
            description='CT/CTA discrimination to run in SHAIP',
            epilog='If no phases are specified, program does nothing - exits')
        parser.add_argument('-t', '--train', help='perform model training',
                            action='store_true', default=False)
        parser.add_argument('-p', '--predict', help='perform prediction over the test set',
                            action='store_true', default=False)
        parser.add_argument('-e', '--evaluate', help='generate results',
                            action='store_true', default=False)

        args = parser.parse_args(argv[1:])
        if not any([args.train, args.predict, args.evaluate]):
            parser.print_help()
            sys.exit(0)

        self.args = args

    def setup_logging(self):
        """Configure the root logger: INFO and above to the console, DEBUG and
        above to 'kaggle-ctmi.log' in the workspace results directory."""
        # see https://docs.python.org/2.4/lib/multiple-destinations.html
        logger = logging.getLogger('')
        logger.setLevel(logging.DEBUG)

        if len(logger.handlers) <= 1:  # avoid double setup which can happen in unit tests
            # Define a Handler which writes INFO messages or higher to the sys.stderr
            console_handler = logging.StreamHandler()
            console_handler.setLevel(logging.INFO)
            simple_formatter = logging.Formatter('%(levelname)-8s %(message)s')
            console_handler.setFormatter(simple_formatter)

            # Set up logging to file for DEBUG messages or higher
            logfile_path = os.path.join(self.shaip.results_dir, 'kaggle-ctmi.log')
            logfile_handler = logging.FileHandler(filename=logfile_path)
            logfile_handler.setLevel(logging.DEBUG)
            verbose_formatter = logging.Formatter(
                '%(asctime)s - %(levelname)s - %(message)s',
                datefmt='%d/%m/%y %H:%M')
            logfile_handler.setFormatter(verbose_formatter)

            # add the handlers to the logger
            logger.addHandler(console_handler)
            logger.addHandler(logfile_handler)

        # Silence matplotlib debug messages
        mpl_logger = logging.getLogger('matplotlib.font_manager')
        mpl_logger.setLevel(logging.WARNING)

    def main(self, argv):
        """ Main Experiment entry point.

        argv is the full argument list, so argv[0] is the program name.
        In production call as main(sys.argv).

        The train, predict and evaluate phases each run only when the matching
        command-line flag was given.  When training is skipped, a previously
        saved model is loaded instead.
        """
        np.random.seed(42)  # fixed seed for numpy's global random generator
        self.setup_logging()
        self.command_line(argv)

        start_time = time.time()
        logging.info("Starting Kaggle-CTMI Experiment\n")

        logging.info("Finding data and groundtruth...")
        cohort = Cohort(self.shaip)
        train_cohort, test_cohort = cohort.split_cohort_train_test(0.3)
        logging.info("Found %d datasets", cohort.size)

        # Join rather than concatenate, so the path is right whether or not
        # models_dir carries a trailing separator (as setup_logging does for
        # the log file).
        model_path = os.path.join(self.shaip.models_dir, 'model')

        if self.args.train:
            logging.info("Training on %d datasets...", train_cohort.size)
            model = self.algorithm.train(train_cohort)
            Algorithm.save_model(model, model_path)
        else:
            logging.info("Skipping training, model saved from earlier run")
            model = self.algorithm.load_model(model_path)

        if self.args.predict:
            logging.info("Prediction on %d datasets...", test_cohort.size)
            test_predictions = self.algorithm.predict(model, test_cohort)
        else:
            logging.info(
                "Skipping prediction, using predictions from earlier run")
            # TODO: need to sort out caching of predictions
            # Until then the results stage receives None for the predictions.
            test_predictions = None

        if self.args.evaluate:
            logging.info(
                "Generating results to ShaipWorkspace/outputs/results/index.html..."
            )
            self.results.show_results(train_cohort, test_cohort,
                                      self.algorithm.history, test_predictions)

        logging.info("Kaggle-CTMI Experiment done in %4.1f seconds.\n",
                     (time.time() - start_time))
def test_build_model():
    """Smoke test: building the model and printing its summary must not raise."""
    network = Algorithm().build_model()
    network.summary()
def test_train():
    """Training on the whole workspace cohort must return a model object."""
    trainer = Algorithm()
    full_cohort = Cohort(ShaipWorkspace())

    trained_model = trainer.train(full_cohort)

    assert trained_model is not None
def test_preprocessed_cohort():
    """Preprocessing must yield exactly one image per dataset in the cohort."""
    preprocessor = Algorithm()
    full_cohort = Cohort(ShaipWorkspace())

    images = preprocessor.preprocessed_images(full_cohort)

    assert len(images) == full_cohort.size