def test_append_layer(self):
    test_model = Model()
    test_model.append_layer(Dense(node_count=10, input_length=100))
    self.assertEqual(test_model.depth, 1)
    self.assertEqual(test_model.input_length, 100)

    # A second layer whose input_length does not match the previous
    # layer's node_count should be rejected.
    self.assertRaises(ValueError, test_model.append_layer,
                      Dense(node_count=1, input_length=5))
def pick_best(self, dataset, num_best=1):
    """
    Assuming that checkpoints have been saved during training, deletes
    all but the num_best checkpoints that perform best against the
    given Dataset.
    """
    if num_best >= len(self.checkpoints):
        return

    scorer = Scorer(dataset)
    scores = {}

    # Parse the dataset with each checkpointed model and score the output.
    with tempfile.TemporaryDirectory() as temp_dir:
        for path in self.checkpoints:
            parsed = Model.load(path).parse(dataset)
            output_fp = os.path.join(temp_dir, os.path.basename(path))
            Dataset(output_fp).write_graphs(parsed)
            scores[path] = scorer.score(Dataset(output_fp))
            print('{}: {:.2f}'.format(path, scores[path]))

    # Sort by score (descending) and keep the num_best paths.
    best = [(uas, path) for path, uas in scores.items()]
    best = sorted(best, reverse=True)[:num_best]
    best = [item[1] for item in best]

    for path in self.checkpoints:
        if path not in best:
            os.remove(path)

    print('kept: {}'.format(', '.join(best)))
def save_checkpoint(self, extractor, neural_net, epoch):
    """
    Saves a checkpoint, i.e. an mstnn model storing the weights at the
    end of a training epoch.
    """
    name = '{}-e{:02}'.format(self.model_name, epoch)
    path = os.path.join(self.model_dir, name)

    Model(extractor, neural_net).save(path)
    self.checkpoints.append(path)
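# A minimal usage sketch of the two checkpoint methods above, assuming a
# trainer object that exposes save_checkpoint() and pick_best(); the training
# loop, the run_training_epoch() helper, and the 'dev-data' path are all
# hypothetical stand-ins.
for epoch in range(num_epochs):
    extractor, neural_net = run_training_epoch()
    trainer.save_checkpoint(extractor, neural_net, epoch)

# After training, keep only the two checkpoints scoring best on held-out data.
trainer.pick_best(Dataset('dev-data'), num_best=2)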
def test_cost_fn(self):
    test_model = Model()
    test_model.append_layer(Dense(node_count=1, input_length=1000))

    test_input = np.random.random_sample((1000, 1000))
    test_label = np.random.randint(0, 2, (1000, 1))
    output = test_model(test_input)

    # Per-sample log term, with 100 substituted whenever log() is
    # undefined (i.e. the prediction is exactly 0 or 1).
    expected_cost = 0
    for label, pred in zip(test_label, output):
        try:
            if label == 1:
                expected_cost += log(pred)
            else:
                expected_cost += log(1 - pred)
        except ValueError:
            expected_cost += 100
    expected_cost /= len(test_label)

    cost = np.mean(test_model.cost_fn(test_input, test_label))
    self.assertEqual(cost, expected_cost)
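# For reference, the expectation computed in test_cost_fn above corresponds to
# this standalone sketch of a per-sample cost. It mirrors the test's loop only
# and is not necessarily how Model.cost_fn is implemented.
from math import log

import numpy as np

def per_sample_cost_sketch(predictions, labels):
    costs = []
    for label, pred in zip(labels, predictions):
        try:
            # log(pred) for positive labels, log(1 - pred) for negative ones.
            costs.append(log(pred) if label == 1 else log(1 - pred))
        except ValueError:
            # log() is undefined at 0; the test substitutes a constant 100.
            costs.append(100)
    return np.array(costs)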
def test_call(self):
    test_model = Model()
    test_model.append_layer(Dense(node_count=10, input_length=100))
    test_model.append_layer(Dense(node_count=1, input_length=10))

    test_input = np.random.random_sample((4, 100))

    # The expectation here treats each Dense layer as a plain matrix
    # product (no bias or activation), so the forward pass is a chain
    # of matmuls over the layer weights.
    model_weights = test_model.weights
    expected_output = test_input
    for layer_weight in model_weights:
        expected_output = np.matmul(expected_output, layer_weight)

    np.testing.assert_array_equal(test_model(test_input), expected_output)
class Manager(object):

    def __init__(self, name, num_epochs, model_params, data_params,
                 restart_filename=None):
        self._name = name
        self._restart_filename = restart_filename
        self._num_epochs = num_epochs
        self._epoch = 0
        self._model_params = model_params
        self._data_params = data_params
        self._model_train = None
        self._model_val = None
        self._model_test = None
        self._analysis = None
        self._sess = None
        self._saver = None
        self._summary_op = None
        self._summary_writer = None
        self._name = name + '_ID_%d' % np.random.randint(100000, 1000000)
        logger.LOG_FILENAME = self._name + ".log"
        self._tensorboard_dir = 'tensorboard_dir/'

        log("Tensorflow version:", tf.__version__)
        log("This calculation's name: %s" % self._name)
        log("Logging into file: %s" % ('logs/' + logger.LOG_FILENAME))
        log("Using tensorboard dir: %s" % self._tensorboard_dir)
        log("\nPrinting this calculation's parameters:")
        pp = pprint.PrettyPrinter(indent=2)
        log("\nmodel params:\n", pp.pformat(self._model_params))
        log("\ndata params:\n", pp.pformat(self._data_params))
        log("\nnum_epochs:", self._num_epochs)

    ### Internal class functions ###

    def _build_model(self, dataset):
        self._model_train = Model(modeltype="train", dataset=dataset,
                                  **self._model_params)
        if dataset.using_validation_set():
            self._model_val = Model(modeltype="val", dataset=dataset,
                                    **self._model_params)
        if dataset.using_test_set():
            self._model_test = Model(modeltype="test", dataset=dataset,
                                     **self._model_params)
        self._analysis = Analysis(self._name, dataset, self._model_train,
                                  self._model_val, self._model_test)
        self._summary_op = tf.summary.merge_all()

    def _init_saver_and_variables(self):
        assert self._sess is not None
        tf.global_variables_initializer().run(session=self._sess)
        variables_to_save = tf.get_collection(tf.GraphKeys.VARIABLES,
                                              scope="model_scope/var_scope/")
        log("\nVariables to be saved:")
        for v in variables_to_save:
            log(v.name)
        self._saver = tf.train.Saver(variables_to_save, max_to_keep=1)
        if self._restart_filename is not None:
            restore_file_name = (self._tensorboard_dir + self._restart_filename
                                 + "/PARAMETERSTATE.ckpt")
            log("\nTrying restore file: " + restore_file_name)
            try:
                self._saver.restore(self._sess, restore_file_name)
                log("Successfully restored variables from checkpoint!")
            except Exception as exception:
                log("\n!! WARNING: !!")
                log("Tried to restore variables from checkpoint, but failed"
                    " with Exception:", str(exception))
                log("Will start from freshly initialized variables.")
        self._summary_writer = tf.summary.FileWriter(
            self._tensorboard_dir + self._name, graph=self._sess.graph)
        self._sess.graph.finalize()
        log("\nComputation graph finalized.")

    def _save_checkpoint(self):
        assert (self._sess is not None) and (self._saver is not None)
        start_time_checkpoint = time.time()
        save_path = self._saver.save(
            self._sess,
            self._tensorboard_dir + self._name + "/PARAMETERSTATE.ckpt")
        total_time_checkpoint = time.time() - start_time_checkpoint
        log("\nModel saved in file: %s in %0.2f secs."
            % (save_path, total_time_checkpoint))

    def _update_tensorboard(self):
        assert (self._sess is not None) and (self._summary_op is not None) \
            and (self._summary_writer is not None)
        log('Updating tensorboard for epoch %d' % self._epoch)
        summary_str = self._sess.run(self._summary_op)
        self._summary_writer.add_summary(summary_str, global_step=self._epoch)
        self._summary_writer.flush()

    def _train_model(self):
        assert self._sess is not None
        log("\n\nStarting training. Number of epochs:", self._num_epochs)
        for step_full_train in range(self._num_epochs):
            self._epoch += 1
            log("\nRunning epoch:", self._epoch)
            start_time = time.time()
            rec_loss_train, kld_loss_train, elbo_loss_train = \
                self._model_train.train_epoch(sess=self._sess)
            log("loss train set, ELBO (batchnorm in training mode):",
                elbo_loss_train)
            log("loss train set, reconst. term (batchnorm in training mode):",
                rec_loss_train)
            log("loss train set, KLD term (batchnorm in training mode):",
                kld_loss_train)
            if self._model_val is not None:
                elbo_loss_val = self._model_val.calc_elbo(sess=self._sess)
                log("loss val set, ELBO (batchnorm in inference mode):",
                    elbo_loss_val)
            if self._model_test is not None:
                elbo_loss_test = self._model_test.calc_elbo(sess=self._sess)
                log("loss test set, ELBO (batchnorm in inference mode):",
                    elbo_loss_test)
            end_time = time.time()
            log("Epoch runtime in seconds (train set):", end_time - start_time)
            self._update_tensorboard()
            if step_full_train % 500 == 0 and step_full_train > 0:
                self._save_checkpoint()
        self._save_checkpoint()

    def _run_analysis(self):
        self._analysis.draw_samples_from_data(sess=self._sess)
        self._analysis.draw_samples_from_model(sess=self._sess)
        self._analysis.compare_input_reconst(sess=self._sess)
        analysis_results_dict = self._analysis.calc_metrics(sess=self._sess)
        return analysis_results_dict

    ### Functions for interaction with external code that is not part of this class ###

    def run_model(self):
        log("\n\n --- Loading data --- ")
        dataset = datasets.Datasets(**self._data_params)
        log("\n\n --- Building models --- ")
        self._build_model(dataset)
        with tf.Session() as self._sess:
            log("\n\n --- Initializing variables --- ")
            self._init_saver_and_variables()
            log("\n\n --- Training model --- ")
            if self._num_epochs > 0:
                self._train_model()
                log("\n\nTraining models done")
            log("\n\n --- Analysing model and drawing samples from model --- ")
            self._run_analysis()
            log("\n\n --- FINISHED --- ")
        tf.reset_default_graph()
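# A minimal sketch of driving the Manager class above. The parameter dicts are
# placeholders: their real keys depend on Model and datasets.Datasets, which
# are not shown here, and "vae_run" is a hypothetical run name.
manager = Manager(name="vae_run",
                  num_epochs=1000,
                  model_params={},        # forwarded as Model(**model_params)
                  data_params={},         # forwarded as datasets.Datasets(**data_params)
                  restart_filename=None)  # or a previous run's name to restore from
manager.run_model()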
def train(self) -> None:
    # Loading in the data.
    data_transforms = self._get_data_transforms()
    image_datasets = {
        'train': Dataset(metadata_paths=self._train_patch_metadata_paths,
                         transform=data_transforms['train'],
                         class_idx_path=self._class_idx_path,
                         patch_size=self._patch_size),
        'val': Dataset(metadata_paths=self._val_patch_metadata_paths,
                       transform=data_transforms['val'],
                       class_idx_path=self._class_idx_path,
                       patch_size=self._patch_size)
    }
    # Note: identity comparison (x is "train") would be a bug here;
    # use equality to decide whether to shuffle.
    dataloaders = {
        x: torch.utils.data.DataLoader(dataset=image_datasets[x],
                                       batch_size=self._batch_size,
                                       shuffle=(x == "train"),
                                       num_workers=self._num_workers)
        for x in ("train", "val")
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in ("train", "val")}

    logging.info(
        f"{self._num_classes} classes: {self._classes}\n"
        f"num train images {len(dataloaders['train']) * self._batch_size}\n"
        f"num val images {len(dataloaders['val']) * self._batch_size}\n"
        f"CUDA is_available: {torch.cuda.is_available()}")

    model = Model(num_classes=self._num_classes,
                  num_layers=self._num_layers,
                  pretrain=self._pretrain,
                  spatial_sensitive=self._spatial_sensitive,
                  n_spatial_features=self._n_spatial_features)
    model = model.to(device=self._device)
    optimizer = optim.Adam(params=model.parameters(),
                           lr=self._learning_rate,
                           weight_decay=self._weight_decay)
    scheduler = lr_scheduler.ExponentialLR(optimizer=optimizer,
                                           gamma=self._learning_rate_decay)

    # Initialize the model.
    if self._resume_checkpoint:
        ckpt = torch.load(f=self._resume_checkpoint_path)
        model.load_state_dict(state_dict=ckpt["model_state_dict"])
        optimizer.load_state_dict(state_dict=ckpt["optimizer_state_dict"])
        scheduler.load_state_dict(state_dict=ckpt["scheduler_state_dict"])
        start_epoch = ckpt["epoch"]
        logging.info(f"model loaded from {self._resume_checkpoint_path}")
    else:
        start_epoch = 0

    # Print the model hyperparameters.
    self._print_params()

    # Logging the model after every epoch.
    # Confirm the output directory exists.
    self._log_csv.parent.mkdir(parents=True, exist_ok=True)

    with self._log_csv.open(mode="w") as writer:
        writer.write("epoch,train_loss,train_acc,val_loss,val_acc\n")

        # Train the model.
        self._train_helper(model=model,
                           dataloaders=dataloaders,
                           dataset_sizes=dataset_sizes,
                           loss_fn=self._search_loss_fn(),
                           optimizer=optimizer,
                           scheduler=scheduler,
                           start_epoch=start_epoch,
                           writer=writer)
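# Hedged sketch of the checkpoint layout that the resume branch in train()
# expects. The actual saving happens in _train_helper (not shown), so only the
# keys read via torch.load() above are grounded; checkpoint_path and epoch are
# hypothetical names.
torch.save(obj={"model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "epoch": epoch},
           f=checkpoint_path)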
def test_model_input_length(self):
    test_model = Model()
    self.assertEqual(test_model.input_length, 0)

    test_model.append_layer(Dense(node_count=100, input_length=1000))
    self.assertEqual(test_model.input_length, 1000)
def test_init(self):
    test_model = Model()