Example #1
    def test_append_layer(self):
        test_model = Model()
        test_model.append_layer(Dense(node_count=10, input_length=100))
        self.assertEqual(test_model.depth, 1)
        self.assertEqual(test_model.input_length, 100)

        self.assertRaises(ValueError, test_model.append_layer,
                          Dense(node_count=1, input_length=5))
Example #2
File: train.py Project: pavelsof/mstnn
    def pick_best(self, dataset, num_best=1):
        """
		Assuming that checkpoints have been saved during training, deletes all
		but those num_best that are performing best against the given Dataset.
		"""
        if num_best >= len(self.checkpoints):
            return

        scorer = Scorer(dataset)
        scores = {}

        with tempfile.TemporaryDirectory() as temp_dir:
            for path in self.checkpoints:
                parsed = Model.load(path).parse(dataset)

                output_fp = os.path.join(temp_dir, os.path.basename(path))
                Dataset(output_fp).write_graphs(parsed)

                scores[path] = scorer.score(Dataset(output_fp))
                print('{}: {:.2f}'.format(path, scores[path]))

        best = [(uas, path) for path, uas in scores.items()]
        best = sorted(best, reverse=True)[:num_best]
        best = [item[1] for item in best]

        for path in self.checkpoints:
            if path not in best:
                os.remove(path)

        print('kept: {}'.format(', '.join(best)))
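A hedged usage sketch: assuming the surrounding trainer object has populated self.checkpoints via save_checkpoint (shown in the next example), a caller would score against a held-out dataset along these lines (trainer and dev_data are illustrative names):

# Hypothetical call site: keep only the three checkpoints scoring highest
# (by UAS) against a held-out dataset; the rest are deleted from disk.
trainer.pick_best(dev_data, num_best=3)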
Example #3
File: train.py Project: pavelsof/mstnn
    def save_checkpoint(self, extractor, neural_net, epoch):
        """
		Saves a checkpoints, i.e. an mstnn model storing the weights at the end
		of a training epoch.
		"""
        name = '{}-e{:02}'.format(self.model_name, epoch)
        path = os.path.join(self.model_dir, name)

        Model(extractor, neural_net).save(path)
        self.checkpoints.append(path)
Example #4
    def test_cost_fn(self):
        test_model = Model()
        test_model.append_layer(Dense(node_count=1, input_length=1000))

        test_input = np.random.random_sample((1000, 1000))
        test_label = np.random.randint(0, 2, (1000, 1))
        output = test_model(test_input)
        # Reconstruct the expected cost as a mean log-likelihood, with a
        # flat penalty of 100 whenever log() raises on a 0 or 1 output.
        expected_cost = 0
        for label, pred in zip(test_label, output):
            try:
                if label == 1:
                    expected_cost += log(pred)
                else:
                    expected_cost += log(1 - pred)
            except ValueError:
                expected_cost += 100
        expected_cost /= len(test_label)
        cost = np.mean(test_model.cost_fn(test_input, test_label))
        self.assertEqual(cost, expected_cost)
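The loop above accumulates a per-example log-likelihood, substituting a flat penalty of 100 whenever log() raises ValueError. For reference, a vectorized standalone equivalent in plain NumPy (a sketch independent of the Model class; the function name is illustrative):

import numpy as np

def mean_log_likelihood(preds, labels, penalty=100.0):
    """Vectorized counterpart of the loop in test_cost_fn."""
    preds = np.asarray(preds, dtype=float).ravel()
    labels = np.asarray(labels).ravel()
    # Likelihood of the observed label under a Bernoulli model.
    p = np.where(labels == 1, preds, 1.0 - preds)
    # log(p) where it is defined; the flat penalty stands in for log(0).
    safe = np.where(p > 0, p, 1.0)
    return np.where(p > 0, np.log(safe), penalty).mean()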
Example #5
    def test_call(self):
        test_model = Model()
        test_model.append_layer(Dense(node_count=10, input_length=100))
        test_model.append_layer(Dense(node_count=1, input_length=10))

        test_input = np.random.random_sample((4, 100))
        model_weights = test_model.weights
        expected_output = test_input
        for layer_weight in model_weights:
            expected_output = np.matmul(expected_output, layer_weight)

        np.testing.assert_array_equal(test_model(test_input), expected_output)
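Between them, these tests pin down the API under test: Dense(node_count, input_length) layers, Model.append_layer with input-length validation, depth and input_length properties, and a forward pass that chains matrix products over Model.weights. A minimal sketch of classes that would satisfy the tests shown here (the real project's internals may differ, e.g. in weight initialization):

import numpy as np

class Dense:
    """A bias-free, activation-free layer: output = input @ weights."""

    def __init__(self, node_count, input_length):
        self.node_count = node_count
        self.input_length = input_length
        self.weights = np.random.random_sample((input_length, node_count))

class Model:

    def __init__(self):
        self.layers = []

    @property
    def depth(self):
        return len(self.layers)

    @property
    def input_length(self):
        return self.layers[0].input_length if self.layers else 0

    @property
    def weights(self):
        return [layer.weights for layer in self.layers]

    def append_layer(self, layer):
        # A new layer must accept the previous layer's output width.
        if self.layers and layer.input_length != self.layers[-1].node_count:
            raise ValueError('input_length does not match the last layer')
        self.layers.append(layer)

    def __call__(self, model_input):
        output = model_input
        for layer in self.layers:
            output = np.matmul(output, layer.weights)
        return output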
Example #6
class Manager(object):
    def __init__(self,
                 name,
                 num_epochs,
                 model_params,
                 data_params,
                 restart_filename=None):
        self._name = name
        self._restart_filename = restart_filename

        self._num_epochs = num_epochs
        self._epoch = 0

        self._model_params = model_params
        self._data_params = data_params

        self._model_train = None
        self._model_val = None
        self._model_test = None
        self._analysis = None

        self._sess = None
        self._saver = None
        self._summary_op = None
        self._summary_writer = None

        self._name = name + '_ID_%d' % np.random.randint(100000, 1000000)
        logger.LOG_FILENAME = self._name + ".log"

        self._tensorboard_dir = 'tensorboard_dir/'
        log("Tensorflow version:", tf.__version__)
        log("This calculation's name: %s" % self._name)
        log("Logging into file: %s" % ('logs/' + logger.LOG_FILENAME))
        log("Using tensorboard dir: %s" % self._tensorboard_dir)

        log("\nPrinting this calculation's parameters:")
        pp = pprint.PrettyPrinter(indent=2)
        log("\nmodel params:\n", pp.pformat(self._model_params))
        log("\ndata params:\n", pp.pformat(self._data_params))
        log("\nnum_epochs:", self._num_epochs)

    ### Internal class functions ###

    def _build_model(self, dataset):
        self._model_train = Model(modeltype="train",
                                  dataset=dataset,
                                  **self._model_params)

        if dataset.using_validation_set():
            self._model_val = Model(modeltype="val",
                                    dataset=dataset,
                                    **self._model_params)
        if dataset.using_test_set():
            self._model_test = Model(modeltype="test",
                                     dataset=dataset,
                                     **self._model_params)

        self._analysis = Analysis(self._name, dataset, self._model_train,
                                  self._model_val, self._model_test)

        self._summary_op = tf.summary.merge_all()

    def _init_saver_and_variables(self):
        assert self._sess is not None
        tf.global_variables_initializer().run(session=self._sess)

        variables_to_save = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                              scope="model_scope/var_scope/")
        log("\nVariables to be saved:")
        for v in variables_to_save:
            log(v.name)
        self._saver = tf.train.Saver(variables_to_save, max_to_keep=1)

        if self._restart_filename is not None:
            restore_file_name = self._tensorboard_dir + self._restart_filename + "/PARAMETERSTATE.ckpt"
            log("\nTrying restore file: " + restore_file_name)
            try:
                self._saver.restore(self._sess, restore_file_name)
                log("Successfully restored variables from checkpoint!")
            except Exception as exception:
                log("\n!! WARNING: !!")
                log(
                    "Tried to restore variables from checkpoint, but failed with Exception:",
                    str(exception))
                log("Will start from freshly initialized variables.")

        self._summary_writer = tf.summary.FileWriter(self._tensorboard_dir +
                                                     self._name,
                                                     graph=self._sess.graph)

        self._sess.graph.finalize()
        log("\nComputation graph finalized.")

    def _save_checkpoint(self):
        assert (self._sess is not None) and (self._saver is not None)
        start_time_checkpoint = time.time()
        save_path = self._saver.save(
            self._sess,
            self._tensorboard_dir + self._name + "/PARAMETERSTATE.ckpt")
        total_time_checkpoint = time.time() - start_time_checkpoint
        log("\nModel saved in file: %s in %0.2f secs." %
            (save_path, total_time_checkpoint))

    def _update_tensorboard(self):
        assert (self._sess is not None and self._summary_op is not None
                and self._summary_writer is not None)
        log('Updating tensorboard for epoch %d' % self._epoch)
        summary_str = self._sess.run(self._summary_op)
        self._summary_writer.add_summary(summary_str, global_step=self._epoch)
        self._summary_writer.flush()

    def _train_model(self):
        assert self._sess is not None

        log("\n\nStarting training. Number of epochs:", self._num_epochs)
        for step_full_train in range(self._num_epochs):
            self._epoch += 1
            log("\nRunning epoch:", self._epoch)

            start_time = time.time()
            rec_loss_train, kld_loss_train, elbo_loss_train = self._model_train.train_epoch(
                sess=self._sess)
            log("loss train set, ELBO (batchnorm in training mode):",
                elbo_loss_train)
            log("loss train set, reconst. term (batchnorm in training mode):",
                rec_loss_train)
            log("loss train set, KLD term (batchnorm in training mode):",
                kld_loss_train)

            if self._model_val is not None:
                elbo_loss_val = self._model_val.calc_elbo(sess=self._sess)
                log("loss val set, ELBO (batchnorm in inference mode):",
                    elbo_loss_val)
            if self._model_test is not None:
                elbo_loss_test = self._model_test.calc_elbo(sess=self._sess)
                log("loss test set, ELBO (batchnorm in inference mode):",
                    elbo_loss_test)
            end_time = time.time()

            log("Epoch runtime in seconds (train set):", end_time - start_time)
            self._update_tensorboard()

            if step_full_train % 500 == 0 and step_full_train > 0:
                self._save_checkpoint()

        self._save_checkpoint()

    def _run_analysis(self):
        self._analysis.draw_samples_from_data(sess=self._sess)
        self._analysis.draw_samples_from_model(sess=self._sess)
        self._analysis.compare_input_reconst(sess=self._sess)
        analysis_results_dict = self._analysis.calc_metrics(sess=self._sess)
        return analysis_results_dict

    ### Functions for interaction with external code that is not part of this class ###

    def run_model(self):
        log("\n\n --- Loading data --- ")
        dataset = datasets.Datasets(**self._data_params)

        log("\n\n --- Building models --- ")
        self._build_model(dataset)

        with tf.Session() as self._sess:
            log("\n\n --- Initializing variables --- ")
            self._init_saver_and_variables()

            log("\n\n --- Training model --- ")
            if self._num_epochs > 0:
                self._train_model()
            log("\n\nTraining models done")

            log("\n\n --- Analysing model and drawing samples from model --- ")
            self._run_analysis()

            log("\n\n --- FINISHED --- ")

        tf.reset_default_graph()
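A hedged usage sketch of this class: the constructor signature is taken from above, but the parameter dict keys are placeholders, not the project's verified configuration (model_params is forwarded to Model and data_params to datasets.Datasets):

# Hypothetical driver script; model/data parameter keys are illustrative.
model_params = {'latent_dim': 32, 'learning_rate': 1e-3}
data_params = {'batch_size': 128}

manager = Manager(name='vae_run',
                  num_epochs=10,
                  model_params=model_params,
                  data_params=data_params,
                  restart_filename=None)  # or a previous run's name to resume
manager.run_model()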
Example #7
    def train(self) -> None:
        # Loading in the data.
        data_transforms = self._get_data_transforms()

        image_datasets = {
            'train':
            Dataset(metadata_paths=self._train_patch_metadata_paths,
                    transform=data_transforms['train'],
                    class_idx_path=self._class_idx_path,
                    patch_size=self._patch_size),
            'val':
            Dataset(metadata_paths=self._val_patch_metadata_paths,
                    transform=data_transforms['val'],
                    class_idx_path=self._class_idx_path,
                    patch_size=self._patch_size)
        }

        dataloaders = {
            x: torch.utils.data.DataLoader(dataset=image_datasets[x],
                                           batch_size=self._batch_size,
                                           shuffle=(x is "train"),
                                           num_workers=self._num_workers)
            for x in ("train", "val")
        }
        dataset_sizes = {x: len(image_datasets[x]) for x in ("train", "val")}

        logging.info(
            f"{self._num_classes} classes: {self._classes}\n"
            f"num train images {dataset_sizes['train']}\n"
            f"num val images {dataset_sizes['val']}\n"
            f"CUDA is_available: {torch.cuda.is_available()}")

        model = Model(num_classes=self._num_classes,
                      num_layers=self._num_layers,
                      pretrain=self._pretrain,
                      spatial_sensitive=self._spatial_sensitive,
                      n_spatial_features=self._n_spatial_features)

        model = model.to(device=self._device)
        optimizer = optim.Adam(params=model.parameters(),
                               lr=self._learning_rate,
                               weight_decay=self._weight_decay)
        scheduler = lr_scheduler.ExponentialLR(optimizer=optimizer,
                                               gamma=self._learning_rate_decay)

        # Initialize the model.
        if self._resume_checkpoint:
            ckpt = torch.load(f=self._resume_checkpoint_path)
            model.load_state_dict(state_dict=ckpt["model_state_dict"])
            optimizer.load_state_dict(state_dict=ckpt["optimizer_state_dict"])
            scheduler.load_state_dict(state_dict=ckpt["scheduler_state_dict"])
            start_epoch = ckpt["epoch"]
            logging.info(f"model loaded from {self._resume_checkpoint_path}")
        else:
            start_epoch = 0

        # Print the model hyperparameters.
        self._print_params()

        # Logging the model after every epoch.
        # Confirm the output directory exists.
        self._log_csv.parent.mkdir(parents=True, exist_ok=True)

        with self._log_csv.open(mode="w") as writer:
            writer.write("epoch,train_loss,train_acc,val_loss,val_acc\n")
            # Train the model.
            self._train_helper(model=model,
                               dataloaders=dataloaders,
                               dataset_sizes=dataset_sizes,
                               loss_fn=self._search_loss_fn(),
                               optimizer=optimizer,
                               scheduler=scheduler,
                               start_epoch=start_epoch,
                               writer=writer)
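The resume branch above expects a checkpoint dict with four specific keys. A minimal sketch of the matching save side, assumed to run once per epoch inside _train_helper (the path is illustrative):

# Illustrative counterpart to the resume logic: persist exactly the keys
# that train() loads back via torch.load.
torch.save(obj={"model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "epoch": epoch + 1},  # resume from the next epoch
           f="checkpoints/resume.pt")  # hypothetical path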
Example #8
    def test_model_input_length(self):
        test_model = Model()
        self.assertEqual(test_model.input_length, 0)

        test_model.append_layer(Dense(node_count=100, input_length=1000))
        self.assertEqual(test_model.input_length, 1000)
Example #9
    def test_init(self):
        test_model = Model()
        # Assumed initial state, consistent with test_append_layer and
        # test_model_input_length above: a fresh model has no layers.
        self.assertEqual(test_model.depth, 0)
        self.assertEqual(test_model.input_length, 0)