Example
class TrainObject:
    def __init__(self, model, dataset, learning_rule, log=None):
        self.model = model
        self.dataset = dataset
        self.learning_rule = learning_rule
        self.log = log

        if self.log is None:
            # use default Log setting
            self.log = Log(logger=int_logger)

        self.log.logger.info("..begin setting up train object")
        self._setup()

    def _setup(self):

        # ================[ check output dim with target size ]================#

        assert self.model.layers[-1].dim == self.dataset.target_size(), (
            "output dim: "
            + str(self.model.layers[-1].dim)
            + ", is not equal to target size: "
            + str(self.dataset.target_size())
        )

        # ===================[ build params and deltas list ]==================#

        def is_shared_var(var):
            return var.__class__.__name__ in (
                "TensorSharedVariable",
                "CudaNdarraySharedVariable",
            )
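        # Comparing class names avoids importing the CUDA types directly; an
        # isinstance(var, theano.compile.SharedVariable) check should be
        # equivalent, since both classes above subclass SharedVariable.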

        params = []
        deltas = []

        prev_layer_dim = self.model.input_dim
        for layer in self.model.layers:
            if is_shared_var(layer.W):
                params += [layer.W]
                deltas += [theano.shared(np.zeros((prev_layer_dim, layer.dim), dtype=floatX))]

            else:
                self.log.logger.info(layer.W.name + " is " + layer.W.__class__.__name__ + " but not SharedVariable.")

            if is_shared_var(layer.b):
                params += [layer.b]
                deltas += [theano.shared(np.zeros(layer.dim, dtype=floatX))]

            else:
                self.log.logger.info(layer.b.name + " is " + layer.b.__class__.__name__ + " but not SharedVariable.")

            prev_layer_dim = layer.dim

        # =====================[ training params updates ]=====================#
        # UPDATES:
        # (Normal momentum)
        #     delta := momentum * delta
        #              - learning_rate * (d cost(param) / d param)
        #     param := param + delta
        #
        # (Nesterov momentum)
        #     delta := momentum * delta
        #              - learning_rate * (d cost(param + momentum*delta) / d param)
        #     param := param + delta
        # ---------------------------------------------------------------------#
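        # A one-step numeric illustration of the normal momentum update
        # (hypothetical values, not from the code): with momentum = 0.9,
        # learning_rate = 0.1, delta initialised to 0 and a gradient of 2.0
        # at param = 1.0:
        #     delta := 0.9 * 0 - 0.1 * 2.0 = -0.2
        #     param := 1.0 + (-0.2) = 0.8
        # The accumulated delta keeps later updates moving in the same
        # direction even when the instantaneous gradient shrinks.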

        self.log.logger.info("..number of update params: " + str(len(params)))

        train_x = T.matrix("train_x", dtype=floatX)
        train_y = T.matrix("train_y", dtype=floatX)

        assert self.learning_rule.momentum_type in ("normal", "nesterov"), (
            "momentum_type must be 'normal' or 'nesterov', got: " + str(self.learning_rule.momentum_type)
        )

        train_updates = []

        if self.learning_rule.momentum_type == "normal":

            train_y_pred, train_layers_stats = self.model.train_fprop(train_x)
            train_cost = self.learning_rule.cost.get_cost(train_y, train_y_pred)

            if self.learning_rule.L1_lambda is not None:
                self.log.logger.info("..applying L1_lambda: %f" % self.learning_rule.L1_lambda)
                L1 = theano.shared(0.0)
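                # Note: the penalty accumulated below is the sum of the L2
                # norms of W's columns (a group / L2,1 penalty) rather than
                # the elementwise sum of |W| usually meant by "L1".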
                for layer in self.model.layers:
                    if is_shared_var(layer.W):
                        L1 += T.sqrt((layer.W ** 2).sum(axis=0)).sum()

                    else:
                        self.log.logger.info(
                            layer.W.name + " is " + layer.W.__class__.__name__ + " is not used in L1 regularization"
                        )
                train_cost += self.learning_rule.L1_lambda * L1

            if self.learning_rule.L2_lambda is not None:
                self.log.logger.info("..applying L2_lambda: %f" % self.learning_rule.L2_lambda)
                L2 = theano.shared(0.0)
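                # Plain sum of squared weights (standard weight decay); the
                # chained .sum(axis=0).sum() is simply an elementwise sum
                # over W.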
                for layer in self.model.layers:
                    if is_shared_var(layer.W):
                        L2 += ((layer.W ** 2).sum(axis=0)).sum()

                    else:
                        self.log.logger.info(
                            layer.W.name + " is " + layer.W.__class__.__name__ + " is not used in L2 regularization"
                        )
                train_cost += self.learning_rule.L2_lambda * L2

            gparams = T.grad(train_cost, params)

            for delta, param, gparam in zip(deltas, params, gparams):
                train_updates += [
                    (delta, self.learning_rule.momentum * delta - self.learning_rule.learning_rate * gparam)
                ]

                # applying max_col_norm regularisation
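                # divisor is 1 for columns whose L2 norm is within
                # max_col_norm and w_len / max_col_norm otherwise, so
                # over-long columns are rescaled to exactly max_col_norm.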
                if param.name[0] == "W" and self.learning_rule.max_col_norm is not None:
                    W_update = param + delta
                    w_len = T.sqrt((W_update ** 2).sum(axis=0))
                    divisor = (w_len <= self.learning_rule.max_col_norm) + (
                        w_len > self.learning_rule.max_col_norm
                    ) * w_len / self.learning_rule.max_col_norm
                    W_update = W_update / divisor.reshape((1, divisor.shape[0]))
                    train_updates += [(param, W_update)]

                else:
                    train_updates += [(param, param + delta)]

        elif self.learning_rule.momentum_type == "nesterov":
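            # Note: the Nesterov variant in the comment block above needs the
            # gradient evaluated at the look-ahead point
            # param + momentum * delta, which presumably requires a forward
            # pass with temporarily shifted weights; hence the branch is left
            # unimplemented here.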
            raise NotImplementedError("nesterov not implemented yet")

        # ----[ append updates of stats from each layer to train updates ]-----#
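        # train_layers_stats is assumed to be a list of (name, variable)
        # pairs: split_list separates the names from the symbolic variables,
        # generate_shared_list allocates a shared container per variable, and
        # merge_lists pairs each container with its new value, so the stats
        # are refreshed as a side effect of every call to the train function.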

        self.train_stats_names, train_stats_vars = split_list(train_layers_stats)
        self.train_stats_shared = generate_shared_list(train_stats_vars)
        train_stats_updates = merge_lists(self.train_stats_shared, train_stats_vars)
        train_updates += train_stats_updates

        # -------------------------[ train functions ]-------------------------#

        self.log.logger.info("..begin compiling functions")

        train_stopping_cost = self.learning_rule.stopping_criteria["cost"].get_cost(train_y, train_y_pred)

        self.training = theano.function(
            inputs=[train_x, train_y],
            outputs=(train_stopping_cost, train_cost),
            updates=train_updates,
            on_unused_input="warn",
        )

        self.log.logger.info("..training function compiled")

        # ======================[ testing params updates ]=====================#

        test_x = T.matrix("test_x", dtype=floatX)
        test_y = T.matrix("test_y", dtype=floatX)
        test_y_pred, test_layers_stats = self.model.test_fprop(test_x)

        # -----[ append updates of stats from each layer to test updates ]-----#

        self.test_stats_names, test_stats_vars = split_list(test_layers_stats)
        self.test_stats_shared = generate_shared_list(test_stats_vars)
        test_stats_updates = merge_lists(self.test_stats_shared, test_stats_vars)

        # -------------------------[ test functions ]--------------------------#

        test_stopping_cost = self.learning_rule.stopping_criteria["cost"].get_cost(test_y, test_y_pred)
        test_cost = self.learning_rule.cost.get_cost(test_y, test_y_pred)

        self.testing = theano.function(
            inputs=[test_x, test_y],
            outputs=(test_stopping_cost, test_cost),
            updates=test_stats_updates,
            on_unused_input="warn",
        )

        self.log.logger.info("..testing function compiled")

    def run(self):

        train_set = self.dataset.get_train()
        valid_set = self.dataset.get_valid()
        test_set = self.dataset.get_test()

        best_train_error = float("inf")
        best_valid_error = float("inf")
        best_test_error = float("inf")

        mean_train_error = float("inf")
        mean_valid_error = float("inf")
        mean_test_error = float("inf")

        mean_train_cost = float("inf")
        mean_valid_cost = float("inf")
        mean_test_cost = float("inf")

        train_stats_names = []
        train_stats_values = []

        valid_stats_names = []
        valid_stats_values = []

        test_stats_names = []
        test_stats_values = []

        epoch = 1
        error_dcr = 0
        self.best_epoch_so_far = 0

        while self.continue_learning(epoch, error_dcr, best_valid_error):

            start_time = time.time()

            # ======================[ Training Progress ]======================#
            if train_set.dataset_size() > 0:

                self.log.logger.info("..training " + self.dataset.__class__.__name__ + " in progress")

                assert (
                    train_set.feature_size() == self.model.input_dim
                    and train_set.target_size() == self.model.layers[-1].dim
                ), (
                    "train_set input or target size does not match the model "
                    + "input or target size. "
                    + "\ntrain_set feature size: "
                    + str(train_set.feature_size())
                    + "\nmodel input dim: "
                    + str(self.model.input_dim)
                    + "\ntrain_set target size: "
                    + str(train_set.target_size())
                    + "\nmodel output dim: "
                    + str(self.model.layers[-1].dim)
                )

                num_examples = 0
                total_cost = 0.0
                total_stopping_cost = 0.0

                train_stats_names = ["train_" + name for name in self.train_stats_names]
                train_stats_values = np.zeros(len(train_stats_names), dtype=floatX)
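                # self.training evidently returns per-minibatch mean costs,
                # so each value is weighted by the minibatch size len(idx)
                # and the totals divided by num_examples below, giving exact
                # per-example means over the epoch.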

                for idx in train_set:
                    stopping_cost, cost = self.training(train_set.X[idx], train_set.y[idx])

                    total_cost += cost * len(idx)
                    total_stopping_cost += stopping_cost * len(idx)
                    num_examples += len(idx)

                    train_stats_values += len(idx) * get_shared_values(self.train_stats_shared)

                mean_train_error = total_stopping_cost / num_examples
                mean_train_cost = total_cost / num_examples

                train_stats_values /= num_examples

                if mean_train_error < best_train_error:
                    best_train_error = mean_train_error

            # =====================[ Validating Progress ]=====================#
            if valid_set.dataset_size() > 0:

                self.log.logger.info("..validating " + self.dataset.__class__.__name__ + " in progress")

                assert (
                    valid_set.feature_size() == self.model.input_dim
                    and valid_set.target_size() == self.model.layers[-1].dim
                ), (
                    "valid_set input or target size does not match the model "
                    + "input or target size. "
                    + "\nvalid_set feature size: "
                    + str(valid_set.feature_size())
                    + "\nmodel input dim: "
                    + str(self.model.input_dim)
                    + "\nvalid_set target size: "
                    + str(valid_set.target_size())
                    + "\nmodel output dim: "
                    + str(self.model.layers[-1].dim)
                )

                num_examples = 0
                total_cost = 0.0
                total_stopping_cost = 0.0

                valid_stats_names = ["valid_" + name for name in self.test_stats_names]
                valid_stats_values = np.zeros(len(valid_stats_names), dtype=floatX)

                for idx in valid_set:
                    stopping_cost, cost = self.testing(valid_set.X[idx], valid_set.y[idx])

                    total_cost += cost * len(idx)
                    total_stopping_cost += stopping_cost * len(idx)
                    num_examples += len(idx)

                    valid_stats_values += len(idx) * get_shared_values(self.test_stats_shared)

                mean_valid_error = total_stopping_cost / num_examples
                mean_valid_cost = total_cost / num_examples

                valid_stats_values /= num_examples

                if best_valid_error - mean_valid_error > 0:
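                    # record the size of this improvement; continue_learning()
                    # measures it against the percent_decrease criterion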
                    error_dcr = best_valid_error - mean_valid_error
                    best_valid_error = mean_valid_error

            # ======================[ Testing Progress ]=======================#
            if test_set.dataset_size() > 0:

                self.log.logger.info("..testing " + self.dataset.__class__.__name__ + " in progress")

                assert (
                    test_set.feature_size() == self.model.input_dim
                    and test_set.target_size() == self.model.layers[-1].dim
                ), (
                    "test_set input or target size does not match the model "
                    + "input or target size. "
                    + "\ntest_set feature size: "
                    + str(test_set.feature_size())
                    + "\nmodel input dim: "
                    + str(self.model.input_dim)
                    + "\ntest_set target size: "
                    + str(test_set.target_size())
                    + "\nmodel output dim: "
                    + str(self.model.layers[-1].dim)
                )

                num_examples = 0
                total_cost = 0.0
                total_stopping_cost = 0.0

                test_stats_names = ["test_" + name for name in self.test_stats_names]
                test_stats_values = np.zeros(len(test_stats_names), dtype=floatX)

                for idx in test_set:
                    stopping_cost, cost = self.testing(test_set.X[idx], test_set.y[idx])

                    total_cost += cost * len(idx)
                    total_stopping_cost += stopping_cost * len(idx)
                    num_examples += len(idx)

                    test_stats_values += len(idx) * get_shared_values(self.test_stats_shared)

                test_stats_values /= num_examples

                mean_test_error = total_stopping_cost / num_examples
                mean_test_cost = total_cost / num_examples

                # ======[ save model, save hyperparams, send to database ]=====#
                if mean_test_error < best_test_error:

                    best_test_error = mean_test_error

                    if self.log.save_model:
                        self.log._save_model(self.model)
                        self.log.logger.info("..model saved")

                    if self.log.save_hyperparams:
                        self.log._save_hyperparams(self.learning_rule)
                        self.log.logger.info("..hyperparams saved")

                    if self.log.send_to_database:
                        self.log._send_to_database(
                            epoch,
                            self.dataset.__class__.__name__,
                            self.model.rand_seed,
                            str([layer.dropout_below for layer in self.model.layers]),
                            self.learning_rule,
                            best_train_error,
                            best_valid_error,
                            best_test_error,
                            self.dataset.batch_size,
                            len(self.model.layers),
                            str([layer.dim for layer in self.model.layers]),
                            self.dataset.preprocessor.__class__.__name__,
                        )

                        self.log.logger.info(
                            "..sent to database: %s:%s" % (self.log.send_to_database, self.log.experiment_name)
                        )

            end_time = time.time()

            # =========================[ log outputs ]=========================#

            merged_train = merge_lists(train_stats_names, train_stats_values)
            merged_valid = merge_lists(valid_stats_names, valid_stats_values)
            merged_test = merge_lists(test_stats_names, test_stats_values)

            stopping_cost_type = self.learning_rule.stopping_criteria["cost"].type
            outputs = [
                ("epoch", epoch),
                ("runtime(s)", int(end_time - start_time)),
                ("mean_train_cost_" + self.learning_rule.cost.type, mean_train_cost),
                ("mean_train_error_" + stopping_cost_type, mean_train_error),
                ("best_train_error_" + stopping_cost_type, best_train_error),
                ("mean_valid_cost_" + self.learning_rule.cost.type, mean_valid_cost),
                ("mean_valid_error_" + stopping_cost_type, mean_valid_error),
                ("best_valid_error_" + stopping_cost_type, best_valid_error),
                ("mean_test_cost_" + self.learning_rule.cost.type, mean_test_cost),
                ("mean_test_error_" + stopping_cost_type, mean_test_error),
                ("best_test_error_" + stopping_cost_type, best_test_error),
            ]

            outputs += merged_train + merged_valid + merged_test

            self.log._log_outputs(outputs)

            epoch += 1

    def continue_learning(self, epoch, error_dcr, best_valid_error):
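        # Early-stopping policy, as implemented by the branches below: always
        # stop after max_epoch; if percent_decrease or epoch_look_back is
        # unset, train until max_epoch; otherwise keep going while the
        # relative improvement in validation error is at least
        # percent_decrease, and stop once no such improvement has been seen
        # for more than epoch_look_back epochs.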

        if epoch > self.learning_rule.stopping_criteria["max_epoch"]:
            return False

        elif (
            self.learning_rule.stopping_criteria["percent_decrease"] is None
            or self.learning_rule.stopping_criteria["epoch_look_back"] is None
        ):
            return True

        elif np.abs(error_dcr * 1.0 / best_valid_error) >= self.learning_rule.stopping_criteria["percent_decrease"]:
            self.best_epoch_so_far = epoch
            return True

        elif epoch - self.best_epoch_so_far > self.learning_rule.stopping_criteria["epoch_look_back"]:
            return False

        else:
            return True