Example #1
    def fit(self, X, valid_X=None):
        # Expected input dimensions:
        # (n_samples, n_timesteps, n_features)
        if self.input_checking:
            X = rnn_check_array(X)

        self.n_features = X[0].shape[-1]

        # Regression setup: inputs and targets share the same feature
        # dimensionality
        self.input_size_ = self.n_features
        self.output_size_ = self.n_features
        X_sym = T.tensor3('x')
        y_sym = T.tensor3('y')
        X_mask_sym = T.matrix('x_mask')
        y_mask_sym = T.matrix('y_mask')

        self.layers_ = []
        self.layer_sizes_ = [self.input_size_]
        self.layer_sizes_.extend(self.hidden_layer_sizes)
        self.layer_sizes_.append(self.output_size_)

        self.training_loss_ = []
        if valid_X is not None:
            self.validation_loss_ = []
            if self.input_checking:
                valid_X = rnn_check_array(valid_X)

        best_valid_loss = np.inf
        best_train_loss = np.inf
        try:
            for itr in range(self.max_iter):
                print("Starting pass %d through the dataset" % itr)
                total_train_loss = 0
                for i, j in minibatch_indices(X, self.minibatch_size):
                    X_n, y_n, X_mask, y_mask = make_regression(
                        X[i:j], self.window_size, self.prediction_size)
                    if not hasattr(self, 'fit_function'):
                        # Attach Theano test values so shape problems surface
                        # while the graph is being built (debugging aid)
                        X_sym.tag.test_value = X_n
                        y_sym.tag.test_value = y_n
                        X_mask_sym.tag.test_value = X_mask
                        y_mask_sym.tag.test_value = y_mask
                        print("Building model!")
                        print("Minibatch X size %s" % str(X_n.shape))
                        print("Minibatch y size %s" % str(y_n.shape))
                        self._setup_functions(X_sym, y_sym, X_mask_sym,
                                              y_mask_sym, self.layer_sizes_)
                    train_loss = self.fit_function(X_n, y_n, X_mask, y_mask)
                    total_train_loss += train_loss
                current_train_loss = total_train_loss / len(X)
                print("Training loss %f" % current_train_loss)
                self.training_loss_.append(current_train_loss)

                if valid_X is not None:
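                    # Measure the loss on the held-out sequences for this pass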
                    total_valid_loss = 0
                    for i, j in minibatch_indices(valid_X, self.minibatch_size):
                        valid_X_n, valid_y_n, _, _ = make_regression(
                            valid_X[i:j], self.window_size,
                            self.prediction_size)
                        valid_loss = self.loss_function(valid_X_n, valid_y_n)
                        total_valid_loss += valid_loss
                    current_valid_loss = total_valid_loss / len(valid_X)
                    print("Validation loss %f" % current_valid_loss)
                    self.validation_loss_.append(current_valid_loss)

                if (itr % self.save_frequency) == 0:
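                    # Snapshot the full model every save_frequency passes, and
                    # separately keep the best model so far by training loss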
                    f = open(self.model_save_name + "_snapshot.pkl", 'wb')
                    cPickle.dump(self, f, protocol=2)
                    f.close()
                    if current_train_loss < best_train_loss:
                        best_train_loss = current_train_loss
                        f = open(self.model_save_name + "_train_best.pkl", 'wb')
                        cPickle.dump(self, f, protocol=2)
                        f.close()

                if itr == (self.max_iter - 1):
                    f = open(self.model_save_name + "_last.pkl", 'wb')
                    cPickle.dump(self, f, protocol=2)
                    f.close()

                # Relies on `and` short-circuiting: current_valid_loss is only
                # defined when valid_X is not None
                if valid_X is not None and current_valid_loss < best_valid_loss:
                    best_valid_loss = current_valid_loss
                    with open(self.model_save_name + "_valid_best.pkl",
                              'wb') as f:
                        cPickle.dump(self, f, protocol=2)
        except KeyboardInterrupt:
            print("User cancelled, saving last model!")
            f = open(self.model_save_name + "_interrupt.pkl", 'wb')
            cPickle.dump(self, f, protocol=2)
            f.close()
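The helper functions used in this example (rnn_check_array, minibatch_indices, make_regression) are not shown on this page. Judging only from how minibatch_indices is consumed above (each yielded pair is used as X[i:j]), it behaves like a generator of contiguous slice bounds; the sketch below is a hypothetical reimplementation under that assumption, not the library's actual code.

import numpy as np


def minibatch_indices(X, minibatch_size):
    # Yield contiguous (start, stop) bounds that cover all of X in chunks
    # of at most minibatch_size items. Hypothetical sketch; the real helper
    # may differ.
    for start in range(0, len(X), minibatch_size):
        yield start, min(start + minibatch_size, len(X))

With a generator like this, the training loop visits every sequence exactly once per pass, with the final minibatch possibly smaller than minibatch_size.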
Example #2
    def fit(self, X, y, valid_X=None, valid_y=None):
        if self.input_checking:
            X, y = rnn_check_array(X, y)
        input_size = X[0].shape[1]
        # Assume class labels are consecutive integers starting from 0
        highest_class = np.max([np.max(d) for d in y])
        lowest_class = np.min([np.min(d) for d in y])
        if lowest_class != 0:
            raise ValueError("Labels must start from 0!")
        # Create a list of all classes, then get uniques
        # sum(lists, []) is list concatenation
        all_classes = np.unique(sum([list(np.unique(d)) for d in y], []))
        # +1 to include endpoint
        output_size = len(np.arange(lowest_class, highest_class + 1))
        X_sym = T.tensor3('x')
        y_sym = T.tensor3('y')
        X_mask_sym = T.matrix('x_mask')
        y_mask_sym = T.matrix('y_mask')

        self.layers_ = []
        self.layer_sizes_ = [input_size]
        self.layer_sizes_.extend(self.hidden_layer_sizes)
        self.layer_sizes_.append(output_size)
        if not hasattr(self, 'fit_function'):
            print("Building model!")
            self._setup_functions(X_sym, y_sym, X_mask_sym, y_mask_sym,
                                  self.layer_sizes_)
        self.training_loss_ = []
        if valid_X is not None:
            self.validation_loss_ = []
            if self.input_checking:
                valid_X, valid_y = rnn_check_array(valid_X, valid_y)
                for vy in valid_y:
                    if not np.in1d(np.unique(vy), all_classes).all():
                        raise ValueError(
                            "Validation set contains classes not in training "
                            "set! Training set classes: %s\n"
                            "Validation set classes: %s"
                            % (all_classes, np.unique(vy)))

        best_valid_loss = np.inf
        best_train_loss = np.inf
        try:
            for itr in range(self.max_iter):
                print("Starting pass %d through the dataset" % itr)
                total_train_loss = 0
                for i, j in minibatch_indices(X, self.minibatch_size):
                    X_n, y_n, X_mask, y_mask = make_minibatch(X[i:j], y[i:j],
                                                              output_size)
                    train_loss = self.fit_function(X_n, y_n, X_mask, y_mask)
                    total_train_loss += train_loss
                current_train_loss = total_train_loss / len(X)
                print("Training loss %f" % current_train_loss)
                self.training_loss_.append(current_train_loss)
                if valid_X is not None:
                    total_valid_loss = 0
                    for i, j in minibatch_indices(valid_X, self.minibatch_size):
                        valid_X_n, valid_y_n, X_mask, y_mask = make_minibatch(
                            valid_X[i:j], valid_y[i:j], output_size)
                        valid_loss = self.loss_function(valid_X_n, valid_y_n,
                                                        X_mask, y_mask)
                        total_valid_loss += valid_loss
                    current_valid_loss = total_valid_loss / len(valid_X)
                    print("Validation loss %f" % current_valid_loss)
                    self.validation_loss_.append(current_valid_loss)

                if (itr % self.save_frequency) == 0:
                    f = open(self.model_save_name + "_snapshot.pkl", 'wb')
                    cPickle.dump(self, f, protocol=2)
                    f.close()
                    if current_train_loss < best_train_loss:
                        best_train_loss = current_train_loss
                        f = open(self.model_save_name + "_train_best.pkl", 'wb')
                        cPickle.dump(self, f, protocol=2)
                        f.close()

                if itr == (self.max_iter - 1):
                    f = open(self.model_save_name + "_last.pkl", 'wb')
                    cPickle.dump(self, f, protocol=2)
                    f.close()

                # Relies on `and` short-circuiting: current_valid_loss is only
                # defined when valid_X is not None
                if valid_X is not None and current_valid_loss < best_valid_loss:
                    best_valid_loss = current_valid_loss
                    with open(self.model_save_name + "_valid_best.pkl",
                              'wb') as f:
                        cPickle.dump(self, f, protocol=2)
        except KeyboardInterrupt:
            print("User cancelled, saving last model!")
            f = open(self.model_save_name + "_interrupt.pkl", 'wb')
            cPickle.dump(self, f, protocol=2)
            f.close()
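make_minibatch is likewise not shown. A common way to build such minibatches for variable-length sequence classification is to pad the sequences into time-major 3D arrays, one-hot encode the per-timestep labels, and return binary masks marking the valid timesteps. The sketch below illustrates that pattern under the assumption that each X entry has shape (n_timesteps, n_features) and each y entry holds one integer label per timestep; it is an illustration of the idea, not the library's actual implementation.

import numpy as np


def make_minibatch_sketch(X_list, y_list, output_size):
    # Pad sequences into (max_len, n_sequences, n_features) arrays, one-hot
    # encode the labels into (max_len, n_sequences, output_size), and build
    # masks that are 1.0 for real timesteps and 0.0 for padding.
    max_len = max(len(x) for x in X_list)
    n_seq = len(X_list)
    n_features = X_list[0].shape[-1]
    X_out = np.zeros((max_len, n_seq, n_features), dtype='float32')
    y_out = np.zeros((max_len, n_seq, output_size), dtype='float32')
    X_mask = np.zeros((max_len, n_seq), dtype='float32')
    y_mask = np.zeros((max_len, n_seq), dtype='float32')
    for i, (x, labels) in enumerate(zip(X_list, y_list)):
        idx = np.asarray(labels).ravel().astype('int32')
        X_out[:len(x), i, :] = x
        y_out[np.arange(len(idx)), i, idx] = 1.
        X_mask[:len(x), i] = 1.
        y_mask[:len(idx), i] = 1.
    return X_out, y_out, X_mask, y_mask

The masks let the cost ignore padded positions, which is why fit_function and loss_function in this example take X_mask and y_mask alongside the data.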