示例#1
0
    def train(self, l, method='normal'):
        """Fit one weight vector per cross-validation fold and store the results.

        ``self.D`` is handed to ``KFolder`` (10 folds, ``normalize=True``).
        For each fold the training split is fitted with the selected method,
        then the fold's training split, testing split and fitted weights are
        appended to ``self.X_train``, ``self.Y_train``, ``self.X_test``,
        ``self.Y_test`` and ``self.W``.  Those lists are reset on every call,
        so index ``i`` in each of them refers to fold ``i``.

        Args:
            l: Forwarded to ``RegressionSolver.solve`` when ``method`` is
                ``'normal'``; unused by the other methods.  Presumably a
                regularization strength -- TODO confirm against
                RegressionSolver.
            method: Solver selection:
                ``'normal'``   -> ``RegressionSolver(X, Y).solve(l)``
                ``'descent'``  -> ``GradientDescent(X, Y).linreg_stoch2()``
                ``'logistic'`` -> ``GradientDescent(X, Y).logreg_stoch()``

        Raises:
            ValueError: If ``method`` is not one of the names listed above.
                The check runs before any attribute is reset, so a bad call
                leaves previously stored results untouched.
        """
        supported = ('normal', 'descent', 'logistic')
        # Fail fast: without this check an unknown method left the weights
        # unbound and crashed with UnboundLocalError after the result lists
        # had already been cleared and partially refilled.
        if method not in supported:
            raise ValueError(
                "unknown training method %r; expected one of %s"
                % (method, ", ".join(repr(name) for name in supported))
            )

        k = 10
        kfolder = KFolder(self.D, k, normalize=True)
        self.X_train, self.Y_train = [], []
        self.X_test, self.Y_test, self.W = [], [], []
        for i in range(k):
            # Get the training data and labels for fold i
            X, Y = kfolder.training(i)

            # Solve for the vector of linear factors, W
            if method == 'normal':
                Wi = RegressionSolver(X, Y).solve(l)
            elif method == 'descent':
                Wi = GradientDescent(X, Y).linreg_stoch2()
            else:  # 'logistic' -- the only option left after validation
                Wi = GradientDescent(X, Y).logreg_stoch()

            # Get the testing data for fold i
            Xi, Yi = kfolder.testing(i)

            # Store the results, one entry per fold
            self.X_train.append(X)
            self.Y_train.append(Y)
            self.X_test.append(Xi)
            self.Y_test.append(Yi)
            self.W.append(Wi)
示例#2
0
from polluted import PollutedSpambase
from evaluator import Evaluator
from descent import GradientDescent

if __name__ == "__main__":
    # Load the polluted Spambase data: training split first, then testing split
    spambase = PollutedSpambase()
    fit_features, fit_labels = spambase.training()
    eval_features, eval_labels = spambase.testing()

    # Run logistic regression on the training split.
    # Author's note: 200,000 iterations gives ~85% accuracy.
    n_iterations = 200001
    weights = GradientDescent(fit_features, fit_labels).logreg_stoch(it=n_iterations)

    # Evaluate the learned weights on the testing split; each argument is
    # passed to Evaluator as a one-element list.
    Evaluator([eval_features], [eval_labels], [weights]).accuracy()