    def train(self, l, method='normal'):
        k = 10
        kfolder = KFolder(self.D, k, normalize=True)
        self.X_train, self.Y_train = [], []
        self.X_test, self.Y_test, self.W = [], [], []
        for i in range(k):
            # Get the training data and labels for fold i
            X, Y = kfolder.training(i)
            # Solve for the vector of linear factors, W
            if method == 'normal':
                rsolver = RegressionSolver(X, Y)
                Wi = rsolver.solve(l)
            elif method == 'descent':
                gd = GradientDescent(X, Y)
                Wi = gd.linreg_stoch2()
            elif method == 'logistic':
                gd = GradientDescent(X, Y)
                Wi = gd.logreg_stoch()
            # Get the testing data for fold i
            Xi, Yi = kfolder.testing(i)
            # Store the per-fold data, labels, and learned weights
            self.X_train.append(X)
            self.Y_train.append(Y)
            self.X_test.append(Xi)
            self.Y_test.append(Yi)
            self.W.append(Wi)
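# A minimal sketch (an assumption, not the actual RegressionSolver code) of what
# the 'normal' branch above likely computes: the closed-form ridge regression
# solution W = (X^T X + l*I)^{-1} X^T Y, with l the regularization strength
# passed into train(). The function name and use of numpy here are illustrative.
import numpy as np

def ridge_normal_equations(X, Y, l):
    """Closed-form regularized least squares; X is (n, d), Y is (n,) or (n, 1)."""
    d = X.shape[1]
    A = X.T @ X + l * np.eye(d)    # regularized Gram matrix
    b = X.T @ Y
    return np.linalg.solve(A, b)   # solve A W = b instead of forming an explicit inverse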
from polluted import PollutedSpambase
from evaluator import Evaluator
from descent import GradientDescent

if __name__ == "__main__":
    # Get data
    dataset = PollutedSpambase()
    train_data, train_labels = dataset.training()
    test_data, test_labels = dataset.testing()

    # Do Logistic Regression
    gd = GradientDescent(train_data, train_labels)
    # 200,000 iterations gives ~85% acc
    W = gd.logreg_stoch(it=200001)

    # Evaluate solution
    evaluator = Evaluator([test_data], [test_labels], [W])
    evaluator.accuracy()
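# A minimal sketch (assumed, not the actual descent.GradientDescent implementation)
# of stochastic gradient descent for logistic regression, which is the technique
# logreg_stoch() above presumably implements. The function name, learning rate,
# and the assumption of {0, 1} labels are illustrative, not taken from the source.
import numpy as np

def logreg_sgd(X, Y, it=200001, lr=0.001):
    """Learn logistic-regression weights with `it` single-sample gradient updates."""
    n, d = X.shape
    W = np.zeros(d)
    for _ in range(it):
        i = np.random.randint(n)                 # pick one training sample at random
        p = 1.0 / (1.0 + np.exp(-X[i] @ W))      # predicted probability of class 1
        W += lr * (Y[i] - p) * X[i]              # ascent step on the log-likelihood
    return W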