def preprocessData(filename):
    """Load a comma-delimited dataset and return scaled train/test splits.

    Steps, in order: load the dataset, fill missing data (called with the
    arguments 2 and 3), run two categorization passes, split into train and
    test sets (third argument 0.15), and scale each split separately.

    Args:
        filename: Path of the dataset, forwarded to ``pre.loadDataset``.

    Returns:
        The tuple ``(XTrain, XTest, yTrain, yTest)``.
    """
    # The third value returned by the loader is not used here.
    features, target, _csv = pre.loadDataset(filename, ",")
    features = pre.fillMissingData(features, 2, 3)

    # Two passes: the original labels the first "sex" and the second "embark".
    for _ in range(2):
        features = pre.computeCategorization(features)

    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(
        features, target, 0.15)

    # Each split is scaled by its own call, train first.
    return pre.computeScaling(XTrain), pre.computeScaling(XTest), yTrain, yTest
def runLinearRegressionExample(filename):
    """Run the linear regression example, printing how long each stage took.

    Stages, in order: load the dataset, fill missing data, compute the
    categorization of column 0, split into train/test sets (third argument
    0.8) and compute the linear regression model. After each stage a line of
    the form ``"<stage>: <seconds with 2 decimals> segundos."`` is printed.

    Args:
        filename: Path of the dataset, forwarded to ``pre.loadDataset``.

    Returns:
        None. Results are only printed.
    """

    def _timed(label, step, *stepArgs):
        # Run one pipeline stage and report its duration.
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        started = time.perf_counter()
        result = step(*stepArgs)
        elapsed = time.perf_counter() - started
        print("%s: %.2f" % (label, elapsed), "segundos.")
        return result

    X, y = _timed("Load Dataset", pre.loadDataset, filename)
    X = _timed("Fill Missing Data", pre.fillMissingData, X, 1, X.shape[1])
    X = _timed("Compute Categorization", pre.computeCategorization, X, 0)
    XTrain, XTest, yTrain, yTest = _timed(
        "Split Train Test sets", pre.splitTrainTestSets, X, y, 0.8)
    _timed("Compute Linear Regression", computeLinearRegressionModel,
           XTrain, yTrain, XTest, yTest)
def runMultipleLinearRegressionExample(filename):
    """Run the multiple linear regression example, timing every stage.

    Stages, in order: load the dataset, fill missing data (arguments 0 and
    2), compute the categorization of column 3, split into train/test sets
    (third argument 0.8), run automatic backward elimination (last argument
    0.05) and compute the multiple linear regression model. After each stage
    a line of the form ``"<stage>: <seconds with 2 decimals> segundos."`` is
    printed.

    Args:
        filename: Path of the dataset, forwarded to ``pre.loadDataset``.

    Returns:
        None. Results are only printed.
    """

    def _timed(label, step, *stepArgs):
        # Run one pipeline stage and report its duration.
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        started = time.perf_counter()
        result = step(*stepArgs)
        elapsed = time.perf_counter() - started
        print("%s: %.2f" % (label, elapsed), "segundos.")
        return result

    X, y = _timed("Load Dataset", pre.loadDataset, filename)
    X = _timed("Fill Missing Data", pre.fillMissingData, X, 0, 2)
    X = _timed("Compute Categorization", pre.computeCategorization, X, 3)
    XTrain, XTest, yTrain, yTest = _timed(
        "Split Train Test sets", pre.splitTrainTestSets, X, y, 0.8)

    # Backward elimination returns the reduced train and test feature sets.
    XTrain, XTest = _timed(
        "Compute Automatic Backward Elimination",
        computeAutomaticBackwardElimination, XTrain, yTrain, XTest, 0.05)

    _timed("Compute Multiple Linear Regression",
           computeMultipleLinearRegressionModel, XTrain, yTrain, XTest, yTest)
def computeLogisticRegressionExample(filename):
    """Preprocess a dataset, fit a logistic regression model and evaluate it.

    Preprocessing, in order: load the comma-delimited dataset, fill missing
    data (arguments 2 and 3), run two categorization passes, split into
    train/test sets (third argument 0.15) and scale each split separately.
    The model is then computed, used to predict on the test features, and
    evaluated against the test targets.

    Args:
        filename: Path of the dataset, forwarded to ``pre.loadDataset``.

    Returns:
        Whatever ``evaluateModel`` returns for the fitted classifier.
    """
    # The third value returned by the loader is not used here.
    features, labels, _csv = pre.loadDataset(filename, ",")
    features = pre.fillMissingData(features, 2, 3)

    # Two passes: the original labels the first "sex" and the second "embark".
    for _ in range(2):
        features = pre.computeCategorization(features)

    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(
        features, labels, 0.15)
    XTrain = pre.computeScaling(XTrain)
    XTest = pre.computeScaling(XTest)

    classifier = computeLogisticRegressionModel(XTrain, yTrain, XTest)
    predictions = predictModel(classifier, XTest)
    return evaluateModel(classifier, predictions, yTest)
def preprocessDataCrossValidation(args, use_scaling):
    """Load and preprocess a dataset without splitting it into train/test.

    Steps, in order: load the dataset, optionally fill missing data in the
    listed columns, optionally one-hot encode the listed columns, and
    optionally scale the features.

    Args:
        args: Object exposing ``dataset``, ``delimiter``,
            ``fill_missing_data_columns`` and ``one_hot_encoding_columns``.
            The two ``*_columns`` attributes are either ``None`` (step
            skipped) or a comma-separated string of integer column indices.
        use_scaling: When truthy, the features are passed through
            ``pre.computeScaling``.

    Returns:
        The tuple ``(X, y)``.

    Raises:
        ValueError: If a column list contains a non-integer entry.
    """
    # The third value returned by the loader is not used here.
    X, y, _csv = pre.loadDataset(args.dataset, args.delimiter)

    if args.fill_missing_data_columns is not None:
        fillColumns = [
            int(c) for c in args.fill_missing_data_columns.split(',')]
        offset = 0
        for n in fillColumns:
            X = pre.fillMissingData(X, n + offset)
            # NOTE(review): the offset grows by the column index itself, so
            # with several columns each later index is shifted by the sum of
            # the previous ones. Kept as in the original -- confirm against
            # pre.fillMissingData whether this shift is intended.
            offset += n

    if args.one_hot_encoding_columns is not None:
        encodeColumns = [
            int(c) for c in args.one_hot_encoding_columns.split(',')]
        offset = 0
        for n in encodeColumns:
            # The second returned value is used to shift the following
            # indices by (o - 1); presumably the number of columns the
            # encoding produced -- TODO confirm.
            X, o = pre.computeCategorization(X, n + offset)
            offset += o - 1

    if use_scaling:
        X = pre.computeScaling(X)
        # The scaler result may be a 2-item pair whose first item is the
        # scaled data (presumably (X, scaler) -- TODO confirm). Checking the
        # container type instead of only len() == 2 avoids mistaking a
        # dataset that happens to have exactly two rows for such a pair.
        if isinstance(X, (tuple, list)) and len(X) == 2:
            X = X[0]

    return X, y
def preprocessData(args, use_scaling):
    """Load and preprocess a dataset, returning train/test splits.

    Steps, in order: load the dataset, optionally fill missing data in the
    listed columns, optionally one-hot encode the listed columns, split into
    train/test sets using ``args.test_size``, and optionally scale each split
    separately.

    Args:
        args: Object exposing ``dataset``, ``delimiter``, ``test_size``,
            ``fill_missing_data_columns`` and ``one_hot_encoding_columns``.
            The two ``*_columns`` attributes are either ``None`` (step
            skipped) or a comma-separated string of integer column indices.
        use_scaling: When truthy, both splits are passed through
            ``pre.computeScaling``.

    Returns:
        The tuple ``(XTrain, XTest, yTrain, yTest)``.

    Raises:
        ValueError: If a column list contains a non-integer entry.
    """
    # The third value returned by the loader is not used here.
    X, y, _csv = pre.loadDataset(args.dataset, args.delimiter)

    if args.fill_missing_data_columns is not None:
        fillColumns = [
            int(c) for c in args.fill_missing_data_columns.split(',')]
        offset = 0
        for n in fillColumns:
            X = pre.fillMissingData(X, n + offset)
            # NOTE(review): the offset grows by the column index itself, so
            # with several columns each later index is shifted by the sum of
            # the previous ones. Kept as in the original -- confirm against
            # pre.fillMissingData whether this shift is intended.
            offset += n

    if args.one_hot_encoding_columns is not None:
        encodeColumns = [
            int(c) for c in args.one_hot_encoding_columns.split(',')]
        offset = 0
        for n in encodeColumns:
            # The second returned value is used to shift the following
            # indices by (o - 1); presumably the number of columns the
            # encoding produced -- TODO confirm.
            X, o = pre.computeCategorization(X, n + offset)
            offset += o - 1

    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(
        X, y, args.test_size)

    if use_scaling:
        XTrain = pre.computeScaling(XTrain)
        XTest = pre.computeScaling(XTest)
        # The scaler result may be a 2-item pair whose first item is the
        # scaled data (presumably (X, scaler) -- TODO confirm). Checking the
        # container type instead of only len() == 2 avoids mistaking a split
        # that happens to have exactly two rows for such a pair.
        if isinstance(XTrain, (tuple, list)) and len(XTrain) == 2:
            XTrain = XTrain[0]
        if isinstance(XTest, (tuple, list)) and len(XTest) == 2:
            XTest = XTest[0]

    return XTrain, XTest, yTrain, yTest