Exemplo n.º 1
0
    def preprocessData(filename):
        X, y, csv = pre.loadDataset(filename, ",")
        X = pre.fillMissingData(X, 2, 3)

        #sex
        X = pre.computeCategorization(X)
        #embark
        X = pre.computeCategorization(X)

        XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, 0.15)
        XTrain = pre.computeScaling(XTrain)
        XTest = pre.computeScaling(XTest)

        return XTrain, XTest, yTrain, yTest
Exemplo n.º 2
0
def runLinearRegressionExample(filename):
    start_time = time.time()
    X, y = pre.loadDataset(filename)
    elapsed_time = time.time() - start_time
    print("Load Dataset: %.2f" % elapsed_time, "segundos.")

    start_time = time.time()
    X = pre.fillMissingData(X, 1, X.shape[1])
    elapsed_time = time.time() - start_time
    print("Fill Missing Data: %.2f" % elapsed_time, "segundos.")

    start_time = time.time()
    X = pre.computeCategorization(X, 0)
    elapsed_time = time.time() - start_time
    print("Compute Categorization: %.2f" % elapsed_time, "segundos.")

    start_time = time.time()
    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, 0.8)
    elapsed_time = time.time() - start_time
    print("Split Train Test sets: %.2f" % elapsed_time, "segundos.")

    start_time = time.time()
    computeLinearRegressionModel(XTrain, yTrain, XTest, yTest)
    elapsed_time = time.time() - start_time
    print("Compute Linear Regression: %.2f" % elapsed_time, "segundos.")
Exemplo n.º 3
0
def runMultipleLinearRegressionExample(filename):
    start_time = time.time()
    X, y = pre.loadDataset(filename)
    elapsed_time = time.time() - start_time
    print("Load Dataset: %.2f" % elapsed_time, "segundos.")

    start_time = time.time()
    X = pre.fillMissingData(X, 0, 2)
    elapsed_time = time.time() - start_time
    print("Fill Missing Data: %.2f" % elapsed_time, "segundos.")

    start_time = time.time()
    X = pre.computeCategorization(X, 3)
    elapsed_time = time.time() - start_time
    print("Compute Categorization: %.2f" % elapsed_time, "segundos.")

    start_time = time.time()
    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, 0.8)
    elapsed_time = time.time() - start_time
    print("Split Train Test sets: %.2f" % elapsed_time, "segundos.")

    start_time = time.time()
    XTrain, XTest = computeAutomaticBackwardElimination(
        XTrain, yTrain, XTest, 0.05)
    elapsed_time = time.time() - start_time
    print("Compute Automatic Backward Elimination: %.2f" % elapsed_time,
          "segundos.")

    start_time = time.time()
    computeMultipleLinearRegressionModel(XTrain, yTrain, XTest, yTest)
    elapsed_time = time.time() - start_time
    print("Compute Multiple Linear Regression: %.2f" % elapsed_time,
          "segundos.")
    '''start_time = time.time()
Exemplo n.º 4
0
def computeLogisticRegressionExample(filename):
    X, y, csv = pre.loadDataset(filename, ",")
    X = pre.fillMissingData(X, 2, 3)

    #sex
    X = pre.computeCategorization(X)
    #embark
    X = pre.computeCategorization(X)

    XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(X, y, 0.15)
    XTrain = pre.computeScaling(XTrain)
    XTest = pre.computeScaling(XTest)

    classifier = computeLogisticRegressionModel(XTrain, yTrain, XTest)
    yPred = predictModel(classifier, XTest)
    return evaluateModel(classifier, yPred, yTest)
Exemplo n.º 5
0
    def preprocessDataCrossValidation(args, use_scaling):
        X, y, csv = pre.loadDataset(args.dataset, args.delimiter)

        if (args.fill_missing_data_columns is not None):
            columns = args.fill_missing_data_columns.split(',')
            columns = [int(x) for x in columns]

            offset = 0
            for n in columns:
                X = pre.fillMissingData(X, n + offset)
                offset += n

        if (args.one_hot_encoding_columns is not None):
            columns = args.one_hot_encoding_columns.split(',')
            columns = [int(x) for x in columns]

            offset = 0
            for n in columns:
                X, o = pre.computeCategorization(X, n + offset)
                offset += o - 1

        if (use_scaling == True):
            X = pre.computeScaling(X)

        if (len(X) == 2):
            X = X[0]

        return X, y
Exemplo n.º 6
0
    def preprocessData(args, use_scaling):
        X, y, csv = pre.loadDataset(args.dataset, args.delimiter)

        if (args.fill_missing_data_columns is not None):
            columns = args.fill_missing_data_columns.split(',')
            columns = [int(x) for x in columns]

            offset = 0
            for n in columns:
                X = pre.fillMissingData(X, n + offset)
                offset += n

        if (args.one_hot_encoding_columns is not None):
            columns = args.one_hot_encoding_columns.split(',')
            columns = [int(x) for x in columns]

            offset = 0
            for n in columns:
                X, o = pre.computeCategorization(X, n + offset)
                offset += o - 1

        XTrain, XTest, yTrain, yTest = pre.splitTrainTestSets(
            X, y, args.test_size)

        if (use_scaling == True):
            XTrain = pre.computeScaling(XTrain)
            XTest = pre.computeScaling(XTest)

        if (len(XTrain) == 2):
            XTrain = XTrain[0]
        if (len(XTest) == 2):
            XTest = XTest[0]

        return XTrain, XTest, yTrain, yTest