def Ionosphere(X):
    """Prepare the ionosphere data frame for training.

    Operates on a copy of ``X`` so the caller's frame is left untouched.
    The copy is passed through ``Processor.removeMissing``, the columns
    ``col0``, ``col1`` and ``col13`` are dropped, and the ``signal`` column
    is passed to ``Processor.toBinaryCol`` with the mapping
    ``{"g": 1, "b": 0}``.

    Args:
        X: Data frame holding the raw ionosphere data, including a
            ``signal`` column.

    Returns:
        A two-element list ``[features, labels]`` where ``labels`` is the
        ``signal`` column and ``features`` is every column except the last.
    """
    signal_encoding = {"signal": {"g": 1, "b": 0}}
    unused_columns = ['col0', 'col1', "col13"]

    frame = Processor.removeMissing(X.copy())
    frame = frame.drop(columns=unused_columns)
    frame = Processor.toBinaryCol(frame, signal_encoding)

    labels = frame["signal"]
    # Positional slice: keeps everything but the last column.
    # NOTE(review): presumably "signal" is that last column - confirm.
    features = frame.iloc[:, :-1]

    return [features, labels]
示例#2
0
    def ttt(X):
        """Prepare the tic-tac-toe data frame for training.

        Operates on a copy of ``X``. The ``result`` column is passed to
        ``Processor.toBinaryCol`` with the mapping
        ``{"positive": 1, "negative": 0}`` and the nine board-cell columns
        are passed to ``Processor.OHE``.

        Args:
            X: Data frame with the board-cell columns ``tl`` .. ``br`` and a
                ``result`` column.

        Returns:
            A two-element list ``[features, labels]`` where ``labels`` is the
            ``result`` column and ``features`` is the frame without it.
        """
        result_encoding = {"result": {"positive": 1, "negative": 0}}
        board_cells = ["tl", "tm", "tr", "ml", "mm", "mr", "bl", "bm", "br"]

        frame = Processor.toBinaryCol(X.copy(), result_encoding)
        frame = Processor.OHE(frame, cols=board_cells)

        labels = frame["result"]
        features = frame.drop(columns=["result"])

        return [features, labels]
    def adult(X):
        """Prepare the adult (census income) data frame for training.

        Operates on a copy of ``X``. Steps, in order:

        1. ``Processor.removeMissing`` on the copy.
        2. ``Processor.toBinaryCol`` on ``sex`` (Male -> 0, Female -> 1) and
           ``salary`` (">50K" -> 0, "<=50K" -> 1).
        3. ``Processor.normalize`` on ``fnlwgt`` and ``hours-per-week``.
        4. The ``salary`` column is taken as the label and the last column
           is removed by position.
        5. ``Processor.OHE`` on the remaining frame.
        6. ``capital-gain``, ``capital-loss``, ``education-num`` and the
           listed ``native-country_*`` columns are dropped.

        Args:
            X: Data frame holding the raw adult data.

        Returns:
            A two-element list ``[features, labels]``.
        """
        value_maps = {
            "sex": {"Male": 0, "Female": 1},
            "salary": {">50K": 0, "<=50K": 1},
        }

        frame = Processor.removeMissing(X.copy())
        frame = Processor.toBinaryCol(frame, value_maps)
        frame = Processor.normalize(frame, ["fnlwgt", "hours-per-week"])

        labels = frame["salary"]
        # Positional slice: removes the last column.
        # NOTE(review): presumably "salary" is that last column - confirm.
        features = frame.iloc[:, :-1]
        features = Processor.OHE(features)

        # Country indicator columns removed from the feature set.
        country_columns = [
            "native-country_Cambodia",
            "native-country_England",
            "native-country_Puerto-Rico",
            "native-country_Canada",
            "native-country_Outlying-US(Guam-USVI-etc)",
            "native-country_India",
            "native-country_Japan",
            "native-country_Greece",
            "native-country_South",
            "native-country_China",
            "native-country_Cuba",
            "native-country_Iran",
            "native-country_Honduras",
            "native-country_Italy",
            "native-country_Poland",
            "native-country_Jamaica",
            "native-country_Vietnam",
            "native-country_Portugal",
            "native-country_Ireland",
            "native-country_France",
            "native-country_Dominican-Republic",
            "native-country_Laos",
            "native-country_Ecuador",
            "native-country_Taiwan",
            "native-country_Haiti",
            "native-country_Columbia",
            "native-country_Hungary",
            "native-country_Guatemala",
            "native-country_Nicaragua",
            "native-country_Scotland",
            "native-country_Thailand",
            "native-country_Yugoslavia",
            "native-country_El-Salvador",
            "native-country_Trinadad&Tobago",
            "native-country_Peru",
            "native-country_Hong",
            "native-country_Holand-Netherlands",
        ]
        dropped_columns = ["capital-gain", "capital-loss", "education-num"]
        dropped_columns = dropped_columns + country_columns

        features = features.drop(columns=dropped_columns)

        return [features, labels]
 def mam(X):
     """Prepare the ``mam`` data frame for training.

     Operates on a copy of ``X``, which is passed through
     ``Processor.fillMissing`` before the label column is separated.

     Args:
         X: Data frame containing a ``result`` column.

     Returns:
         A two-element list ``[features, labels]`` where ``labels`` is the
         ``result`` column and ``features`` is the frame without it.
     """
     frame = Processor.fillMissing(X.copy())
     labels = frame["result"]
     features = frame.drop(columns=["result"])
     return [features, labels]
示例#5
0
    df = pd.DataFrame(data)

    df_to_table(df, 'time_table_all_final')

    # print(evaluate_acc(Processor.ToNumpyCol(Y_test), model.predict(X_test.to_numpy())))

    #print(cross_validation(5, X_train.to_numpy(), Processor.ToNumpyCol(Y_train), model))

elif ds == "ionosphere":
    path = "../datasets/ionosphere/ionosphere.data"

    header = ["{}{}".format("col", x) for x in range(33 + 1)]
    header.append("signal")

    All = Processor.read(path, header)

    [X, Y] = Clean.Ionosphere(All)

    [X_train, X_test, Y_train, Y_test] = Processor.split(X, Y, train=0.8)
    setup = '''
from Project1.src.NaiveBayes import NaiveBayes
from Project1.src.Processor import Processor
from Project1.src.Clean import Clean
from Project1.src.CrossValidation import cross_validation

path = "../datasets/ionosphere/ionosphere.data"

header = ["{}{}".format("col", x) for x in range(33 + 1)]
header.append("signal")
import matplotlib.pyplot as plt
import numpy as np
from Project1.src.LogisticRegression import LogisticRegression
from Project1.src.NaiveBayes import NaiveBayes
from Project1.src.Processor import Processor
from Project1.src.Clean import Clean
from Project1.src.CrossValidation import cross_validation
from Project1.src.CrossValidation import evaluate_acc



print("Analyzing the ionosphere data set")
path = "../datasets/ionosphere/ionosphere.data"
header = ["{}{}".format("col", x) for x in range(33 + 1)]
header.append("signal")
All = Processor.read(path, header)
[X, Y] = Clean.Ionosphere(All)

X = X.to_numpy()
Y = Processor.ToNumpyCol(Y)

iters = np.arange(20, X.shape[0], 50)
#print(X.shape)
#print(Y.shape)

accuracies = []

for iter_ in iters:
    #rowsX = X[0:X.shape[0], :]
    #rowsY = Y[0:Y.shape[0], :]
    rowsX = X[0:iter_, :]
示例#7
0
Learning rates and threshold gradient were chosen using the results of the hyperparameter tuning script
"""
from Project1.src.LogisticRegression import LogisticRegression
from Project1.src.NaiveBayes import NaiveBayes
from Project1.src.CrossValidation import cross_validation
from Project1.src.Processor import Processor
from Project1.src.Clean import Clean
from Project1.src.HPTuning import df_to_table
import pandas as pd

# Find accuracies for ionosphere data set
print("Analyzing the ionosphere data set")
path = "../datasets/ionosphere/ionosphere.data"
# Column names: col0 .. col33, followed by the "signal" column.
header = ["{}{}".format("col", x) for x in range(33 + 1)]
header.append("signal")
All = Processor.read(path, header)
[X, Y] = Clean.Ionosphere(All)

# Result row: data set name, then one rounded accuracy per model.
ionosphere_results = ['ionosphere']

# 5-fold cross-validation with logistic regression; only the first element
# of the returned 3-tuple (bound to acc) is used.
acc, _, _ = cross_validation(5,
                             X.to_numpy(),
                             Processor.ToNumpyCol(Y),
                             LogisticRegression(),
                             learning_rate=1.0,
                             max_gradient=1e-2,
                             max_iters=50000)
ionosphere_results.append(round(acc, 2))

# Same evaluation with naive Bayes. The return value is unpacked like the
# call above so round() receives a number rather than the whole tuple.
# NOTE(review): assumes cross_validation returns the same 3-tuple regardless
# of the model passed in - confirm against its definition.
acc, _, _ = cross_validation(5, X.to_numpy(), Processor.ToNumpyCol(Y),
                             NaiveBayes())
ionosphere_results.append(round(acc, 2))
print(ionosphere_results)