Exemplo n.º 1
0
def main(fn):
    """Benchmark several classifiers on the headerless CSV file ``fn``.

    The file is loaded, passed through ``impute_missing2``, and split into a
    target (column 0) and features (all remaining columns).  For each of
    GaussianNB, DTree, RandomForestClassifier, SVC and KNeighborsClassifier
    a ``Search`` over a parameter grid and a ``Test`` with fixed parameters
    are run, and each one reports to a file under ``../Report/results/``.

    Args:
        fn: Path (or anything ``pd.read_csv`` accepts) of the CSV data file.
            It must not contain a header row.

    Returns:
        None.  Results are emitted through the ``report`` calls and
        progress labels are printed to stdout.
    """
    # ``header=None`` is the supported way to say "no header row"; the old
    # ``header=-1`` spelling is rejected by recent pandas versions.
    data = pd.read_csv(fn, header=None)
    data = impute_missing2(data)

    # With no header the columns are labelled 0..n-1: column 0 is the target
    # and every other column is a feature.
    # (assumes impute_missing2 keeps those integer labels -- TODO confirm)
    features = list(range(1, len(data.columns)))
    X = data[features]
    y = data[0]

    # --- Gaussian naive Bayes (nothing to tune, hence the empty grid) ---
    print("GaussianNB")
    h = Test(X, y, GaussianNB())
    h.run()
    h.report(fn="../Report/results/cancer.gnb.cm.tex")
    s = Search(X, y, GaussianNB(), [{}])
    s.search()
    s.report("../Report/results/cancer.gnb.tex")

    # --- Decision tree ---
    # NOTE(review): max_features='auto' was deprecated in scikit-learn 1.1
    # and removed in 1.3 (for classifiers it behaved like 'sqrt').  It is
    # kept here so the search report keeps the same rows; drop it when
    # moving to a newer scikit-learn.
    print("DTree neu")
    parameters = [{
        'criterion': ['gini', 'entropy'],
        'max_features': ['auto', 'sqrt', 'log2']
    }]
    s = Search(X, y, DTree(), parameters)
    s.search()
    s.report("../Report/results/cancer.dt.tex")
    h = Test(X, y,
             DTree(max_features='log2', criterion='gini', random_state=1234))
    h.run()
    h.report(fn="../Report/results/cancer.dt.cm.tex")

    # --- Random forest (same 'auto' caveat as the decision tree grid) ---
    print("RF")
    parameters = [{
        'n_estimators': range(1, 15),
        'criterion': ['gini', 'entropy'],
        'max_features': ['auto', 'sqrt', 'log2']
    }]
    s = Search(X, y, RandomForestClassifier(), parameters)
    s.search()
    s.report("../Report/results/cancer.rf.tex")
    h = Test(
        X, y,
        RandomForestClassifier(n_estimators=6,
                               criterion='gini',
                               max_features='sqrt',
                               random_state=1234))
    h.run()
    h.report(fn="../Report/results/cancer.rf.cm.tex")

    # --- Support vector machine ---
    parameters = [{
        'kernel': ['linear', 'sigmoid', 'rbf', 'poly'],
        'C': [0.1, 1, 10, 11, 20]
    }]
    print("SVM")
    from sklearn import preprocessing
    # The grid search runs on standardized features.
    X_scaled = preprocessing.scale(X)
    s = Search(X_scaled, y, SVC(), parameters)
    s.search()
    s.report("../Report/results/cancer.svm.tex")
    # NOTE(review): the final evaluation uses the *unscaled* X although the
    # grid above was searched on X_scaled.  This looks unintentional, but
    # it is left unchanged because it is not visible here whether Test
    # accepts a plain ndarray -- confirm and switch to X_scaled if it does.
    h = Test(X, y, SVC(C=11, kernel='poly'))
    h.run()
    h.report(fn="../Report/results/cancer.svm.cm.tex")

    # --- k-nearest neighbours ---
    print("KNeighborsClassifier")
    parameters = [{
        'n_neighbors': range(4, 8),
        'weights': ['uniform', 'distance'],
        'p': [1, 2]
    }]
    s = Search(X, y, KNeighborsClassifier(), parameters)
    s.search()
    s.report("../Report/results/cancer.knn.tex")
    h = Test(X, y, KNeighborsClassifier(n_neighbors=5, weights='uniform', p=2))
    h.run()
    h.report(fn="../Report/results/cancer.knn.cm.tex")