Пример #1
0
def main():
    """Run the wine classification pipeline from loading to reporting.

    Reads ``wine.csv``, cleans it with ``pp.clean``, prints the column
    names, runs the two feature-selection routines from ``fs`` and then
    evaluates every classifier exposed by ``clf``. Each classifier result is
    handed to ``confMatrixOutput`` together with a display name. The naive
    Bayes classifier is run last because it needs the data discretized by
    ``pp.bayes`` first, after which the feature/target sets are rebuilt.
    """
    df = pd.read_csv('wine.csv')
    # pp.dfExplore(df) was used here for exploratory analysis (disabled).
    # Return value is ignored, so this presumably normalizes df in place.
    pp.clean(df)
    # Manual column drops that were tried and left disabled:
    # resid_sugar, free_sulf_d, citric_acid, density, fx_acidity.
    x = createX(df)
    y = createY(df)

    # Feature selection
    print(list(df))  # iterating the frame yields its column labels
    fs.recursiveFeatureElimination(x, y)
    fs.featureImportance(x, y)

    # Classifiers that work on the cleaned data: (display name, clf method).
    # The method is looked up inside the loop so each lookup happens right
    # before its call, exactly as in a hand-written sequence.
    evaluations = (
        ('Decision Tree', 'decisionTree'),
        ('Support Vector Machine', 'supportVectorMachine'),
        ('Artificial Neural Network', 'ANN'),
        ('Random Forest', 'randomForest'),
        ('Rules Based Classifier', 'rulesBased'),
    )
    for title, method_name in evaluations:
        outcome = getattr(clf, method_name)(x, y)
        confMatrixOutput(title, outcome)

    # Naive Bayes needs categorical input: pp.bayes discretizes all the data
    # to low, medium, and high, so x and y have to be rebuilt afterwards.
    pp.bayes(df)
    x = createX(df)
    y = createY(df)
    bayes_outcome = clf.bayesClassifier(x, y)
    confMatrixOutput('Naive Bayesian Classifier', bayes_outcome)
Пример #2
0
def experiment6_1(train, test, f):
    """Oversample, select features in two passes, standardize, run a forest.

    The training data is passed through ``SMOTEOverSampling``;
    ``decisionTreeFSelect`` produces a first selection from the result, and
    ``f`` is then applied to just that selected part to produce the final
    selection. Both splits are indexed with the final selection and passed
    through ``Standardization`` before ``randomForest`` is called.

    NOTE(review): train and test go through separate ``Standardization``
    calls; if that helper fits its scaling per call, the splits are scaled
    independently -- confirm this is intended.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection. It is not
            oversampled.
        f: Callable receiving the pre-selected oversampled training data and
            returning the selection used to index both splits.

    Returns:
        Whatever ``randomForest`` returns for the prepared pair.
    """
    balanced = SMOTEOverSampling(train)
    first_pass = decisionTreeFSelect(balanced)
    selected = f(balanced[first_pass])
    prepared_train = Standardization(balanced[selected])
    prepared_test = Standardization(test[selected])
    return randomForest(prepared_train, prepared_test)
Пример #3
0
def random_forest_depth_exp_SM_LV_ST_RF(train, test, max_depth):
    """Run the oversample / low-variance / standardize pipeline at one depth.

    The training data is passed through ``SMOTEOverSampling``, then
    ``lowVarianceElimination`` is called on the result with a fixed second
    argument of ``0.8`` to obtain the selection. Both splits are indexed
    with that selection, passed through ``Standardization`` and given to
    ``randomForest`` along with ``max_depth``.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection. It is not
            oversampled.
        max_depth: Forwarded unchanged as ``randomForest``'s ``max_depth``
            keyword argument.

    Returns:
        Whatever ``randomForest`` returns for the prepared pair.
    """
    # Second argument to lowVarianceElimination; presumably a variance
    # threshold -- confirm against that helper.
    threshold = 0.8
    balanced = SMOTEOverSampling(train)
    selected = lowVarianceElimination(balanced, threshold)
    prepared_train = Standardization(balanced[selected])
    prepared_test = Standardization(test[selected])
    return randomForest(prepared_train, prepared_test, max_depth=max_depth)
Пример #4
0
def experiment17(train, test, f):
    """Select with ``f``, standardize both splits, run a random forest.

    ``f`` is applied to the raw training data; its result indexes both
    splits, each of which is then passed through ``Standardization`` before
    ``randomForest`` is called. No oversampling is performed.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection.
        f: Callable receiving ``train`` and returning the selection used to
            index both splits.

    Returns:
        Whatever ``randomForest`` returns for the prepared pair.
    """
    selected = f(train)
    prepared_train = Standardization(train[selected])
    prepared_test = Standardization(test[selected])
    return randomForest(prepared_train, prepared_test)
Пример #5
0
def experiment18_1(train, test, f):
    """Decision-tree pre-selection, then ``f``, standardize, random forest.

    ``decisionTreeFSelect`` produces a first selection from the raw training
    data, and ``f`` is applied to just that selected part to produce the
    final selection. Both splits are indexed with the final selection and
    passed through ``Standardization`` before ``randomForest`` is called.
    No oversampling is performed.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection.
        f: Callable receiving the pre-selected training data and returning
            the selection used to index both splits.

    Returns:
        Whatever ``randomForest`` returns for the prepared pair.
    """
    first_pass = decisionTreeFSelect(train)
    selected = f(train[first_pass])
    prepared_train = Standardization(train[selected])
    prepared_test = Standardization(test[selected])
    return randomForest(prepared_train, prepared_test)
Пример #6
0
def experiment18(train, test, f):
    """Univariate pre-selection, then ``f``, standardize, random forest.

    ``univariateFSelect`` produces a first selection from the raw training
    data, and ``f`` is applied to just that selected part to produce the
    final selection. Both splits are indexed with the final selection and
    passed through ``Standardization`` before ``randomForest`` is called.
    No oversampling is performed.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection.
        f: Callable receiving the pre-selected training data and returning
            the selection used to index both splits.

    Returns:
        Whatever ``randomForest`` returns for the prepared pair.
    """
    first_pass = univariateFSelect(train)
    selected = f(train[first_pass])
    prepared_train = Standardization(train[selected])
    prepared_test = Standardization(test[selected])
    return randomForest(prepared_train, prepared_test)
Пример #7
0
def experiment5(train, test, f):
    """Oversample, select with ``f``, standardize, run a random forest.

    The training data is passed through ``SMOTEOverSampling`` and ``f`` is
    applied to the result to obtain the selection. Both splits are indexed
    with that selection and passed through ``Standardization`` before
    ``randomForest`` is called.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection. It is not
            oversampled.
        f: Callable receiving the oversampled training data and returning
            the selection used to index both splits.

    Returns:
        Whatever ``randomForest`` returns for the prepared pair.
    """
    balanced = SMOTEOverSampling(train)
    selected = f(balanced)
    prepared_train = Standardization(balanced[selected])
    prepared_test = Standardization(test[selected])
    return randomForest(prepared_train, prepared_test)
Пример #8
0
def experiment12_1(train, test, f):
    """Oversample, decision-tree pre-selection, then ``f``; no standardizing.

    The training data is passed through ``SMOTEOverSampling``;
    ``decisionTreeFSelect`` produces a first selection from the result, and
    ``f`` is applied to just that selected part to produce the final
    selection. Both splits are indexed with the final selection and handed
    directly to ``randomForest``.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection. It is not
            oversampled.
        f: Callable receiving the pre-selected oversampled training data and
            returning the selection used to index both splits.

    Returns:
        Whatever ``randomForest`` returns for the selected pair.
    """
    balanced = SMOTEOverSampling(train)
    first_pass = decisionTreeFSelect(balanced)
    selected = f(balanced[first_pass])
    train_subset = balanced[selected]
    test_subset = test[selected]
    return randomForest(train_subset, test_subset)
Пример #9
0
def experiment12(train, test, f):
    """Oversample, univariate pre-selection, then ``f``; no standardizing.

    The training data is passed through ``SMOTEOverSampling``;
    ``univariateFSelect`` produces a first selection from the result, and
    ``f`` is applied to just that selected part to produce the final
    selection. Both splits are indexed with the final selection and handed
    directly to ``randomForest``.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection. It is not
            oversampled.
        f: Callable receiving the pre-selected oversampled training data and
            returning the selection used to index both splits.

    Returns:
        Whatever ``randomForest`` returns for the selected pair.
    """
    balanced = SMOTEOverSampling(train)
    first_pass = univariateFSelect(balanced)
    selected = f(balanced[first_pass])
    train_subset = balanced[selected]
    test_subset = test[selected]
    return randomForest(train_subset, test_subset)
Пример #10
0
def experiment11(train, test, f):
    """Oversample, select with ``f``, run a random forest; no standardizing.

    The training data is passed through ``SMOTEOverSampling`` and ``f`` is
    applied to the result to obtain the selection. Both splits are indexed
    with that selection and handed directly to ``randomForest``.

    Args:
        train: Training data; must support ``train[selection]`` indexing
            (presumably a pandas DataFrame -- confirm with callers).
        test: Evaluation data, indexed with the same selection. It is not
            oversampled.
        f: Callable receiving the oversampled training data and returning
            the selection used to index both splits.

    Returns:
        Whatever ``randomForest`` returns for the selected pair.
    """
    balanced = SMOTEOverSampling(train)
    selected = f(balanced)
    train_subset = balanced[selected]
    test_subset = test[selected]
    return randomForest(train_subset, test_subset)