Пример #1
0
# Build the model inputs. transformData is not imported in this excerpt, so it
# must already be in scope. It returns the feature table plus two targets:
# `target` feeds the classifiers below, `target_reg` feeds the regressor.
features, target, target_reg = transformData(features_raw, target_raw)

# Shuffle and split the data to create train and test datasets.
# The 0.3 argument is presumably the test-set fraction -- confirm against
# projectFunctions.splitData.
from projectFunctions import splitData
X_train, X_test, y_train, y_test = splitData(features, target, 0.3)
Xr_train, Xr_test, yr_train, yr_test = splitData(features, target_reg, 0.3)

from projectFunctions import decTree, drawTree, kneighbors, decTreeReg, kneighbhorsReg
sample_size = len(X_train)
feature_cols = features.columns

# All reports below use the single-argument, parenthesised form of print so
# the script parses on Python 3 while printing exactly the same text on
# Python 2.

# Decision tree classifier using the 'entropy' criterion and depth 4
# (the previous comment said "gini and depth = 3", which did not match the
# call).
results, learner = decTree(sample_size, X_train, y_train, X_test, y_test,
                           'entropy', 4)
drawTree(learner, feature_cols, 'fire_dt.png')
print("Accuracy for Decision tree Classifier - Training, Test sets: %.5f, %.5f" % (
    results['acc_train'], results['acc_test']))
print("-----------------------------------------------------------------------")

# Decision tree regression.
# NOTE(review): 'entropy' is a classification criterion. If decTreeReg forwards
# this argument to scikit-learn's DecisionTreeRegressor it will be rejected --
# confirm how the helper uses it before relying on these scores.
results_dreg, learner_dreg = decTreeReg(Xr_train, yr_train, Xr_test, yr_test,
                                        'entropy', 4)
print("R2 score for Decision tree regression -Training, Test sets: %.5f, %.5f" % (
    results_dreg['acc_train'], results_dreg['acc_test']))
print("-----------------------------------------------------------------------")

# K-neighbors classifier on the same classification split.
resultsK = kneighbors(X_train, y_train, X_test, y_test)
print("Accuracy for K-Neighbors Classifier-Training, Test sets: %.5f, %.5f" % (
    resultsK['acc_train'], resultsK['acc_test']))
Пример #2
0
# Attach the expected column names once the data set has been loaded.
# `data` and `col_names` are defined before this excerpt.
if data is not None:
    data.columns = col_names

# NOTE(review): everything below uses `data` unconditionally, so the None
# guard above only protects the column assignment. If loading can really
# yield None, the script will still fail on the next call -- confirm the
# intended handling with the loading code.

# Explore the data.
from projectFunctions import exploreData
exploreData(data)

# 'label' is the prediction target; 'skin' is excluded from the features
# together with it.
drop_col = ['skin', 'label']
features = data.drop(drop_col, axis=1)
target = data['label']

# Shuffle and split the data to create train and test datasets.
# The 0.3 argument is presumably the test-set fraction -- confirm against
# projectFunctions.splitData.
from projectFunctions import splitData
X_train, X_test, y_train, y_test = splitData(features, target, 0.3)

from projectFunctions import decTree, drawTree
sample_size = len(X_train)
feature_cols = features.columns

# Decision tree classifier using the 'gini' criterion and depth 3.
results, learner = decTree(sample_size, X_train, y_train, X_test, y_test,
                           'gini', 3)
drawTree(learner, feature_cols, 'diabetes.png')

# Single-argument, parenthesised print: parses on Python 3 and prints the
# same text on Python 2.
print("Times for Training, Prediction: %.5f, %.5f" % (results['train_time'],
                                                      results['pred_time']))
print("Accuracy for Training, Test sets: %.5f, %.5f" % (results['acc_train'],
                                                        results['acc_test']))
print("-----------------------------------------------------------------------")
Пример #3
0
# Model comparison on one train/test split. The split (X_train, X_test,
# y_train, y_test), sample_size and the helper functions are defined before
# this excerpt.
#
# All reports use the single-argument, parenthesised form of print so the
# script parses on Python 3 while printing exactly the same text on Python 2.

# Random forest classifier; also returns the features it considers important.
results, imp_features = randomForest(X_train, y_train, X_test, y_test)
print("Accuracy for Random forest Classifier - Training, Test sets: %.5f, %.5f" % (
    results['acc_train'], results['acc_test']))
print("-----------------------------------------------------------------------")

# Optional: restrict both sets to the important features from the random
# forest (left disabled, as in the original).
#X_train = X_train[imp_features]
#X_test  = X_test[imp_features]

# Decision tree classifier using the 'entropy' criterion and depth 4
# (the previous comment said "gini and depth = 3", which did not match the
# call).
results, learner = decTree(sample_size, X_train, y_train, X_test, y_test,
                           'entropy', 4)

# Hand drawTree native `str` labels. On Python 2 unicode labels are still
# encoded to UTF-8 byte strings as before; on Python 3 the labels are already
# `str`, and encoding them would produce `bytes` objects instead of text.
feature_cols = [
    name if isinstance(name, str) else name.encode('utf-8')
    for name in X_train.columns
]

drawTree(learner, feature_cols, 'churn.png')
print("Accuracy for Decision tree Classifier - Training, Test sets: %.5f, %.5f" % (
    results['acc_train'], results['acc_test']))
print("-----------------------------------------------------------------------")

# K-neighbors classifier.
resultsK = kneighbors(X_train, y_train, X_test, y_test)
print("Accuracy for K-Neighbors Classifier-Training, Test sets: %.5f, %.5f" % (
    resultsK['acc_train'], resultsK['acc_test']))
print("-----------------------------------------------------------------------")

# SVM classifier.
resultsS = svmClass(X_train, y_train, X_test, y_test)
print("Accuracy for SVM Classifier-Training, Test sets: %.5f, %.5f" % (
    resultsS['acc_train'], resultsS['acc_test']))
print("-----------------------------------------------------------------------")
Пример #4
0
# Build the model inputs from the raw feature/target data, which is loaded
# before this excerpt.
from projectFunctions import transformData
features, target = transformData(features_raw, target_raw)
# Disabled NaN clean-up for one column, kept for reference:
#features['NumSoftTissues'] = np.nan_to_num(features['NumSoftTissues'])

# Shuffle and split the data to create train and test datasets.
# The 0.3 argument is presumably the test-set fraction -- confirm against
# projectFunctions.splitData.
from projectFunctions import splitData
X_train, X_test, y_train, y_test = splitData(features, target, 0.3)

from projectFunctions import decTree, drawTree
sample_size = len(X_train)
feature_cols = features.columns

# All reports use the single-argument, parenthesised form of print so the
# script parses on Python 3 while printing exactly the same text on Python 2.

# Decision tree using the 'gini' criterion and depth 20
# (the previous comment said depth 3, which did not match the call).
results, learner = decTree(sample_size, X_train, y_train, X_test, y_test,
                           'gini', 20)
drawTree(learner, feature_cols, 'ifraud_gini.png')
print("Times for Training, Prediction: %.5f, %.5f" % (results['train_time'],
                                                      results['pred_time']))
print("Accuracy for Training, Test sets: %.5f, %.5f" % (results['acc_train'],
                                                        results['acc_test']))
print("-----------------------------------------------------------------------")

# Decision tree using the 'entropy' criterion and depth 20
# (the previous comment said depth 3, which did not match the call).
# `results` and `learner` are deliberately overwritten by this second run.
results, learner = decTree(sample_size, X_train, y_train, X_test, y_test,
                           'entropy', 20)
drawTree(learner, feature_cols, 'ifraud_etropy.png')
print("Times for Training, Prediction: %.5f, %.5f" % (results['train_time'],
                                                      results['pred_time']))
print("Accuracy for Training, Test sets: %.5f, %.5f" % (results['acc_train'],
                                                        results['acc_test']))
print("-----------------------------------------------------------------------")