示例#1
0
# Tokenize every entry of the 'PlayerLine' column in place.
# `tokenString`, `fdist` and `stop_w` are defined earlier in the script (not
# visible here); judging by the names they are the tokenizer, a word-frequency
# distribution and a stop-word collection -- TODO confirm.
data['PlayerLine'] = data['PlayerLine'].apply(
    lambda line: tokenString(line, fdist, stop_w))

# exploreData() yields the raw features/target pair; transformData() turns
# that pair into the final model inputs.
features, target = exploreData(data)
features_final, target_final = transformData(features, target)

# Split the data; 0.3 is passed as the test size (presumably a 30% hold-out
# -- confirm in splitData).
from projectFunctions import splitData, svmClassifier, decTree, naiveBayes
X_train, X_test, y_train, y_test = splitData(features_final, target_final, 0.3)


def _print_run_summary(results):
    """Print the timing and accuracy figures of one classifier run.

    `results` is the dict returned by a projectFunctions classifier helper.
    Only the keys 'train_time', 'pred_time', 'acc_train' and 'acc_test' are
    read.
    """
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Times for Training, Prediction: %.5f, %.5f"
          % (results['train_time'], results['pred_time']))
    print("Accuracy for Training, Test sets: %.5f, %.5f"
          % (results['acc_train'], results['acc_test']))
    print("-----------------------------------------------------------------------")


# SVM run, currently disabled.
# NOTE(review): the argument order below (X_test before y_train) differs from
# the decTree/naiveBayes calls -- check svmClassifier's signature before
# re-enabling.
#results, learner = svmClassifier(X_train, X_test, y_train, y_test)
#_print_run_summary(results)

# Decision tree with the 'gini' criterion; 13 is the final positional
# argument (presumably the tree depth -- confirm in decTree).
results, learner = decTree(X_train, y_train, X_test, y_test, 'gini', 13)
_print_run_summary(results)

# Naive Bayes on the same split.
results, learner = naiveBayes(X_train, y_train, X_test, y_test)
_print_run_summary(results)

#data.to_csv('test.csv',index=False)
示例#2
0
# Transform the raw data. transformData() returns the features plus two
# targets: `target` feeds the classifiers below, `target_reg` the regressor.
from projectFunctions import transformData
features, target, target_reg = transformData(features_raw, target_raw)

# Shuffle and split the data to create train and test datasets, once per
# target. 0.3 is passed as the test-size argument.
from projectFunctions import splitData
X_train, X_test, y_train, y_test = splitData(features, target, 0.3)
Xr_train, Xr_test, yr_train, yr_test = splitData(features, target_reg, 0.3)

from projectFunctions import decTree, drawTree, kneighbors, decTreeReg, kneighbhorsReg
sample_size = len(X_train)
feature_cols = features.columns

# Decision tree classifier using the 'entropy' criterion with 4 as the final
# argument (presumably the tree depth -- confirm in decTree). The fitted tree
# is rendered to 'fire_dt.png'.
results, learner = decTree(sample_size, X_train, y_train, X_test, y_test,
                           'entropy', 4)
drawTree(learner, feature_cols, 'fire_dt.png')
# A single parenthesised argument prints identically on Python 2 and 3.
print("Accuracy for Decision tree Classifier - Training, Test sets: %.5f, %.5f" % (
    results['acc_train'], results['acc_test']))
print("-----------------------------------------------------------------------")

# Decision tree regression.
# NOTE(review): 'entropy' is normally a classification criterion; if
# decTreeReg forwards it to scikit-learn's DecisionTreeRegressor it will be
# rejected -- confirm how decTreeReg uses this argument.
results_dreg, learner_dreg = decTreeReg(Xr_train, yr_train, Xr_test, yr_test,
                                        'entropy', 4)
print("R2 score for Decision tree regression -Training, Test sets: %.5f, %.5f" % (
    results_dreg['acc_train'], results_dreg['acc_test']))
print("-----------------------------------------------------------------------")

# K-neighbors classifier. Unlike decTree, only a single value is unpacked
# from the helper here.
resultsK = kneighbors(X_train, y_train, X_test, y_test)
print "Accuracy for K-Neighbors Classifier-Training, Test sets: %.5f, %.5f" % (
示例#3
0
# If the data set loaded, apply the expected column names. (Displaying the
# first record is currently disabled.)
# NOTE(review): only the rename is guarded; the code below still uses `data`
# unconditionally, so a failed load (data is None) is not handled there.
if data is not None:
    data.columns = col_names
    #display(data.head(n=1))

# Explore the data.
from projectFunctions import exploreData
exploreData(data)

# Features are every column except 'skin' and 'label'; 'label' is the target.
drop_col = ['skin', 'label']
features = data.drop(drop_col, axis=1)
target = data['label']
#if features is not None:
#display(features.head(n=1))

# Shuffle and split the data to create train and test datasets; 0.3 is
# passed as the test-size argument.
from projectFunctions import splitData
X_train, X_test, y_train, y_test = splitData(features, target, 0.3)

from projectFunctions import decTree, drawTree
sample_size = len(X_train)
feature_cols = features.columns

# Decision tree with the 'gini' criterion and 3 as the final argument
# (presumably the tree depth -- confirm in decTree). The fitted tree is
# rendered to 'diabetes.png'.
results, learner = decTree(sample_size, X_train, y_train, X_test, y_test,
                           'gini', 3)
drawTree(learner, feature_cols, 'diabetes.png')
# A single parenthesised argument prints identically on Python 2 and 3.
print("Times for Training, Prediction: %.5f, %.5f" % (results['train_time'],
                                                      results['pred_time']))
print("Accuracy for Training, Test sets: %.5f, %.5f" % (results['acc_train'],
                                                        results['acc_test']))
print("-----------------------------------------------------------------------")