示例#1
0
# Shuffle the sample order. X and y are indexed with the same permutation so
# every row keeps its own label. (idx is created above this excerpt.)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

# Split the data: the first nTrain rows train the model, the rest are held out.
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# The plain decision-tree baseline is disabled in this variant.
#modelDT = DecisionTreeClassifier()
#modelDT.fit(Xtrain,ytrain)

# Train the boosted decision tree on the training split.
modelBoostedDT = BoostedDT(numBoostingIters=100, maxTreeDepth=2)
modelBoostedDT.fit(Xtrain, ytrain)

# Predict labels for the held-out rows.
#ypred_DT = modelDT.predict(Xtest)
ypred_BoostedDT = modelBoostedDT.predict(Xtest)

# Accuracy is measured against ytest, i.e. on the held-out split rather than
# on the training data.
#accuracyDT = accuracy_score(ytest, ypred_DT)
accuracyBoostedDT = accuracy_score(ytest, ypred_BoostedDT)

#print "Decision Tree Accuracy = "+str(accuracyDT)
# Single-argument call form: prints the same text under Python 2 and is also
# valid syntax under Python 3 (the bare print statement is not).
print("Boosted Decision Tree Accuracy = " + str(accuracyBoostedDT))

# Load the labeled challenge data (comma-separated values).
challengeTrainingData = np.loadtxt('data/challengeTrainLabeled.dat',
                                   delimiter=',')
示例#2
0
# Fix the seed so the shuffle below produces the same permutation every run.
np.random.seed(13)
np.random.shuffle(idx)
# Apply the same permutation to X and y so rows and labels stay paired.
X = X[idx]
y = y[idx]

# Split the data: the first nTrain rows train the models, the rest are held out.
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]
# Debug output: the held-out labels. The call form works on Python 2 and 3.
print(ytest)
# print Xtrain

# Train the plain decision tree baseline.
modelDT = DecisionTreeClassifier()
modelDT.fit(Xtrain, ytrain)

# Train the boosted decision tree.
modelBoostedDT = BoostedDT(numBoostingIters=100, maxTreeDepth=2)
modelBoostedDT.fit(Xtrain, ytrain)

# Predict labels for the held-out rows with both models.
ypred_DT = modelDT.predict(Xtest)
ypred_BoostedDT = modelBoostedDT.predict(Xtest)

# Accuracy is measured against ytest, i.e. on the held-out split rather than
# on the training data.
accuracyDT = accuracy_score(ytest, ypred_DT)
accuracyBoostedDT = accuracy_score(ytest, ypred_BoostedDT)
# Emit the boosted predictions as one comma-separated line of integers.
print(','.join(str(e) for e in ypred_BoostedDT.astype(int)))

# Single-argument print calls give identical output on Python 2 and are valid
# syntax on Python 3, unlike the bare print statement.
print("Decision Tree Accuracy = " + str(accuracyDT))
print("Boosted Decision Tree Accuracy = " + str(accuracyBoostedDT))
# Reorder samples and labels with the same index array so they stay paired.
# (idx, n and nTrain are defined above this excerpt.)
X = X[idx]
y = y[idx]
# Split the data: first nTrain rows for training, the remainder for testing.
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# Train the plain decision tree baseline.
modelDT = DecisionTreeClassifier()
modelDT.fit(Xtrain, ytrain)

#print ypred_DT

# Build the custom boosted DT and a scikit-learn AdaBoost model for comparison.
# Note the two use different tree depths (3 vs. 2).
modelBoostedDT = BoostedDT(numBoostingIters=100, maxTreeDepth=3)
model = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=2),
                           n_estimators=100,
                           random_state=13)
# Score the AdaBoost model with 2-fold cross-validation over the full data set.
# NOTE(review): `results` is not read anywhere in the visible excerpt.
kfold = cross_validation.KFold(n=n, n_folds=2, random_state=13)
results = cross_validation.cross_val_score(model, X, y, cv=kfold)
# Fit both boosted models on the training split.
modelBoostedDT.fit(Xtrain, ytrain)
model.fit(Xtrain, ytrain)
# Fit an SVC with default parameters and predict the held-out rows.
clf = SVC()
clf.fit(Xtrain, ytrain)
y_pred_rbf1 = clf.predict(Xtest)
# Empty accumulators; they are filled by code beyond this excerpt, if at all.
scores = list()
scores_rbf = list()

# A larger boosted model trained on ALL of X and y (no held-out split).
k_model = BoostedDT(numBoostingIters=140, maxTreeDepth=5)
k_model.fit(X, y)
示例#4
0
# Build a seeded random permutation of the n row indices and apply it to both
# X and y so that rows and labels stay paired.
idx = np.arange(n)
np.random.seed(13)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

# Split the data: the first nTrain rows train the models, the rest are held out.
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# Train the plain decision tree baseline.
modelDT = DecisionTreeClassifier()
modelDT.fit(Xtrain, ytrain)

# Train the boosted decision tree.
modelBoostedDT = BoostedDT(numBoostingIters=200, maxTreeDepth=3)
modelBoostedDT.fit(Xtrain, ytrain)

# Predict labels for the held-out rows with both models.
ypred_DT = modelDT.predict(Xtest)
ypred_BoostedDT = modelBoostedDT.predict(Xtest)

# Accuracy is measured against ytest, i.e. on the held-out split rather than
# on the training data.
accuracyDT = accuracy_score(ytest, ypred_DT)
accuracyBoostedDT = accuracy_score(ytest, ypred_BoostedDT)

# Single-argument print calls give identical output on Python 2 and are valid
# syntax on Python 3, unlike the bare print statement.
print("Decision Tree Accuracy = " + str(accuracyDT))
print("Boosted Decision Tree Accuracy = " + str(accuracyBoostedDT))
# Load the unlabeled challenge test set (comma-separated values).
filename = 'data/challengeTestUnlabeled.dat'
X2 = np.loadtxt(filename, delimiter=',')

#n,d = X.shape
#nTrain = 0.50*n  #training on 50% of the data

# split the data (disabled: this stage trains on all labeled data instead)
#Xtrain = X[:nTrain,:]
#ytrain = y[:nTrain]
#Xtest = X[nTrain:,:]
#ytest = y[nTrain:]
Xtest_unlabeled = X2

# Train a larger boosted DT on the full labeled set X, y (no held-out split).
modelBoostedDT = BoostedDT(numBoostingIters=500, maxTreeDepth=6)
modelBoostedDT.fit(X,y)

# output predictions on the remaining data
#ypred_BoostedDT = modelBoostedDT.predict(X)
#accuracyBoostedDT = accuracy_score(y, ypred_BoostedDT)

#print 'Calculated boostedDT accuracy: ',accuracyBoostedDT

# Predict labels for the unlabeled challenge rows.
ypred_unlabeled_BoostedDT = modelBoostedDT.predict(Xtest_unlabeled)

# Open the predictions file for writing; predict_string starts empty.
# NOTE(review): the handle is opened without a `with` block, and the code that
# uses and closes it lies beyond this excerpt. Confirm it is closed there.
f = open('data/predictions-BoostedDT.dat', 'w')
predict_string = ''
for i in xrange(len(ypred_unlabeled_BoostedDT)):
    if i == 0:
# n = number of samples, d = number of features in the labeled data.
n, d = Xdata.shape
# NOTE(review): nTrain is a float and is not used in the visible excerpt; cast
# it to int before using it as a slice bound.
nTrain = 0.5*n

# Shuffle samples and labels with one seeded permutation so they stay paired.
idx = np.arange(n)
np.random.seed(22)
np.random.shuffle(idx)
Xdata = Xdata[idx]
ydata = ydata[idx]

# Grid over (numBoostingIters, maxTreeDepth). boost_iter_list ends up with one
# row per iteration count and one column per tree depth, each entry being the
# mean accuracy_score over the 10 folds.
boost_iter_list = []
for i in [100, 1000, 10000]:
    depth_list = []
    for j in [1, 2, 3]:
        test_accuracy_list = []
        modelBoostedDT = BoostedDT(numBoostingIters=i, maxTreeDepth=j)
        # Size the folds from the actual number of samples instead of a
        # hard-coded 2000, so the split always covers exactly the rows of Xdata.
        kf = KFold(n, n_folds=10)
        for train_index, test_index in kf:
            Xtrain, Xtest = Xdata[train_index], Xdata[test_index]
            ytrain, ytest = ydata[train_index], ydata[test_index]
            # The same model object is re-fit on every fold.
            modelBoostedDT.fit(Xtrain, ytrain)
            test_ypred_BoostedDT = modelBoostedDT.predict(Xtest)
            test_accuracyBoostedDT = accuracy_score(ytest, test_ypred_BoostedDT)
            test_accuracy_list.append(test_accuracyBoostedDT)
        depth_list.append(np.mean(test_accuracy_list))
    boost_iter_list.append(depth_list)
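'''
output (presumably boost_iter_list: one row per numBoostingIters in
[100, 1000, 10000], one column per maxTreeDepth in [1, 2, 3])
[[0.031000000000000007, 0.016499999999999997, 0.017000000000000005],
 [0.027000000000000003, 0.019000000000000003, 0.013500000000000002],
 [0.026500000000000003, 0.020500000000000001, 0.013000000000000001]]
'''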
# Shuffle the sample order. X and y are indexed with the same permutation so
# every row keeps its own label. (idx is created above this excerpt.)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

# Split the data: the first nTrain rows train the models, the rest are held out.
Xtrain = X[:nTrain, :]
ytrain = y[:nTrain]
Xtest = X[nTrain:, :]
ytest = y[nTrain:]

# Train the plain decision tree baseline.
modelDT = DecisionTreeClassifier()
modelDT.fit(Xtrain, ytrain)

# Train the boosted decision tree.
modelBoostedDT = BoostedDT(numBoostingIters=100, maxTreeDepth=2)
modelBoostedDT.fit(Xtrain, ytrain)

# Predict labels for the held-out rows with both models.
ypred_DT = modelDT.predict(Xtest)
ypred_BoostedDT = modelBoostedDT.predict(Xtest)

# Accuracy is measured against ytest, i.e. on the held-out split rather than
# on the training data.
accuracyDT = accuracy_score(ytest, ypred_DT)
accuracyBoostedDT = accuracy_score(ytest, ypred_BoostedDT)

# Single-argument print calls give identical output on Python 2 and are valid
# syntax on Python 3, unlike the bare print statement.
print("Decision Tree Accuracy = " + str(accuracyDT))
print("Boosted Decision Tree Accuracy = " + str(accuracyBoostedDT))

# Fresh, untrained boosted model intended for data/challengeTrainLabeled.dat
# (per the original note; its fitting happens beyond this excerpt).
newBoosted = BoostedDT(numBoostingIters=100, maxTreeDepth=2)