示例#1
0
import numpy as np
from sklearn import metrics
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# on newer releases train_test_split lives in sklearn.model_selection.
from sklearn.cross_validation import train_test_split
from sklearn.utils import shuffle

from adaboost_trees import AdaboostTrees

# Load the feature matrix and the matching labels from disk.
X = np.loadtxt('../feature/5grams_count_mc_features')
y = np.loadtxt('../data/tag_mc')

# Min-max scale the whole matrix in place using its global minimum/maximum.
X -= X.min()
value_range = X.max()
# A constant matrix has a zero range; dividing by it would turn every entry
# into NaN, so in that case the (all-zero) matrix is left untouched.
if value_range > 0:
    X /= value_range
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Instantiate the ensemble (the argument is presumably the number of trees)
# and train it on the training split.
forest = AdaboostTrees(20)
forest.train(X_train, y_train)
expected = y_test
predicted = forest.predict(X_test)

# Summarize the fit of the model on the held-out split.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
# Usage example: train an AdaboostTrees model on one dataset, then prepare a
# separate test dataset on the same feature scale.
nbTrees = 20


# Train dataset
X = np.loadtxt('train_data')
y = np.loadtxt('train_labels')
X, y = shuffle(X, y)


# Data normalization: min-max scale in place with the training set's global
# minimum and range. Both values are kept so the test set can reuse them.
train_min = X.min()
X -= train_min
train_range = X.max()
# A zero range (constant matrix) would make the division produce NaN.
if train_range > 0:
    X /= train_range


# Instantiation
forest = AdaboostTrees(nbTrees)


# Training
forest.train(X, y)


# Test dataset
X = np.loadtxt('test_data')
y = np.loadtxt('test_labels')
X, y = shuffle(X, y)


# Data normalization: apply the *training* offset and range so the test
# features are on the same scale the model was trained on. Scaling the test
# set by its own min/max would map equal raw values to different inputs.
X -= train_min
if train_range > 0:
    X /= train_range