示例#1
0
文件: SVM.py 项目: himl/boson
    estimator = GridSearchCV(svm.SVC(kernel='rbf'), param_grid)
    return cross_validation_for_grid(estimator, data, targets)


def learn_by_one_feature(data, targets, estimator):
    """Cross-validate `estimator` on every single column of `data` in turn.

    For each column index along the second axis of `data`, a one-column
    slice (kept two-dimensional) is handed to `cross_validation` together
    with `targets`, and the three returned values are printed.

    Args:
        data: Object exposing `.shape` and 2-D slicing (`data[:, a:b]`).
        targets: Passed through unchanged to `cross_validation`.
        estimator: Passed through unchanged to `cross_validation`.

    Returns:
        None. Results are only written to standard output.
    """
    column_count = data.shape[1]
    for index in range(column_count):
        # Slice with a range (not a scalar index) so the column stays 2-D.
        single_column = data[:, index:index + 1]
        # The helper's result is unpacked as three values; judging by the
        # report labels below they are presumably mean accuracy, its
        # standard deviation and the elapsed time.
        scores = cross_validation(estimator, single_column, targets)
        accuracy, spread, elapsed = scores
        print("Column number: %d" % index)
        print("Accuracy: %0.2f (+/- %0.2f)" % (accuracy, spread))
        print("Time: %0.2f" % elapsed)


if __name__ == "__main__":
    # Script entry point: load the full training set through DataHandler,
    # then keep only the trailing `samples_size` entries for experiments.
    data_handler = DataHandler()
    all_data, all_targets = data_handler.get_training_data()

    samples_size = 5000
    # One slice object so data and targets are cut with the same bounds.
    tail = slice(-samples_size, None)
    data = all_data[tail]
    targets = all_targets[tail]

    # Disabled experiments, kept for reference.
    # Option 1: evaluate a single estimator with fixed parameters.
    # estimator = svm.SVC(kernel='linear', C=1)
    # estimator = svm.SVC(kernel='rbf', C=1, gamma=0.0001)
    # mean, standart_deviation, time = cross_validation(estimator, data, targets)

    # Option 2: search for the best parameters of a kernel.
    # mean, standart_deviation, time = find_best_linear_param(data, targets)
    # mean, standart_deviation, time = find_best_rbf_param(data, targets)

    # Report for whichever option above is enabled.
    # print("Accuracy: %0.2f (+/- %0.2f)" % (mean, standart_deviation))
    # print("Time: %0.2f" % time)
示例#2
0
from sklearn import tree
from SVM.DataHandler import DataHandler
from SVM.EvaluatingEstimator import cross_validation
from sklearn.grid_search import GridSearchCV
from SVM.EvaluatingEstimator import cross_validation_for_grid
from sklearn.decomposition import RandomizedPCA


# Load the training set, requesting 5000 samples from the project DataHandler.
dt = DataHandler()
loaded = dt.get_training_data(samples_size=5000)
training_data, targets = loaded

# Disabled alternative: fetch pretreated training and test data instead.
# training_data, targets, test_data = dt.get_pretreated_data(training_samples_size=5000,
#                                                            test_samples_size=5000)

# Disabled alternative: reduce the features to 5 PCA components first.
# pca = RandomizedPCA(n_components=5, whiten=False).fit(training_data)
# training_data = pca.transform(training_data)


# Decision tree with fixed hyper-parameters, evaluated by cross-validation.
tree_params = {'max_depth': 6, 'min_samples_leaf': 9}
estimator = tree.DecisionTreeClassifier(**tree_params)
# The helper's result is unpacked as three values; judging by the report
# labels below they are presumably mean accuracy, its standard deviation
# and the elapsed time.
scores = cross_validation(estimator, training_data, targets)
mean, standart_deviation, time = scores


# Disabled alternative: grid search over the tree hyper-parameters.
# param_grid = [{'max_depth': list(range(3, 20)), 'min_samples_leaf': list(range(5, 10)),
#                'min_samples_split': list(range(1, 5))}]
#
# estimator = tree.DecisionTreeClassifier()
# estimator = GridSearchCV(estimator, param_grid)
# mean, standart_deviation, time = cross_validation_for_grid(estimator, training_data, targets)

accuracy_report = "Accuracy: %0.2f (+/- %0.2f)" % (mean, standart_deviation)
print(accuracy_report)
time_report = "Time: %0.2f" % time
print(time_report)