###########################################
############## SEEDS DATASET ##############
###########################################
# Loads the "seeds" dataset and, for each of five shuffled folds, fits a
# 1-nearest-neighbour classifier on the training rows and predicts the
# held-out rows.

from main.ch02.load import load_dataset
from sklearn.neighbors import KNeighborsClassifier

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; its replacement is `sklearn.model_selection`. Prefer the current
# module and only fall back to the legacy one on old installations. The two
# KFold classes take different constructor arguments, so remember which one
# was imported.
try:
    from sklearn.model_selection import KFold
    _HAS_MODEL_SELECTION = True
except ImportError:
    from sklearn.cross_validation import KFold
    _HAS_MODEL_SELECTION = False

# NOTE(review): 'asymmetry coefficien' looks like a typo for
# 'asymmetry coefficient'; it is left unchanged in case other code matches
# on the exact string.
feature_names = [
    'area',
    'perimeter',
    'compactness',
    'length of kernel',
    'width of kernel',
    'asymmetry coefficien',
    'length of kernel groove',
]

features, labels = load_dataset('seeds')

classifier = KNeighborsClassifier(n_neighbors=1)

if _HAS_MODEL_SELECTION:
    # Current API: the splitter is configured without the data and yields
    # (train_indices, test_indices) pairs from `split`.
    kf = KFold(n_splits=5, shuffle=True)
    folds = kf.split(features)
else:
    # Legacy API: the splitter takes the sample count up front and is
    # itself the iterable of (train_indices, test_indices) pairs.
    kf = KFold(len(features), n_folds=5, shuffle=True)
    folds = kf

means = []
for training, testing in folds:
    # We learn a model for this fold with `fit` and then apply it to the
    # testing data with `predict`:
    classifier.fit(features[training], labels[training])
    prediction = classifier.predict(features[testing])

    # np.mean on an array of booleans returns fraction
    # NOTE(review): the comment above is cut off in the source, and the
    # statement it introduces (which would presumably score `prediction`
    # and append to `means`) is not present here -- confirm against the
    # original script.
def __init__(self):
    """Load the "seeds" dataset into the module-level globals.

    Rebinds the globals ``features`` and ``labels`` to the pair returned by
    ``load.load_dataset("seeds")``, passes them to ``load.test_load``, and
    then logs an info message. No instance attributes are set.
    """
    global features, labels

    seeds_data = load.load_dataset("seeds")
    # Bind the globals before the check so they are set even if it raises.
    features, labels = seeds_data

    load.test_load(features, labels)
    logging.info("data loaded successfully")