def validate_feature_linear(features, labels, classes, n_folds=5, print_folds=True, print_absolute=True, print_logloss=True):
    """Cross-validate an LDA classifier on pre-extracted features.

    Runs a label-aware K-fold split (samples sharing a label stay in the
    same fold) and reports two metrics: plain accuracy ("absolute") and
    log-loss.

    Parameters
    ----------
    features : array-like of shape (n_samples, n_features)
        Feature matrix.
    labels : array-like
        Grouping labels handed to ``cv.LabelKFold`` so that samples with
        the same label never straddle a train/test boundary.
    classes : array-like
        Target class per sample.
    n_folds : int, default 5
        Number of folds.
    print_folds : bool, default True
        Also print the per-fold values inside each enabled section.
    print_absolute : bool, default True
        Print the accuracy section (and compute its scores).
    print_logloss : bool, default True
        Print the log-loss section (and compute its scores).

    Returns
    -------
    None — results are printed only.
    """
    kfold = cv.LabelKFold(labels, n_folds)
    model = lda.LDA()
    if print_absolute:
        # BUGFIX: the per-fold print used to sit outside this guard, so
        # print_absolute=False with print_folds=True raised NameError on
        # the never-assigned `score`. Per-fold detail now belongs to its
        # own section.
        score = cross_validation.cross_val_score(model, features, classes, cv=kfold)
        print("absolute scores")
        if print_folds:
            print("\tfolds:", score)
        print("\tmean:", score.mean(), "std:", numpy.std(score))
    if print_logloss:
        # Only pay for the log-loss evaluation when its output is wanted;
        # the fitted folds are reused via given_kfold=True.
        scores = score_calculation.loglossKFold(features, classes, model, kfold, given_kfold=True)
        print("logloss scores")
        if print_folds:
            print("\tfolds", scores)
        print("\tmean:", numpy.mean(scores), "std:", numpy.std(scores))
# NOTE(review): the next three statements reference `thumbs[i]` and `i`,
# so they appear to be the tail of a per-thumbnail feature-extraction loop
# whose `for` header (and the definitions of `thumbs`, `splits`, `features`,
# `amount`, `classes`) lie outside this view — confirm against the full file.
harald = extractor.calculateDarktoBrightRatio(thumbs[i])
rian = extractor.splitColorFeatures(thumbs[i], splits)
# One combined feature vector per thumbnail: ratio features followed by
# split-color features.
features.append(numpy.append(harald, rian))
# Disabled experiment: exhaustive SVC hyper-parameter grid search.
#model = grid_search.GridSearchCV(svm.SVC(),{'kernel' : ['poly'], 'C' : [1, 10, 100, 1000], 'degree' : [4,7,10], 'shrinking' : [True, False]})
#model.fit(features, classes)
#print(model.best_estimator_)
#print('\a')
print("Producing KFold indexes")
# Plain shuffled K-fold over `amount` samples (no label grouping here).
kfold = cv.KFold(amount, n_folds=5, shuffle=True)
model = lda.LDA()
# Alternative classifiers tried and parked:
#model = svm.SVC(kernel = 'linear')
#model = qda.QDA()
# Accuracy via cross-validation on the LDA model.
score = cross_validation.cross_val_score(model, features, classes, cv=kfold)
print("scores ", score)
print("mean score ", score.mean())
#model = svm.SVC(kernel = 'linear', probability = True)
# Fresh (unfitted) LDA instance for the log-loss evaluation below.
model = lda.LDA()
#model = neighbors.KNeighborsClassifier(n_neighbors = 1)
# NOTE(review): this passes the integer 5 rather than the `kfold` object
# above, so loglossKFold presumably builds its own 5-fold split — the two
# metrics are therefore computed on different partitions; verify intent.
scores = score_calculation.loglossKFold(features, classes, model, 5)
print("logloss scores ", scores)
print("logloss score mean ", numpy.mean(scores), " ", numpy.std(scores))
# Disabled experiment: inspect which classes are most often misclassified.
#predictions = cross_validation.cross_val_predict(model, features, classes, cv = kfold)
#wrongIndexes = numpy.nonzero(predictions != classes)
#uniqueWrongs, counts = numpy.unique(numpy.append(predictions[[wrongIndexes]], numpy.array(classes)[[wrongIndexes]]), return_counts = True)
#wrongs = uniqueWrongs[counts > 10]
# Terminal bell: audible signal that the (long-running) script finished.
print('\a')