def testAllClassifiers(Xfile, yfile):
    """Fit a fixed set of scikit-learn classifiers and print each test score.

    The data comes from ``loadAndSplitData(Xfile, yfile)``; only the train
    and test partitions it returns are used here. Nothing is returned.
    """
    _, Xtrain, Xtest, _, ytrain, ytest = loadAndSplitData(Xfile, yfile)

    # (display name, estimator) pairs, evaluated in this order.
    candidates = [
        ("perceptron", linear_model.Perceptron(max_iter=1000)),
        ("kNN, k=15", neighbors.KNeighborsClassifier(15, weights='uniform')),
        ("logistic regression", linear_model.LogisticRegression()),
        ("decision tree", tree.DecisionTreeClassifier()),
        ("bagging", ensemble.BaggingClassifier()),
        ("boosting", ensemble.AdaBoostClassifier()),
        ("random forest", ensemble.RandomForestClassifier()),
        ("support vector machines", svm.LinearSVC()),
    ]

    for label, estimator in candidates:
        estimator.fit(Xtrain, ytrain)
        print(label + " :", estimator.score(Xtest, ytest))
def main():
    """Train one Perceptron per iris label set and print its test metrics.

    ``data_preprocessing`` is expected to return features plus an indexable
    collection of label vectors; indices 0, 1 and 2 are reported as
    Iris-setosa, Iris-versicolor and Iris-virginica respectively.
    """
    train_frame = pd.read_csv("./SimplePerceptron/iris_train.txt",
                              header=None)
    # Seed before shuffling so the row order is reproducible.
    np.random.seed(0)
    train_frame = train_frame.sample(frac=1)
    x_train, y_train = data_preprocessing(train_frame)

    test_frame = pd.read_csv("./SimplePerceptron/iris_test.txt", header=None)
    x_test, y_test = data_preprocessing(test_frame)

    species_prefixes = ("Iris-setosa ", "Iris-versicolor ", "Iris-virginica ")
    for current_index, prefix in enumerate(species_prefixes):
        model = linear_model.Perceptron()
        model.fit(x_train, y_train[current_index])
        y_predicted = model.predict(x_test)

        expected = y_test[current_index]
        accuracy = accuracy_score(expected, y_predicted)
        precision = precision_score(expected, y_predicted)
        recall = recall_score(expected, y_predicted)

        print(prefix, end='')
        print("accuracy:", accuracy * 100, "% precision:", precision * 100,
              "% recall:", recall * 100, "%")
def examples():
    """Walk through minimal Perceptron, accuracy-score and StandardScaler usage.

    The function is a usage cheat sheet: it fits a Perceptron on a tiny
    array, prints its predictions, and then demonstrates feature scaling.

    Returns:
        int: always 0.
    """
    X = np.array([[1, 2], [3, 4], [5, 6]])
    y = np.array([0, 1, 0])

    clf = sllm.Perceptron()
    clf.fit(X, y)
    predictions = clf.predict(X)
    print(predictions)

    # Accuracy metric. It needs the true and the predicted labels; the
    # previous argument-less call raised TypeError.
    slm.accuracy_score(y, predictions)

    # fit_transform finds the normalisation parameters (mean and variance of
    # every feature) on the sample and immediately normalises that sample
    # with them. It needs the sample as argument (previously missing).
    scaler = slp.StandardScaler()
    scaler.fit_transform(X)

    # Fit the scaler on the training data only, then reuse the same
    # parameters for the test data.
    scaler = slp.StandardScaler()
    X_train = np.array([[100.0, 2.0], [50.0, 4.0], [70.0, 6.0]])
    X_test = np.array([[90.0, 1], [40.0, 3], [60.0, 4]])
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return 0
def kappa_pruning(self, k_fold, n_times, pool_size, m):
    """Rank pairs of bagged Perceptrons by Cohen's kappa on the training data.

    For each of ``n_times`` repetitions the data in ``self.x`` / ``self.y``
    is split with a shuffled ``StratifiedKFold``; the first training fold is
    oversampled with SMOTE, a bagging ensemble of ``pool_size`` Perceptrons
    is fitted on it, and the kappa score of every pair of ensemble members
    is recorded.

    Args:
        k_fold: number of stratified folds.
        n_times: number of repetitions.
        pool_size: number of estimators in the bagging ensemble.
        m: how many of the lowest-kappa entries to return.

    Returns:
        tuple: ``(pruning[:m], [], [])`` where ``pruning`` holds
        ``(index_a, index_b, kappa)`` tuples sorted by ascending kappa. The
        two kDN-based lists are never filled by the current code and are
        therefore always empty.
    """
    # Materialise the pairs once: a bare combinations() iterator is exhausted
    # after the first repetition, which left every later repetition with no
    # pairs to score.
    pairs = list(combinations(range(pool_size), 2))
    pruning = []
    pruningKdnGreater = []
    pruningKdnLess = []

    for _ in range(n_times):
        skf = StratifiedKFold(n_splits=k_fold, shuffle=True)
        for train_index, _test_index in skf.split(self.x, self.y):
            X_train = self.x[train_index]
            Y_train = self.y[train_index]
            x_train, y_train = SMOTE().fit_sample(X_train, Y_train)

            # The kDN split is not used yet, but the call is kept in case it
            # has side effects on the instance.
            self.k_Disagreeing_neighbors_kDN(x_train, y_train)

            BagPercep = BaggingClassifier(
                linear_model.Perceptron(max_iter=5), pool_size)
            BagPercep.fit(x_train, y_train)

            # Predict once per estimator instead of once per pair.
            predictions = [
                estimator.predict(x_train)
                for estimator in BagPercep.estimators_
            ]
            for first, second in pairs:
                kappa = cohen_kappa_score(predictions[first],
                                          predictions[second])
                pruning.append((first, second, kappa))

            # NOTE(review): only the first fold of each repetition is used;
            # this assumes the original `break` sat at fold level - confirm.
            break

    pruning.sort(key=lambda tup: tup[2])
    return (pruning[:m], pruningKdnGreater[:m], pruningKdnLess[:m])
def rbm_lr(train_set, test_set): logistic = linear_model.LogisticRegression(C=1000.0, penalty='l1', tol=1e-6) rbm = BernoulliRBM(random_state=0, verbose=True, learning_rate=0.6, n_iter=5, n_components=256) perce = linear_model.Perceptron() classifier = Pipeline(steps=[('Percetron', perce), ('logistic', logistic)]) print '....start training ner_model[RBM->LR] at ', time.strftime( '%Y-%m-%d %H:%M:%S', time.localtime(time.time())) classifier.fit(train_set[0], train_set[1]) print '....finished training ner_model[Percetron->LR] at ', time.strftime( '%Y-%m-%d %H:%M:%S', time.localtime(time.time())) print '..predicting' ner_rs = classifier.predict(test_set) print ner_rs[0], ner_rs[1] print '....finished predicting ner_model[Percetron->LR] at ', time.strftime( '%Y-%m-%d %H:%M:%S', time.localtime(time.time())) return ner_rs
def LinearModel_classify(dataset):
    """Fit four classifiers on the labelled data and print their scores.

    Each estimator is fitted on ``train_labled_x`` / ``train_labled_y`` and
    then scored on the test split and on the unlabelled training split. The
    list of unlabelled-split scores is printed first, then the list of test
    scores. Nothing is returned.
    """
    estimators = [
        linear_model.LogisticRegression(random_state=0,
                                        solver='lbfgs',
                                        multi_class='multinomial'),
        linear_model.Perceptron(tol=1e-3, random_state=0),
        ensemble.RandomForestClassifier(n_estimators=10,
                                        max_depth=50,
                                        random_state=0),
        naive_bayes.GaussianNB(),
    ]

    test_acc, unlabel_error = [], []
    # Fit and score one estimator at a time, in declaration order.
    for estimator in estimators:
        fitted = estimator.fit(dataset['train_labled_x'],
                               dataset['train_labled_y'])
        test_acc.append(fitted.score(dataset['test_x'], dataset['test_y']))
        unlabel_error.append(
            fitted.score(dataset['train_unlabled_x'],
                         dataset['train_unlabled_y']))

    print(unlabel_error)
    print(test_acc)
def fit(self, X1, Y1, X2, Y2):
    """Evaluate per-label Perceptrons on two datasets and plot mean accuracy.

    For each (X, Y) pair the data is split 67/33, one Perceptron is fitted
    per label column for every iteration budget in 500..5000, and the mean
    per-label accuracy is printed and plotted against the budget.

    NOTE(review): the source layout was lost; the nesting below (in
    particular the position of the `n_labels > 10` break) is the most
    plausible reading and should be confirmed.
    NOTE(review): `n_iter` is not accepted by recent scikit-learn releases
    (replaced by `max_iter`) - confirm the targeted version.
    """
    for X, Y in zip([X1, X2], [Y1, Y2]):
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=0.33, random_state=42)
        # Both targets are unpacked as 2-D: (samples, labels).
        n_train, n_labels = np.shape(y_train)
        n_test, n_labels = np.shape(y_test)
        y_pred = np.zeros(np.shape(y_test))
        plt.figure()
        iter_range = np.arange(500, 5500, 500)
        # m[j] holds the mean per-label accuracy for iter_range[j].
        m = np.zeros(len(iter_range))
        for j in range(len(iter_range)):
            per = linear_model.Perceptron(n_iter=iter_range[j])
            acc = np.zeros(n_labels)
            for i in range(n_labels):
                # Refit the same Perceptron on label column i and predict
                # that column for the test split.
                per.fit(X_train, y_train[:, i])
                y_pred[:, i] = per.predict(X_test)
                # Accuracy for each label.
                acc[i] = accuracy_score(y_test[:, i],
                                        y_pred[:, i],
                                        normalize=True)
            m[j] = np.mean(acc)
            print("iteration {}".format(iter_range[j]))
            print("accuracy mean {}, variance {}".format(
                m[j], np.var(acc)))
            # Accuracy over all labels at once.
            print("accuacy {}".format(
                accuracy_score(y_test, y_pred, normalize=True)))
        # Stop before plotting when the dataset has more than 10 labels.
        if n_labels > 10:
            break
        plt.plot(iter_range, m, '-o')
        plt.title("BR perceptron for \"emotions\"")
        plt.yticks(np.arange(0, 1.1, 0.1))
        plt.show()
def decide_model(self, datapoints, labels):
    """Fit several classifiers and return the best-scoring fitted one.

    Before each classifier is fitted the data is reshuffled; the first tenth
    is held out for evaluation and the rest is used for training. The score
    is the value at index 1 of
    ``self.get_precission_from_report(classification_report(...))``.

    Args:
        datapoints: array supporting fancy indexing and ``.shape``.
        labels: array of targets aligned with ``datapoints``.

    Returns:
        The fitted classifier with the highest score, or ``None`` when no
        classifier scores above 0.
    """
    classifiers = [
        RandomForestClassifier(n_estimators=4),
        svm.SVC(probability=True),
        linear_model.Perceptron(),
        linear_model.SGDClassifier(shuffle=True),
    ]

    best = 0
    model = None
    for clf in classifiers:
        # Reshuffle, so every classifier sees a different hold-out split.
        p = np.random.permutation(len(labels))
        datapoints = datapoints[p]
        labels = labels[p]

        # Floor division keeps the slice index an int on Python 3 as well
        # (plain `/` yields a float there and breaks the slicing).
        partition = datapoints.shape[0] // 10
        Tr_data, Tr_labels = datapoints[partition:], labels[partition:]
        Te_data, Te_labels = datapoints[:partition], labels[:partition]

        fit = clf.fit(Tr_data, Tr_labels)
        Te_pred = fit.predict(Te_data)
        cr = classification_report(Te_labels, Te_pred)
        trues = self.get_precission_from_report(cr)[1]
        if trues > best:
            best = trues
            model = fit

    # Single-argument form prints the same text on Python 2 and Python 3.
    # NOTE(review): assumes the original print ran once, after the loop.
    print("%s %s" % (type(model), best))
    return model
def Classify(self, N, D, Distance):
    """Train a Perceptron on simulated data and evaluate it on 100 samples.

    Args:
        N: number of training samples requested from ``SimClasses.GetData``.
        D: passed through to ``GetData``.
        Distance: passed through to ``GetData``.

    Returns:
        tuple: ``(accuracy, elapsed, parameters)`` where ``accuracy`` is the
        number of correctly predicted test samples (a count out of 100, not
        a fraction), ``elapsed`` is the time spent in ``fit`` and
        ``parameters`` is the fitted ``coef_``.
    """
    # Generate the training data.
    cp = SimClasses()
    Xtr, Ytr = cp.GetData(N, D, Distance)

    # time.clock() was removed in Python 3.8; prefer perf_counter and fall
    # back to clock only on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock

    # Train and time the fit.
    pr = lm.Perceptron()
    start = timer()
    pr.fit(Xtr, Ytr)
    elapsed = timer() - start

    # Test on a fresh sample of fixed size.
    n_test = 100
    Xte, Yte = cp.GetData(n_test, D, Distance)
    Z = pr.predict(Xte)
    parameters = pr.coef_

    # Count the predictions that match the true labels.
    accuracy = int(np.count_nonzero(Yte.reshape(1, n_test) == Z))
    return accuracy, elapsed, parameters
def perceptron():
    """Fit scikit-learn's Perceptron and print its accuracy on the same data.

    The data comes from ``load_classification_data()``. Each sample is
    predicted individually, the prediction is mapped to -1 / +1, and the
    share of matches with the labels is printed, followed by a fixed
    reference line for the from-scratch implementation.
    """
    print(' ')
    print('===== Perceptron =====')

    # X: training data, L: training labels.
    X, L = load_classification_data()

    # Instantiate and learn a Perceptron model.
    model = linear_model.Perceptron()
    model.fit(X, L)

    # Count the samples whose sign-mapped prediction equals the label.
    correct = 0
    for idx in range(len(X)):
        row = X[idx].copy().tolist()
        raw = model.predict([row])
        sign = -1 if raw < 0.0 else +1
        if sign == L[idx]:
            correct += 1

    print(' ')
    print('Perceptron using Sklearn:')
    print('\tAccuracy:', round((correct / len(X)), 2))
    print(' ')
    print('Perceptron from Scratch:')
    print('\tAccuracy:', 0.99)
    print(' ')
#Gradient Boosting Classifier clf = GradientBoostingClassifier() clf.set_params(n_estimators=100, learning_rate=0.25) gb_clf = clf.fit(X_train, Y_train) gb_predict = gb_clf.predict(X_test) gb_acc = accuracy_score(Y_test, gb_predict) accuracy = cross_val_score(clf, X_train, Y_train, cv=10, scoring='accuracy') f_score = cross_val_score(clf, X_train, Y_train, cv=10, scoring='f1_micro') print("GradientBoostingClassifier:") print("Accuracy" + " " + "F-Score") print(accuracy.mean(), " - ", f_score.mean()) classifiers["GradientBoostingClassifier"] = clf #Perceptron clf = linear_model.Perceptron() #clf.set_params(max_iter = 1000,alpha = 0.01) pt_clf = clf.fit(X_train, Y_train) pt_predict = pt_clf.predict(X_test) pt_acc = accuracy_score(Y_test, pt_predict) accuracy = cross_val_score(clf, X_train, Y_train, cv=10, scoring='accuracy') f_score = cross_val_score(clf, X_train, Y_train, cv=10, scoring='f1_micro') print("Perceptron:") print("Accuracy" + " " + "F-Score") print(accuracy.mean(), " - ", f_score.mean()) classifiers["Perceptron"] = clf print( "Next we proceed to apply Feature Scaling to see if the performance of our various classifiers improves" ) #Feature scaling aims to bring the values of our numerical features between 0 and 1
import sklearn.linear_model as lm
import numpy as np
import SimplePerceptron as sp

# Load both data sets through the project's loader.
print("load training data")
trainData, trainLabel = sp.loadData('mnist_train.csv', 'training')
print("load test data")
testData, testLabel = sp.loadData('mnist_test.csv', 'test')

# Fit a default Perceptron and show what it learned.
perceptron = lm.Perceptron()
perceptron.fit(trainData, trainLabel)
w, b = perceptron.coef_, perceptron.intercept_
print("w:", w, "\n", "b:", b, "\n", "n_iter:", perceptron.n_iter_)

# Report the score on both sets as a whole percentage.
res = perceptron.score(trainData, trainLabel)
print("correct rate on training set:" + format(res, ".0%"))
res2 = perceptron.score(testData, testLabel)
print("correct rate on test set:" + format(res2, ".0%"))
ensemble.BaggingClassifier(), ensemble.ExtraTreesClassifier(), ensemble.GradientBoostingClassifier(), ensemble.RandomForestClassifier(), #Gaussian Processes #gaussian_process.GaussianProcessClassifier(), #GLM linear_model.LogisticRegressionCV(), linear_model.LogisticRegression(C=1000, random_state=0, solver='liblinear'), linear_model.PassiveAggressiveClassifier(), linear_model.RidgeClassifierCV(), linear_model.SGDClassifier(), linear_model.Perceptron(), #Navies Bayes naive_bayes.BernoulliNB(), #naive_bayes.GaussianNB(), #Nearest Neighbor neighbors.KNeighborsClassifier(), #SVM svm.SVC(probability=True), svm.NuSVC(probability=True), svm.LinearSVC(), #Trees tree.DecisionTreeClassifier(),
def get_skl_estimator(self, **default_parameters):
    """Return a scikit-learn Perceptron built from the given keyword arguments."""
    estimator = linear_model.Perceptron(**default_parameters)
    return estimator
def error_curves(class1, whichclf="implemented_perceptron"):
    """Plot training and test curves against the number of iterations.

    For every class index in ``range(9)`` other than ``class1``, the USPS
    train and test data for the pair (class1, other) are extracted and a
    Perceptron is fitted for each iteration budget in ``range(1, 40)``. Two
    side-by-side plots are shown: one for the training data, one for the
    test data.

    NOTE(review): the "implemented_perceptron" branch records ``score``
    as-is while the scikit-learn branch records ``1 - score``; whether the
    custom ``score`` already returns an error rate cannot be told from here.

    :param class1: the class compared against each of the others.
    :param whichclf: "implemented_perceptron" selects the project's own
        Perceptron; any other value selects scikit-learn's.
    :return: None; the figure is displayed with ``plt.show()``.
    """
    palette = [
        "red", "green", "blue", "orange", "black", "cyan", "yellow",
        "magenta", "green"
    ]
    markers = [".", "+", "*", "o", "x", "^", ',', 'v', '^']

    fig, (ax1, ax2) = plt.subplots(ncols=2, sharex=True)
    plt.suptitle(
        "Courbes d’erreurs en apprentissage et en test en\n fonction du "
        "nombre d'itérations")
    ax1.set_title("Apprentissage")
    ax2.set_title("Test")

    use_custom = whichclf == "implemented_perceptron"
    x_iter = range(1, 40)

    for other in range(9):
        if other == class1:
            continue

        # USPS data restricted to the two classes being compared.
        datax, datay = extract_usps("USPS_train.txt", class1, other)
        dataTx, dataTy = extract_usps("USPS_test.txt", class1, other)

        # One value per iteration budget, for training and for test.
        err_learning, err_test = [], []
        for budget in x_iter:
            if use_custom:
                clf = Perceptron(loss=hinge,
                                 loss_g=hinge_g,
                                 max_iter=budget,
                                 eps=0.1,
                                 kernel=None)
                clf.fit(datax, datay)
                train_value = clf.score(datax, datay)
                test_value = clf.score(dataTx, dataTy)
            else:
                clf = linear_model.Perceptron(max_iter=budget, n_jobs=-1)
                clf.fit(datax, datay)
                train_value = 1 - clf.score(datax, datay)
                test_value = 1 - clf.score(dataTx, dataTy)
            err_learning.append(train_value)
            err_test.append(test_value)

        curve_label = '{} vs {}'.format(class1, other)
        ax1.plot(x_iter,
                 err_learning,
                 c=palette[other],
                 marker=markers[other],
                 label=curve_label)
        ax2.plot(x_iter,
                 err_test,
                 c=palette[other],
                 marker=markers[other],
                 label=curve_label)

    for axis in (ax1, ax2):
        axis.legend(loc='upper right', ncol=1, fancybox=True, shadow=True)
    fig.tight_layout(rect=[0, 0.03, 1, 0.85])
    # plt.savefig("error_curves_{}vsAll".format(class1))
    plt.show()
s=70, alpha=.07) plt.title('Testing set age range vs degree malignant') plt.xlabel('age range') plt.ylabel('degree malignant') plt.xticks(np.arange(0, 100, 10.0)) plt.yticks(np.arange(0, 3, 1.0)) plt.show() ######################### # Logistic Regression ######################### # Perceptron print("Perceptron") perceptron = lm.Perceptron(verbose=1) perceptron.fit(X_train, Y_train) Y_pred = perceptron.predict(X_test) print("\n\nPerceptron") print("\tNumber of Features...", perceptron.n_features_in_) print("\tColumns", X_train.columns) print("\tCoefficients", perceptron.coef_) print("\tIntercept", perceptron.intercept_) print('\nAccuracy of perceptron on test set: {:.2f}'.format( perceptron.score(X_test, Y_test))) print("Confusion_Matrix...") confusion_matrixP = confusion_matrix(Y_test, Y_pred) print(confusion_matrixP)
from sklearn import datasets, linear_model, svm, neighbors, naive_bayes
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# Load the breast-cancer dataset.
breast_cancer = datasets.load_breast_cancer()
x = breast_cancer.data
y = breast_cancer.target

# Hold out the last `test_samples` rows for testing; train on the rest.
test_samples = 100
x_train, x_test = x[:-test_samples], x[-test_samples:]
y_train, y_test = y[:-test_samples], y[-test_samples:]

# Base learners taking part in the vote.
learner_1 = neighbors.KNeighborsClassifier(n_neighbors=5)
learner_2 = linear_model.Perceptron(tol=1e-2, random_state=0)
learner_3 = svm.SVC(gamma=0.001)

# Voting ensemble over the three named learners.
voting = VotingClassifier(estimators=[
    ('KNN', learner_1),
    ('Prc', learner_2),
    ('SVM', learner_3),
])

# Train the ensemble, then predict the most voted class per test sample.
voting.fit(x_train, y_train)
hard_predictions = voting.predict(x_test)

# Accuracy of hard voting.
print('Hard Voting:', accuracy_score(y_test, hard_predictions))
# Second vectorisation stage applied to the training data, then densified.
train_data = count_vect2.fit_transform(train_data)
train_data = train_data.toarray()
# Disabled debugging output (feature rows, shape, vocabulary counts):
# for x in train_data[0]:
#     print x
# print train_data[0]
# print train_data.shape
# vocab=count_vect.get_feature_names()
#print vocab
# dist = np.sum(train_data, axis=0)
# for tag, count in zip(vocab, dist):
#     print count, tag

# Evaluate a Perceptron ten times, once per part index handed to ttdata().
for i in range(1, 11):
    [data, label, size] = ttdata(i)
    # Both vectorisers are re-fitted on this part's data before splitting.
    data = count_vect.fit_transform(data)
    data = data.toarray()
    data = count_vect2.fit_transform(data)
    data = data.toarray()
    traindata = data[:size]
    trainlabel = label[:size]
    # NOTE(review): starting the test slice at size - 1 means the sample at
    # index size - 1 is in both the training and the test slice - confirm
    # whether `size:` was intended.
    testdata = data[size - 1:]
    testlabel = label[size - 1:]
    clf = linear_model.Perceptron(alpha=0, fit_intercept=True)
    clf.fit(traindata, trainlabel)
    prediction = clf.predict(testdata)
    # The trailing comma keeps the accuracy on the same output line.
    print 'Taking part %d as the test data the accuracy is:' % (i),
    print '%.4f' % (metrics.accuracy_score(prediction, testlabel))
    # print(metrics.classification_report(testlabel,prediction))
    # for i in range(577,867):
    #     print prediction[i-577],label[i]
def perceptron(data_train, data_test, target_train, target_test):
    """Fit an unpenalised, class-balanced Perceptron and report its statistics.

    The model is trained on the training pair, used to predict ``data_test``
    and the result of ``classifierStats(target_test, predictions)`` is
    returned.
    """
    model = linear_model.Perceptron(class_weight='balanced', penalty=None)
    model.fit(data_train, target_train)
    predictions = model.predict(data_test)
    stats = classifierStats(target_test, predictions)
    return stats
def __init__(self):
    """Set the boolean class labels and create an L1-penalised Perceptron."""
    self.classes = [True, False]
    # L1 penalty with regularisation strength alpha = 0.0008.
    perceptron_options = {'penalty': 'l1', 'alpha': 0.0008}
    self.classifier = linear_model.Perceptron(**perceptron_options)
def model(self, **kwargs):
    """Return a Perceptron configured from ``alpha``, ``max_iter`` and ``tol``.

    Missing keywords fall back to 0.1, 100 and 0.01 respectively; any other
    keyword is ignored.
    """
    settings = {'alpha': 0.1, 'max_iter': 100, 'tol': 0.01}
    # Override a default only when the caller supplied that keyword.
    for name in settings:
        if name in kwargs:
            settings[name] = kwargs[name]
    return linear_model.Perceptron(**settings)
def __init__(self):
    """Create the wrapped Perceptron with ``warm_start`` enabled."""
    warm_perceptron = linear_model.Perceptron(warm_start=True)
    self.model = warm_perceptron
'features': { 'C': [0.1, 1, 100], 'gamma': [0.001, 0.1, 10], 'kernel': ['rbf'], 'class_weight': ['auto'], 'random_state': [random_seed] }, 'model': svm.SVC() } perceptron = { 'features': { 'penalty': [None, 'l2', 'l1', 'elasticnet'], 'random_state': [random_seed] }, 'model': linear_model.Perceptron() } n_bayes = {'features': {}, 'model': naive_bayes.GaussianNB()} pca_logit = { 'features': { 'pca__n_components': [0.001, 0.33, 0.67], 'logit__C': [0.001, 0.01, 1, 100, 1000], 'logit__random_state': [random_seed] }, 'model': Pipeline(steps=[( 'pca', decomposition.PCA()), ('logit', linear_model.LogisticRegression())]) }
mean_squared_error(datas['Livraisons réelles'].values, predicto) ) #ecartmoy=sum(abs(list(datas['Livraisons réelles'].values)-predicto))/len(predicto) ## MODELE LINEAIRE SGDRegressor ----> 10^5 from sklearn import linear_model regr = linear_model.SGDRegressor() regr.fit(list(datas['Historique'].values), list(datas['Livraisons réelles'].values)) predicto = regr.predict(list(datas['Historique'].values)) ecartmoy = sqrt( mean_squared_error(datas['Livraisons réelles'].values, predicto) ) #ecartmoy=sum(abs(list(datas['Livraisons réelles'].values)-predicto))/len(predicto) ## MODELE LINEAIRE Perceptron ----> 307 from sklearn import linear_model regr = linear_model.Perceptron() regr.fit(list(datas['Historique'].values), list(datas['Livraisons réelles'].values)) predicto = regr.predict(list(datas['Historique'].values)) ecartmoy = sqrt( mean_squared_error(datas['Livraisons réelles'].values, predicto) ) #ecartmoy=sum(abs(list(datas['Livraisons réelles'].values)-predicto))/len(predicto) ## MODELE LINEAIRE PassiveAggressiveClassifier ----> 260 from sklearn import linear_model regr = linear_model.PassiveAggressiveClassifier() regr.fit(list(datas['Historique'].values), list(datas['Livraisons réelles'].values)) predicto = regr.predict(list(datas['Historique'].values)) ecartmoy = sqrt( mean_squared_error(datas['Livraisons réelles'].values, predicto)
def reinit(self):
    """Re-run the parent initialiser and rebuild the Perceptron learner.

    ``data_opts`` from ``self.get_params()`` goes to the parent
    ``__init__``; every remaining parameter is forwarded to
    ``sklm.Perceptron``.
    """
    params = self.get_params()
    data_opts = params["data_opts"]
    super().__init__(data_opts)

    # The learner must not receive data_opts, so drop it before forwarding
    # the remaining parameters.
    params.pop("data_opts")
    self.learner_ = sklm.Perceptron(**params)
X = min_max_scaler.fit_transform(X) #Elimina las columnas que no aportan nada selector = VarianceThreshold() X = selector.fit_transform(X) poly = PolynomialFeatures(degree=2) X = poly.fit_transform(X) #Una vez normalizados vuelvo a dejar el mismo grupo de train y test X_test = X[:X_test.shape[0], :] X_train = X[X_test.shape[0]:, :] #-------------Eleccion de clase de funciones model = linear_model.LogisticRegression(penalty='l2', multi_class='ovr', C=1) model.fit(X_train, y_train.ravel()) print("---------Regresion logistica---------") prediccion = model.predict(X_test) confusionMatrix(prediccion, y_test, "CM de la Regresion Logistica") print("E_in: " + str(model.score(X_train, y_train))) print("E_out: " + str(model.score(X_test, y_test))) print("---------Perceptron---------") perceptron = linear_model.Perceptron(tol=0, penalty='l1') perceptron.fit(X_train, y_train.ravel()) prediccion = perceptron.predict(X_test) confusionMatrix(prediccion, y_test, "CM del Perceptron") print("E_in: " + str(perceptron.score(X_train, y_train))) print("E_out: " + str(perceptron.score(X_test, y_test)))
#Parameterize the data data = data.head(1000) #Display the values and lables #print('Image Values: ', data.values) #print('Image label : ', data.label) trainingTime = [] predictionTime = [] predictionAccuracy = [] #K-Fold cross validation kf = model_selection.KFold(n_splits=2, shuffle=True) for train_index,test_index in kf.split(data.values): #Linear perception clf1 = linear_model.Perceptron() #clf2 = svm.SVC(kernel="rbf", gamma=1e-3) #clf3 = svm.SVC(kernel="sigmoid", gamma=1e-4) #Starting time for train trainStartTime = time.time() print('\nTrain Start Time was %g seconds :'%trainStartTime ) clf1.fit(data.values[train_index], data.label[train_index ]) #End time for train trainEndTime = time.time() print('Train End Time was %g seconds'%trainEndTime ) print('Total Proess Elapsed time was %g seconds '% (trainEndTime - trainStartTime )) #Adding the time to the list trainingTime.append(trainEndTime - trainStartTime )
from sklearn import tree, neighbors, svm, metrics, linear_model
import numpy as np

# One row per person: [height, width, shoe size]
# NOTE(review): "width" is presumably a typo for "weight" - confirm.
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40], [190, 90, 47], [175, 64, 39], [177, 70, 40], [159, 55, 37], [171, 75, 42], [181, 85, 43]]
# Target label for each row of X.
Y = ['male', 'female', 'female', 'female', 'male', 'male', 'male', 'female', 'male', 'female', 'male']

# Classifiers, all with default parameters.
clf = tree.DecisionTreeClassifier()
clf_svc = svm.SVC()
clf_pt = linear_model.Perceptron()
neigh = neighbors.KNeighborsClassifier()

# Training models on the full data set.
clf = clf.fit(X, Y)
clf_svc = clf_svc.fit(X, Y)
clf_pt = clf_pt.fit(X, Y)
neigh = neigh.fit(X, Y)

# Testing on the same data the models were trained on, so the accuracies
# below are training accuracies (in percent).
predict_clf = clf.predict(X)
acc_dtc = metrics.accuracy_score(Y, predict_clf) * 100
# Prediction for one unseen sample.
result_dtc = clf.predict([[190, 70, 43]])
print('Accuracy for DecisionTreeClassifier: {}'.format(acc_dtc))
print(result_dtc)

predict_svc = clf_svc.predict(X)
acc_svc = metrics.accuracy_score(Y, predict_svc) * 100
def _timed_fit_predict(classifier, train_x, train_y, test_x):
    """Fit ``classifier``, predict ``test_x``; return (fit_s, predict_s, prediction)."""
    fit_start = time.time()
    classifier.fit(train_x, train_y)
    fit_seconds = time.time() - fit_start

    predict_start = time.time()
    prediction = classifier.predict(test_x)
    predict_seconds = time.time() - predict_start
    return fit_seconds, predict_seconds, prediction


def _report_fold(indent, score_label, fit_seconds, predict_seconds, expected,
                 prediction):
    """Print one fold's timings, accuracy and confusion cells; return the accuracy."""
    accuracy = metrics.accuracy_score(expected, prediction)
    # ravel() on the 2x2 binary confusion matrix yields tn, fp, fn, tp.
    true_negative, false_positive, false_negative, true_positive = \
        confusion_matrix(expected, prediction).ravel()
    print(indent + "# Training time", fit_seconds)
    print(indent + "# Predicting time", predict_seconds)
    print(indent + "# " + score_label + ": ", accuracy)
    print(indent + "# true negative", true_negative)
    print(indent + "# false positive", false_positive)
    print(indent + "# false negative", false_negative)
    print(indent + "# true positive", true_positive)
    return accuracy


def _print_summary(heading_indent, item_indent, training_times,
                   prediction_times, accuracies):
    """Print minimum, maximum and average of the per-fold measurements."""
    sections = (
        ("Training Times (in s)", training_times),
        ("Prediction Times (in s)", prediction_times),
        ("Accuracies", accuracies),
    )
    for title, values in sections:
        print(heading_indent + "### " + title + " ###")
        print(item_indent + "# Minimum: ", min(values))
        print(item_indent + "# Maximum: ", max(values))
        print(item_indent + "# Average: ", sum(values) / len(values))


def main():
    """Compare a Perceptron with linear and RBF SVMs on the product images.

    Task 1 loads ``product_images.csv``, prints basic statistics, shows the
    first image of each class and draws a 6000-row sample. Task 2 runs a
    shuffled 5-fold evaluation of a Perceptron on that sample. Task 3 does
    the same for an SVC with a linear kernel and one with an RBF kernel,
    both on identical folds. Per-fold and summary statistics are printed;
    nothing is returned.

    Timings come from ``time.time()`` differences and are therefore
    seconds; the summary headings say so (they previously claimed "ms").
    The SVM folds are labelled with their own kernel name rather than
    "Perceptron".
    """
    ####################
    #####  Task 1  #####
    ####################
    data = pd.read_csv("product_images.csv")  # product images and labels
    labels = data["label"]  # keep only the labels
    feature_vectors = data.drop("label", axis=1)  # keep the pixel values

    print(labels.head())
    print(type(labels))
    print(feature_vectors.head())
    print(type(feature_vectors))

    # Number of images per class (0 = sneaker, 1 = ankle boot).
    print("There are:"
          "\n\t%d images of sneakers"
          "\n\t%d images of ankle boots" %
          (labels[labels == 0].size, labels[labels == 1].size))

    # Show the first sneaker, then the first ankle boot, as 28x28 images.
    for class_value in (0, 1):
        first_index = labels[labels == class_value].index[0]
        plt.imshow(
            np.array(feature_vectors.iloc[first_index]).reshape(28, 28))
        plt.show()

    # Parameterised data: a random sample of 6000 rows and their labels.
    feature_vectors_parameterised = feature_vectors.sample(6000)
    labels_parameterised = labels[feature_vectors_parameterised.index]

    print("Parameterised dataset contains:"
          "\n\t%d images of sneakers"
          "\n\t%d images of ankle boots" %
          (labels_parameterised[labels_parameterised == 0].size,
           labels_parameterised[labels_parameterised == 1].size))

    ####################
    #####  Task 2  #####
    ####################
    number_of_kfolds = 5
    print("#######################")
    print("# Perceptron #")
    print("# Number of kfolds: %d #" % number_of_kfolds)
    print("#######################")

    perceptron_training_times = []
    perceptron_prediction_times = []
    perceptron_prediction_accuracies = []

    kf = model_selection.KFold(n_splits=number_of_kfolds, shuffle=True)
    splits = kf.split(feature_vectors_parameterised, labels_parameterised)
    for current_fold, (train_index, test_index) in enumerate(splits, start=1):
        train_x = feature_vectors_parameterised.iloc[train_index]
        test_x = feature_vectors_parameterised.iloc[test_index]
        train_y = labels_parameterised.iloc[train_index]
        test_y = labels_parameterised.iloc[test_index]

        fit_seconds, predict_seconds, prediction = _timed_fit_predict(
            linear_model.Perceptron(), train_x, train_y, test_x)

        print("\t## Fold number: %d ##" % current_fold)
        accuracy = _report_fold("\t\t", "Perceptron accuracy score",
                                fit_seconds, predict_seconds, test_y,
                                prediction)

        perceptron_training_times.append(fit_seconds)
        perceptron_prediction_times.append(predict_seconds)
        perceptron_prediction_accuracies.append(accuracy)

    _print_summary("", "", perceptron_training_times,
                   perceptron_prediction_times,
                   perceptron_prediction_accuracies)

    ####################
    #####  Task 3  #####
    ####################
    print()
    print("#########################")
    print("# Linear Kernel #")
    print("# And #")
    print("# Radial Basis Function #")
    print("# Number of kfolds: %d #" % number_of_kfolds)
    print("#########################")

    # (title, factory) per SVM variant; a fresh classifier is built per fold.
    kernels = (
        ("Linear Kernel", lambda: svm.SVC(kernel="linear", gamma=1e-3)),
        ("Radial Basis Function Kernel",
         lambda: svm.SVC(kernel="rbf", gamma=1e-7)),
    )
    # title -> (training times, prediction times, accuracies)
    kernel_stats = {title: ([], [], []) for title, _ in kernels}

    kf = model_selection.KFold(n_splits=number_of_kfolds, shuffle=True)
    splits = kf.split(feature_vectors_parameterised, labels_parameterised)
    for current_fold, (train_index, test_index) in enumerate(splits, start=1):
        train_x = feature_vectors_parameterised.iloc[train_index]
        test_x = feature_vectors_parameterised.iloc[test_index]
        train_y = labels_parameterised.iloc[train_index]
        test_y = labels_parameterised.iloc[test_index]

        # Both kernels are measured on the very same fold.
        measurements = [
            (title,
             _timed_fit_predict(make_classifier(), train_x, train_y, test_x))
            for title, make_classifier in kernels
        ]

        print("\t## Fold number: %d ##" % current_fold)
        for title, (fit_seconds, predict_seconds, prediction) in measurements:
            print("\t\t# " + title)
            accuracy = _report_fold("\t\t\t", title + " accuracy score",
                                    fit_seconds, predict_seconds, test_y,
                                    prediction)
            training_times, prediction_times, accuracies = kernel_stats[title]
            training_times.append(fit_seconds)
            prediction_times.append(predict_seconds)
            accuracies.append(accuracy)

    for title, _ in kernels:
        print("##### " + title)
        training_times, prediction_times, accuracies = kernel_stats[title]
        _print_summary("\t", "\t\t", training_times, prediction_times,
                       accuracies)
def mx_Perceptron(train_x, train_y):
    """Return a default scikit-learn Perceptron fitted on the given data."""
    # fit() returns the estimator itself, so the fitted model is returned
    # directly.
    return linear_model.Perceptron().fit(train_x, train_y)