def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel, plots the confusion matrix of its
    test predictions and plots up to 10 misclassified test images for the
    predicted class that collects the most wrong predictions.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    # Imported locally so this block does not depend on which sklearn names
    # the surrounding module happens to import.
    from sklearn import svm
    from sklearn.metrics import confusion_matrix

    labels = range(1, 6)

    clf = svm.SVC(kernel='linear', decision_function_shape='ovr')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Passing the labels pins the class order: row/column k <-> labels[k].
    cm = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(cm, labels)

    # Rows are true classes and columns are predicted classes, so a column
    # sum without its diagonal entry counts the wrong predictions of a class.
    wrong_per_predicted = cm.sum(axis=0) - np.diagonal(cm)

    # Index into `labels` of the class with the largest classification error.
    i = int(np.argmax(wrong_per_predicted))

    # Numpy indices of all misclassified test images.
    # NOTE(review): plot_mnist is defined elsewhere; it is assumed to keep
    # only the samples whose label equals `labels` - confirm.
    sel_err = np.flatnonzero(y_pred != y_test)

    # Plot with mnist plot
    plot_mnist(x_test[sel_err], y_pred[sel_err], labels=labels[i],
               k_plots=10, prefix='Predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Fits a linear-kernel SVC with C = 3e-4 and plots the confusion matrix of
    the test predictions. For every predicted class (matrix column) the
    largest off-diagonal entry is divided by the column total; the class with
    the biggest ratio is selected and up to 10 test images wrongly predicted
    as that class are plotted.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return:
    """
    labels = range(1, 6)
    C = 3e-4

    classifier = svm.SVC(kernel="linear", decision_function_shape='ovr', C=C)
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_test)

    conf_matrix = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(conf_matrix, labels)

    # Largest single confusion per predicted class, relative to how often
    # that class was predicted at all.
    off_diagonal = conf_matrix.copy()
    np.fill_diagonal(off_diagonal, 0)
    worst_share = off_diagonal.max(axis=0) / conf_matrix.sum(axis=0)

    # + 1 turns the 0-based column position into a label value; this relies
    # on the labels being 1, 2, ... in column order.
    i = np.argmax(worst_share) + 1

    # First 10 test indices that were wrongly predicted as class i.
    wrongly_predicted_as_i = (y_pred == i) & (y_pred != y_test)
    sel_error = np.flatnonzero(wrongly_predicted_as_i)[:10].astype(int)

    # Plot with mnist plot
    plot_mnist(x_test[sel_error], y_pred[sel_error], labels=labels[i - 1],
               k_plots=10, prefix='Predicted class')
def ex_3():
    """
    Run exercise 3: load the data, plot training digits, run part a).

    Reads 'data_mnist.json' through load_data, flattens the label arrays with
    ravel(), plots the training samples and hands the train/test split to
    ex_3_a.

    :return:
    """
    data = load_data('data_mnist.json')

    x_train = data['X']
    y_train = data['Y'].ravel()
    x_test = data['XT']
    y_test = data['YT'].ravel()

    plot_mnist(x_train, y_train)
    ex_3_a(x_train, y_train, x_test, y_test)
def ex_3():
    """
    Run exercise 3: load the data, plot training digits, run parts a) and b).

    Reads 'data_mnist.json' through load_data, divides the sample arrays by
    255, flattens the label arrays with ravel(), plots the training samples
    and hands the train/test split to ex_3_a and ex_3_b.

    :return:
    """
    data = load_data('data_mnist.json')

    # Normalize data from [0,255] to [0,1]
    x_train = data['X'] / 255
    y_train = data['Y'].ravel()
    x_test = data['XT'] / 255
    y_test = data['YT'].ravel()

    plot_mnist(x_train, y_train)

    ex_3_a(x_train, y_train, x_test, y_test)
    ex_3_b(x_train, y_train, x_test, y_test)
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel, plots the confusion matrix of its
    test predictions and plots up to 10 misclassified test images for the
    predicted class that collects the most wrong predictions.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    lin = svm.SVC(decision_function_shape='ovr', kernel='linear')
    lin.fit(x_train, y_train)
    y_test_predict = lin.predict(x_test)

    cm = confusion_matrix(y_test, y_test_predict)
    plot_confusion_matrix(cm, labels)

    # Work on a copy: zeroing the diagonal must not alter the matrix that
    # was handed to the plot.
    cm_no_diagonal = cm.copy()
    np.fill_diagonal(cm_no_diagonal, 0)

    # Rows are true classes, columns are predicted classes, so the column
    # sums of the off-diagonal part count wrong predictions per class.
    errors_per_class = cm_no_diagonal.sum(axis=0)

    # Numpy indices of all misclassified test images.
    # NOTE(review): plot_mnist is defined elsewhere; it is assumed to keep
    # only the samples whose label equals `labels` - confirm.
    sel_err = np.flatnonzero(y_test_predict != y_test)

    # Index into `labels` of the class with the largest classification error.
    i = int(np.argmax(errors_per_class))

    # Plot with mnist plot
    plot_mnist(x_test[sel_err], y_test_predict[sel_err], labels=labels[i],
               k_plots=10, prefix='Predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel (C = 10), plots the confusion matrix
    of its test predictions and plots up to 10 test images that were wrongly
    assigned to the predicted class with the most wrong predictions.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    SVMlin = svm.SVC(decision_function_shape='ovr', C=10, kernel='linear')
    SVMlin.fit(x_train, y_train)
    y_pred = SVMlin.predict(x_test)

    # Passing the labels pins the class order: row/column k <-> labels[k].
    conf_M = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(conf_M, labels)

    # Rows are true classes and columns are predicted classes, so a column
    # sum without its diagonal entry counts the wrong predictions of a class.
    wrong_per_predicted = conf_M.sum(axis=0) - np.diagonal(conf_M)

    # 0-based position in `labels` (not the label value itself), so that
    # labels[i] below cannot run past the end of the range.
    i = int(np.argmax(wrong_per_predicted))
    most_missclass = labels[i]

    # Numpy indices of the images wrongly predicted as `most_missclass`.
    sel_err = np.flatnonzero((y_pred != y_test) & (y_pred == most_missclass))

    # Plot with mnist plot
    plot_mnist(x_test[sel_err], y_pred[sel_err], labels=labels[i],
               k_plots=10, prefix='Predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel (C = 0.0003), plots the confusion
    matrix of its test predictions and plots up to 10 misclassified test
    images for the predicted class that collects the most wrong predictions.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    C = 0.0003
    clf = svm.SVC(C=C, kernel='linear', decision_function_shape='ovr')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    labels = range(1, 6)

    # Passing the labels pins the class order: row/column k <-> labels[k].
    cm = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(cm, labels)

    # Numpy indices of all misclassified test images. Built from the
    # comparison only, so no correctly classified sample (such as index 0)
    # can slip in.
    # NOTE(review): plot_mnist is defined elsewhere; it is assumed to keep
    # only the samples whose label equals `labels` - confirm.
    sel_error = np.flatnonzero(y_pred != y_test)

    # The plot selects by *predicted* label, so the errors are counted per
    # predicted class as well: column sums without the diagonal.
    sums = cm.sum(axis=0) - np.diagonal(cm)

    # Index into `labels` of the class with the largest classification error.
    i = int(np.argmax(sums))

    # Plot with mnist plot
    plot_mnist(x_test[sel_error], y_pred[sel_error], labels=labels[i],
               k_plots=10, prefix='Predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b)

    Fits a linear-kernel SVC with C = 10 and plots the confusion matrix of
    the test predictions. The off-diagonal entries of each of the 5 matrix
    columns are summed, the column with the largest sum determines the label
    (column position + 1), and the test images wrongly predicted as that
    label are passed to plot_mnist.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return:
    """
    labels = range(1, 6)

    linear = svm.SVC(kernel='linear', C=10, decision_function_shape='ovr')
    linear.fit(x_train, y_train)
    y_pred = linear.predict(x_test)

    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cm, labels)

    # Wrong predictions per predicted class: column total without the
    # diagonal entry.
    errors = np.zeros(5)
    for predicted in range(5):
        errors[predicted] = sum(
            cm[true][predicted] for true in range(5) if true != predicted
        )

    # should be the label number corresponding the largest classification error
    max_err_label = np.argmax(errors) + 1

    # Numpy indices to select images that are misclassified: predicted as
    # max_err_label although the true label differs.
    predicted_as_label = np.nonzero(y_pred == max_err_label)[0].astype(int)
    is_wrong = y_test[predicted_as_label] != y_pred[predicted_as_label]
    sel_err = predicted_as_label[is_wrong]

    # Plot with mnist plot
    plot_mnist(x_test[sel_err], y_pred[sel_err], labels=max_err_label,
               k_plots=10, prefix='Predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel (C = 10), plots the confusion matrix
    of its test predictions and plots up to 10 misclassified test images for
    the predicted class that holds the largest single confusion count.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    svc_ovo = svm.SVC(kernel='linear', decision_function_shape='ovo', C=10)
    svc_ovo.fit(x_train, y_train)
    y_pred = svc_ovo.predict(x_test)

    cm = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cm, labels)

    # Work on a copy: zeroing the diagonal must not alter the matrix that
    # was handed to the plot.
    cp = cm.copy()
    np.fill_diagonal(cp, 0)

    # Index into `labels` of the predicted class (column) that contains the
    # largest off-diagonal entry.
    i = int(np.argmax(np.max(cp, axis=0)))

    # Numpy indices of all misclassified test images. flatnonzero gives a
    # 1-D index array, so x_test[sel_err] stays 2-dimensional (argwhere
    # would add an extra axis).
    # NOTE(review): plot_mnist is defined elsewhere; it is assumed to keep
    # only the samples whose label equals `labels` - confirm.
    sel_err = np.flatnonzero(np.not_equal(y_test, y_pred))

    # The labels passed to the plot are predictions, hence the prefix.
    plot_mnist(x_test[sel_err], y_pred[sel_err], labels=labels[i],
               k_plots=10, prefix='Predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel (C = 10), prints and plots the
    confusion matrix of its test predictions and plots up to 10 misclassified
    test images for the predicted class that collects the most wrong
    predictions. The misclassified indices and the class are computed from
    the predictions rather than hard-coded.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    clf = svm.SVC(kernel="linear", decision_function_shape='ovr', C=10)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    labels = range(1, 6)

    # Passing the labels pins the class order: row/column k <-> labels[k].
    cm = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(cm, labels)
    print("conf: ", cm)

    # Numpy indices of all misclassified test images.
    # NOTE(review): plot_mnist is defined elsewhere; it is assumed to keep
    # only the samples whose label equals `labels` - confirm.
    sel_err = np.flatnonzero(y_pred != y_test)

    # Rows are true classes and columns are predicted classes, so a column
    # sum without its diagonal entry counts the wrong predictions of a class.
    wrong_per_predicted = cm.sum(axis=0) - np.diagonal(cm)

    # Index into `labels` of the class with the largest classification error.
    i = int(np.argmax(wrong_per_predicted))

    print("sel_err ", sel_err)

    # Plot with mnist plot
    plot_mnist(x_test[sel_err], y_pred[sel_err], labels=labels[i],
               k_plots=10, prefix='Predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel (C = 10), plots the confusion matrix
    of its test predictions and plots up to 10 misclassified test images for
    the label that occurs most often among the wrong predictions.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    svc = svm.SVC(C=10, kernel='linear')
    svc.fit(x_train, y_train)
    y_pred = svc.predict(x_test)

    # `labels` is passed by keyword: recent scikit-learn releases accept only
    # y_true and y_pred positionally.
    con_matrix = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(con_matrix, labels)

    # Numpy indices of all misclassified test images.
    # NOTE(review): plot_mnist is defined elsewhere; it is assumed to keep
    # only the samples whose label equals `labels` - confirm.
    sel_error = np.flatnonzero(y_test != y_pred)
    if sel_error.size == 0:
        print("No misclassified test samples; nothing to plot.")
        return

    # Count how often each label appears among the wrong predictions.
    # tolist() yields plain Python scalars as Counter keys.
    occurences = Counter(y_pred[sel_error].tolist())

    # most_common picks the label with the highest count; max() over a
    # Counter would compare the keys and return the largest label instead.
    i = occurences.most_common(1)[0][0]
    print("Label corresponding to the largest classification error : ", i)

    plot_mnist(x_test[sel_error], y_pred[sel_error], labels=i,
               k_plots=10, prefix='Predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel (C = 3e-4), plots the confusion matrix
    of its test predictions, locates the largest off-diagonal cell of that
    matrix and plots up to 10 misclassified test images for the predicted
    class of that cell.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    labels = range(1, 6)

    clf = svm.SVC(kernel="linear", C=3e-4, decision_function_shape='ovr')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # `labels` is passed by keyword: recent scikit-learn releases accept only
    # y_true and y_pred positionally.
    cm = confusion_matrix(y_test, y_pred, labels=list(labels))
    plot_confusion_matrix(cm, labels)

    # Numpy indices of all misclassified test images.
    # NOTE(review): plot_mnist is defined elsewhere; it is assumed to keep
    # only the samples whose label equals `labels` - confirm.
    sel_err = np.flatnonzero(y_test != y_pred)

    # Zero the diagonal on a copy so only confusions remain and the plotted
    # matrix object is left untouched.
    confusions = cm.copy()
    np.fill_diagonal(confusions, 0)

    # Row index = true class, column index = predicted class. The plot gets
    # predicted labels, so the column index selects the class to show.
    true_idx, pred_idx = np.unravel_index(confusions.argmax(),
                                          confusions.shape)

    # Plot with mnist plot
    plot_mnist(x_test[sel_err], y_pred[sel_err], labels=labels[int(pred_idx)],
               k_plots=10, prefix='predicted class')
def ex_3_b(x_train, y_train, x_test, y_test):
    """
    Solution for exercise 3 b): linear multi-class SVM.

    Trains an SVC with a linear kernel (C = 10), prints and plots the
    confusion matrix of its test predictions and plots the first 10 test
    images that were wrongly assigned to the predicted class with the most
    wrong predictions.

    :param x_train: Training samples (2-dimensional)
    :param y_train: Training labels
    :param x_test: Testing samples (2-dimensional)
    :param y_test: Testing labels
    :return: None
    """
    c = 10
    kernel_mode = 'linear'

    # given labels
    labels = range(1, 6)

    # init linear svm and train it
    lin_svm = svm.SVC(kernel=kernel_mode, C=c)
    lin_svm.fit(x_train, y_train)

    # pred y to plot conf matrix
    y_pred = lin_svm.predict(x_test)

    # Passing the labels pins the class order: row/column k <-> labels[k].
    cm = confusion_matrix(y_test, y_pred, labels=list(labels))
    print(cm)
    plot_confusion_matrix(cm, lin_svm.classes_)

    # Rows are true classes and columns are predicted classes, so a column
    # sum without its diagonal entry counts the wrong predictions of a class.
    # (The raw diagonal only counts hits and mostly reflects class size.)
    wrong_per_predicted = cm.sum(axis=0) - np.diagonal(cm)

    # should be the label number corresponding the largest classification error
    i = int(np.argmax(wrong_per_predicted))

    # Numpy indices to select images that are misclassified: the first 10
    # test samples wrongly predicted as labels[i].
    wrongly_predicted = (y_pred == labels[i]) & (y_test != y_pred)
    sel_err = np.flatnonzero(wrongly_predicted)[:10]

    # Plot with mnist plot
    plot_mnist(x_test[sel_err], y_pred[sel_err], labels=labels[i],
               k_plots=10, prefix='Predicted class')