def train_and_save_model(model='xvector', binary_class=False, single_class='glass'):
    """Train the x-vector network, save it and plot the test confusion matrix.

    The three parameters are kept for interface compatibility; this function
    does not read them (``model`` is immediately rebound to the network
    returned by ``define_xvector()``).

    Side effects: writes ``checkpoint-XX.h5`` files, ``training_log.csv``,
    ``5class_segmentYoutube_model.h5`` and
    ``ConfusionMatrix_segment_youtube.png``, and opens a plot window.
    """
    model = define_xvector()
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.001),
                  metrics=['acc', km.precision(label=1), km.recall(label=0)])
    model.summary()

    # All callbacks watch the *training* loss ('loss'), not the validation loss.
    callback_list = [
        # Checked every 2 epochs; only written when the loss improved.
        ModelCheckpoint('checkpoint-{epoch:02d}.h5',
                        monitor='loss',
                        verbose=1,
                        save_best_only=True,
                        period=2),
        # Reduce the learning rate when the loss stops improving.
        ReduceLROnPlateau(monitor='loss', patience=3, verbose=1, min_lr=1e-6),
        # Per-epoch metrics logged to CSV.
        CSVLogger(filename='training_log.csv'),
        # Stop early when the loss has not improved for 5 epochs.
        EarlyStopping(monitor='loss', patience=5),
    ]

    tr_data, tr_label, ts_data, ts_label = train_test_split()
    encoder = LabelBinarizer()
    tr_label = encoder.fit_transform(tr_label)
    ts_label = encoder.transform(ts_label)
    print(
        "Start Training process \nTraining data shape {} \nTraining label shape {}"
        .format(tr_data.shape, tr_label.shape))

    # The callbacks were previously built but never handed to fit(), so no
    # checkpointing, LR scheduling, logging or early stopping took place.
    model.fit(tr_data,
              tr_label,
              batch_size=16,
              epochs=100,
              verbose=1,
              validation_split=0.2,
              callbacks=callback_list)
    model.save('5class_segmentYoutube_model.h5')

    # Map one-hot predictions and labels back to class names before comparing.
    pred = encoder.inverse_transform(model.predict(ts_data))
    ts_label = encoder.inverse_transform(ts_label)
    cm = confusion_matrix(y_target=ts_label, y_predicted=pred, binary=False)

    plt.figure(figsize=(10, 10))
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    # The leading '' pads the tick at the axis origin.
    ax.set_xticklabels([''] + CLASS_TYPE, rotation=40, ha='right')
    ax.set_yticklabels([''] + CLASS_TYPE)
    plt.savefig("ConfusionMatrix_segment_youtube.png")
    plt.show()
def make_plots(train, test, pipelines): extensions = ['svg', 'eps', 'png'] X_train, y_train = load_X_y(train) X_test, y_test = load_X_y(test) pipelines.sort() clf = pipelines._results[0] y_pred = clf.predict(X_test) classifiers_with_predict_proba = find_classifiers_with_predict_proba() plt.clf() if clf.classifier.__class__.__name__ in classifiers_with_predict_proba: y_probas = clf.predict_proba(X_test)[:,1] fpr, tpr, _ = metrics.roc_curve(y_test, y_probas) plt.plot([0, 1], [0, 1], 'k--') plt.plot(fpr, tpr) plt.xlabel('False positive rate') plt.ylabel('True positive rate') fig = plt.gcf() fig.set_size_inches(4,3) plt.tight_layout() for ext in extensions: plt.savefig("./figures/roc_curve."+ext) else: print(clf.classifier.__class__.__name__,"not in predict proba list") cm = confusion_matrix(y_target=y_test, y_predicted=y_pred, binary=True) plot_confusion_matrix(conf_mat=cm, colorbar=True) fig = plt.gcf() fig.set_size_inches(4,3) plt.tight_layout() for ext in extensions: plt.savefig("./figures/confusion_matrix."+ext) '''plot_learning_curves(X_train, y_train, X_test, y_test,
def init(X_train, y_train, X_test, y_test, index=0):
    """Fit a one-class SVM, print its binary confusion matrix and save the plot.

    The figure is written to ``confusion_matrix<index>.png`` in the current
    directory. A PCA pre-processing step existed here previously and is
    currently disabled.
    """
    from mlxtend.plotting import plot_confusion_matrix
    from mlxtend.evaluate import confusion_matrix

    # Hyper-parameters obtained using grid search.
    detector = svm.OneClassSVM(nu=0.9, gamma=0.0001, kernel='linear')
    detector.fit(X_train, y_train)
    predictions = detector.predict(X_test)

    cm = confusion_matrix(y_target=y_test,
                          y_predicted=predictions,
                          binary=True)
    plot_confusion_matrix(conf_mat=cm)
    print(cm)

    output_name = "confusion_matrix" + str(index) + ".png"
    plt.savefig(output_name, format='png')
def saveConfusionMatrix(self, y_test, y_pred):
    """Plot the multi-class confusion matrix, save it under ``images/`` and show it.

    The output file name is built from ``self.name``.
    """
    matrix = confusion_matrix(y_target=y_test,
                              y_predicted=y_pred,
                              binary=False)
    _, axes = plot_confusion_matrix(conf_mat=matrix)
    axes.set_title('RandomForest Confusion Matrix')

    output_path = 'images/' + self.name + '_Confusion_Matrix.png'
    plt.savefig(output_path)
    plt.show()
    plt.close()
def test_binary():
    """Check the 2x2 matrix produced in binary mode with positive label 1."""
    targets = [1, 1, 1, 0, 0, 2, 0, 3]
    predictions = [1, 0, 1, 0, 0, 2, 1, 3]
    expected = np.array([[4, 1],
                         [1, 2]])

    actual = confusion_matrix(targets,
                              predictions,
                              binary=True,
                              positive_label=1)

    assert_array_equal(expected, actual)
def multi_class_confision(imageResults):
    """Write the confusion-matrix table to ``result_file`` and return metrics.

    Each entry of ``imageResults`` is read as ``result[1][0]`` (target label)
    and ``result[2][0]`` (predicted label). The table is laid out for the ten
    classes named by ``categoryDict[0..9]``.

    Returns:
        ``(recall, precision, overall_recall, overall_precision, accuracy)``
        where ``recall`` and ``precision`` are per-class arrays, the
        ``overall_*`` values are their unweighted means, and ``accuracy`` is
        the fraction of correctly classified samples.
    """
    num_classes = 10
    y_target = [result[1][0] for result in imageResults]
    y_predicted = [result[2][0] for result in imageResults]
    cm = confusion_matrix(y_target=y_target,
                          y_predicted=y_predicted,
                          binary=False)

    class_names = [categoryDict[i] for i in range(num_classes)]
    table = [[class_names[i]] + cm[i].tolist() for i in range(num_classes)]

    result_file.write("Confusion Matrix:\n\n")
    result_file.write(
        tabulate(table, headers=[''] + class_names, tablefmt='orgtbl'))
    result_file.write("\n")

    correct_per_class = np.diag(cm)
    recall = correct_per_class / np.sum(cm, axis=1)
    precision = correct_per_class / np.sum(cm, axis=0)

    # overall recall and precision
    overall_recall = np.mean(recall)
    overall_precision = np.mean(precision)

    # Previously mean(diag / 10), which is only a valid accuracy when every
    # class has exactly 10 samples. trace / total gives the same value in
    # that case and stays correct for any class distribution.
    accuracy = np.trace(cm) / np.sum(cm)
    return recall, precision, overall_recall, overall_precision, accuracy
def test(model, device, test_loader, criterion, epoch):
    """Evaluate ``model`` on ``test_loader`` and build two confusion matrices.

    Returns:
        ``(accuracy, loss_list, cm, cm1)``: accuracy in percent, the list of
        per-batch loss values, the matrix returned by ``confusion_matrix`` on
        the integer labels, and the ``ConfusionMatrix`` object built from the
        class-name labels.
    """
    # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
    model.eval()

    classes = ('10_down', '04_fist_moved', '01_palm', '05_thumb', '02_l',
               '09_c', '08_palm_moved', '07_ok', '03_fist', '06_index')
    loss_list, all_predicted, all_labels = [], [], []

    with torch.no_grad():
        correct = 0
        total = 0
        for i, (images, labels) in enumerate(test_loader):
            images = images.to(device)
            labels = labels.to(device)
            # Record the labels before they are reshaped for the loss.
            all_labels.extend(labels.cpu().numpy())

            labels = labels.long()
            labels = labels.view(-1, len(labels))[0]
            outputs = model(images).float()

            loss = criterion(outputs, labels)
            loss_list.append(loss.item())
            if i % 10 == 0:
                print('Validation Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, i * len(images), len(test_loader.dataset),
                    100. * i / len(test_loader), loss.item()))

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_predicted.extend(predicted.cpu().numpy())

    accuracy = 100 * correct / total
    print('Test Accuracy of the model on the 3000 test images: {} %'.format(accuracy))

    all_labels_array = np.array(all_labels).reshape(-1, )
    all_predicted_array = np.array(all_predicted).reshape(-1, )

    # Translate integer class ids into their class names.
    id_to_name = dict(enumerate(classes))
    to_name = np.vectorize(id_to_name.get)
    all_labels_vect = to_name(all_labels_array)
    all_predicted_vect = to_name(all_predicted_array)

    # Create CM From Data
    cm1 = ConfusionMatrix(predict_vector=all_predicted_vect,
                          actual_vector=all_labels_vect)
    cm = confusion_matrix(y_target=all_labels_array,
                          y_predicted=all_predicted_array,
                          binary=False)
    return accuracy, loss_list, cm, cm1
def __str__(self):
    """Return the confusion matrix as text.

    Binary mode is used when the truth holds at most two distinct values.
    """
    truth, prediction = self._fix_label_prediction_representation()
    flat_truth = truth.reshape((-1, ))
    use_binary = len(set(flat_truth)) <= 2
    matrix = confusion_matrix(truth, prediction, binary=use_binary)
    return f"{matrix}"
def dump():
    """Train a hate-speech classifier from the DFE CSV and pickle it.

    Reads ``./twitter-hate-speech-classifier-DFE-a845520.csv``, vectorizes
    the processed tweets with unigram and bigram counts, shows the training
    confusion matrix of a linear SVC, then fits a random forest, prints its
    test score, writes it to ``pkl_objects/classifier.pkl`` and passes that
    path to ``convert``.
    """
    hate_speech = pd.read_csv(
        './twitter-hate-speech-classifier-DFE-a845520.csv',
        encoding='iso-8859-1')
    print('There are', len(hate_speech), 'data points.')

    # .copy() makes the column assignments below act on an independent frame
    # instead of a slice of ``hate_speech`` (avoids SettingWithCopyWarning).
    hate_speech_subset = hate_speech.iloc[:, [19, 5, 6]].copy()
    hate_speech_subset.columns = ['Tweets', 'Verdict', 'Confidence']

    le = preprocessing.LabelEncoder()
    le.fit(list(hate_speech_subset.Verdict.unique()))
    hate_speech_subset['Numeric_Verdict'] = le.transform(
        list(hate_speech_subset.Verdict.values))
    hate_speech_subset['Tweets'] = hate_speech_subset['Tweets'].map(processTweet)

    text = hate_speech_subset['Tweets'].values
    vectorizer = CountVectorizer(ngram_range=(1, 2))
    vectorizer.fit(text)
    X = vectorizer.transform(text)
    y = hate_speech_subset['Numeric_Verdict'].values
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Confusion matrix of a linear SVC evaluated on its own training data.
    svc = SVC(kernel='linear', probability=True).fit(X_train, y_train)
    cm = confusion_matrix(y_train, svc.predict(X_train))
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    plt.show()

    # 0 : The tweet contains hate speech
    # 1 : The tweet is not offensive
    # 2 : The tweet uses offensive language but not hate speech
    print(X.shape)

    # Disabled hyper-parameter search, kept for reference:
    # param_grid = {"max_depth": [3, None],
    #               "n_estimators": [10, 50, 100],
    #               "max_features": [1, 3, 10],
    #               "min_samples_split": [2, 3, 10],
    #               "min_samples_leaf": [1, 3, 10],
    #               "bootstrap": [True, False],
    #               "criterion": ["gini", "entropy"]}
    # grid_rf = GridSearchCV(RandomForestClassifier(), param_grid=param_grid,
    #                        cv=10, scoring='accuracy')
    # grid_rf.fit(X_train, y_train)
    # grid_rf.score(X_train, y_train)

    clf_rfc = RandomForestClassifier()
    clf_rfc.fit(X_train, y_train)
    score = clf_rfc.score(X_test, y_test)
    print(score)

    # Close the file deterministically so the pickle is fully flushed before
    # ``convert`` is handed the path.
    with open('pkl_objects/classifier.pkl', 'wb') as pickle_file:
        pickle.dump(clf_rfc, pickle_file, protocol=4)
    convert('pkl_objects/classifier.pkl')
def test_multiclass():
    """Check the 4x4 matrix produced in multi-class mode for labels 0..3."""
    targets = [1, 1, 1, 0, 0, 2, 0, 3]
    predictions = [1, 0, 1, 0, 0, 2, 1, 3]
    expected = np.array([
        [2, 1, 0, 0],
        [1, 2, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ])

    actual = confusion_matrix(targets,
                              predictions,
                              binary=False,
                              positive_label=1)

    assert_array_equal(expected, actual)
def plot_confusion_matrix(self, figsize=(6, 6)) -> Tuple[Figure, Axis]:
    """Plot the confusion matrix of the stored truth and prediction.

    Binary mode is used when the truth holds at most two distinct values.
    Returns whatever mlxtend's ``plot_confusion_matrix`` returns.
    """
    from mlxtend.plotting import plot_confusion_matrix as render_matrix

    truth, prediction = self._fix_label_prediction_representation()
    unique_truth = set(truth.reshape((-1, )))
    is_binary = len(unique_truth) <= 2
    matrix = confusion_matrix(truth, prediction, binary=is_binary)
    return render_matrix(matrix, figsize=figsize)
def plot_one_hot_encoded_confusion_matrix(
        df: pd.DataFrame, true_columns,
        prediction_columns) -> Tuple[Figure, Axis]:
    """Plot a confusion matrix from one-hot encoded label and prediction columns.

    Each row is reduced to the position of its largest value (argmax) before
    the matrix is computed.
    """

    def _row_argmax(row):
        return np.argmax(row)

    def _decode(columns):
        return df[columns].apply(_row_argmax, raw=True, axis=1)

    y_hat = _decode(prediction_columns)
    y = _decode(true_columns)
    matrix = confusion_matrix(y.values, y_hat.values)
    return plot_confusion_matrix(matrix, figsize=(12, 12))
def plot_confusion_matrix(self, x_test, y_test, logger, *argv):
    """Save the confusion matrix of the best estimator on the given test data.

    The first two extra positional arguments are used in the output file
    name. Any failure is logged through ``logger`` and not re-raised.
    """
    try:
        best_model = self.estimator.best_estimator_
        predictions = best_model.predict(x_test)
        matrix = confusion_matrix(y_target=y_test,
                                  y_predicted=predictions,
                                  binary=False)
        plot_confusion_matrix(conf_mat=matrix, figsize=(15, 15))

        first_tag = str(argv[0])
        second_tag = str(argv[1])
        plt.savefig('../../plots/cm_' + first_tag + "_" + second_tag + '.png')
        logger.info('Plotting confusion matrix completed')
    except Exception as e:
        logger.error('Failed in plot_confusion_matrix:' + str(e))
def save_confusion_matrix(file_path,
                          y_target,
                          y_predicted,
                          target_names=None,
                          binary=False):
    """Plot a normalized confusion matrix and save the figure to ``file_path``.

    When ``target_names`` is given, its entries are used as tick labels on
    both axes.
    """
    matrix = confusion_matrix(y_target, y_predicted, binary)
    plot_confusion_matrix(conf_mat=matrix,
                          colorbar=True,
                          show_absolute=False,
                          show_normed=True)

    if target_names is not None:
        positions = np.arange(len(target_names))
        for set_ticks in (plt.xticks, plt.yticks):
            set_ticks(positions, target_names)

    plt.savefig(file_path)
def plot(actual: List[Any], predicted: List[Any], type: str, threshold: float):
    """Plot the multi-class confusion matrix and save it as a PNG.

    ``type`` and ``threshold`` appear in both the figure title and the
    output file name.
    """
    matrix = confusion_matrix(y_target=actual,
                              y_predicted=predicted,
                              binary=False)
    figure, _ = plot_confusion_matrix(conf_mat=matrix,
                                      class_names=GRADE_HIERARCHY,
                                      colorbar=True,
                                      show_absolute=True,
                                      show_normed=True)

    title = f"Multi Class Confusion Matrix for {type} Majority Sorting (λ = {threshold})"
    figure.suptitle(title, fontsize=10)

    output_name = f"confusion_matrix_{type}_{threshold}.png"
    figure.savefig(output_name)
def calculate_performance(observations: pd.DataFrame,
                          predictions: Union[Network, NetworkGroup],
                          sign: bool) -> Tuple[dict, float, float]:
    """Compare predicted links with observations.

    Returns ``(cm_dict, precision, sensitivity)`` where ``cm_dict`` holds the
    ``tn``/``fp``/``fn``/``tp`` counts. All values are NaN when
    ``predictions`` has no links.

    Raises:
        ValueError: if ``predictions`` has links but is neither a ``Network``
            nor a ``NetworkGroup``.
    """
    # Predictions start as an all-zero frame shaped like the observations.
    prediction_df = pd.DataFrame(
        np.zeros_like(observations.values),
        index=observations.index,
        columns=observations.columns,
    )

    if not len(predictions.links):
        empty_counts = {"tn": np.nan, "fp": np.nan, "fn": np.nan, "tp": np.nan}
        return empty_counts, np.nan, np.nan

    if isinstance(predictions, Network):
        table = predictions.get_adjacency_table("weight")
        for row in table.index:
            for col in table.columns:
                prediction_df.loc[row, col] = table.loc[row, col]
    elif isinstance(predictions, NetworkGroup):
        vector_table = predictions.get_adjacency_vectors("weight")
        for row in vector_table.index:
            if row in predictions.linkid_revmap:
                link_name = predictions.linkid_revmap[row][0][-1]
                source, target = link_name.split("-")
                # FIXME: Is this the right thing to do?
                val = np.mean(vector_table.loc[row, :])
                # The averaged weight is written symmetrically.
                prediction_df.loc[source, target] = val
                prediction_df.loc[target, source] = val
    else:
        raise ValueError("Unsupported predictions object")

    np.fill_diagonal(prediction_df.values, 0.0)
    prediction_df.fillna(0.0, inplace=True)
    if sign:
        # Keep only the sign of each predicted weight.
        prediction_df[prediction_df > 0] = 1
        prediction_df[prediction_df < 0] = -1
        prediction_df = prediction_df.astype(int)

    t_vec = observations.values.reshape(-1)
    p_vec = prediction_df.values.reshape(-1)
    cm = confusion_matrix(t_vec, p_vec, binary=True, positive_label=0)

    # The matrix was computed with 0 as the positive label, so its cells are
    # read out mirrored along both axes.
    cm_dict = {
        "tn": cm[1, 1],
        "fp": cm[1, 0],
        "fn": cm[0, 1],
        "tp": cm[0, 0],
    }
    precision = calculate_precision(cm_dict)
    sensitivity = calculate_sensitivity(cm_dict)
    return cm_dict, precision, sensitivity
def __str__(self):
    """Return the confusion matrix of the stored labels and predictions as text."""
    from mlxtend.evaluate import confusion_matrix

    # get true and prediction data. It needs to be a one hot encoded 2D array
    # [samples, class] where nr_classes >= 2
    label_values = self.df[LABEL_COLUMN_NAME]._.values
    prediction_values = self.df[PREDICTION_COLUMN_NAME]._.values
    one_hot_truth, one_hot_pred = clean_one_hot_classification(
        label_values, prediction_values)

    # confusion matrix needs integer encoding
    truth_ids = np.apply_along_axis(np.argmax, 1, one_hot_truth)
    pred_ids = np.apply_along_axis(np.argmax, 1, one_hot_pred)

    is_binary = truth_ids.max() < 2
    matrix = confusion_matrix(truth_ids, pred_ids, binary=is_binary)
    return f"{matrix}"
def testConfusion(clf, X, y):
    """Predict with ``clf``, show its confusion matrix and return both.

    Returns:
        ``(y_pred, cm)``: the predictions for ``X`` and the confusion matrix.
    """
    predictions = clf.predict(X)
    accuracy = clf.score(X, y)
    matrix = confusion_matrix(y, predictions)

    # Plot it. Tick labels come from whichever of the two label sets is
    # longer, with a leading "" for the tick at the axis origin.
    true_labels = np.unique(y)
    predicted_labels = np.unique(predictions)
    longest_labels = max(true_labels, predicted_labels, key=len)
    tick_labels = np.append("", longest_labels)

    _, axes = plot_confusion_matrix(conf_mat=matrix)
    axes.set_xticklabels(tick_labels, rotation=90)
    axes.set_yticklabels(tick_labels)
    axes.set_title("Binary testing error {:.2f}".format(accuracy))
    plt.show()
    return predictions, matrix
def show_cm(targets, predictions):
    '''
    Display a multi-class confusion matrix for model testing.

    :param targets: Numpy array containing targets
    :param predictions: Numpy array containing corresponding predictions
    :return: figure object containing confusion matrix
    '''
    matrix = confusion_matrix(y_target=targets,
                              y_predicted=predictions,
                              binary=False)
    figure, _ = plot_confusion_matrix(conf_mat=matrix)
    # Blocks until the plot window is closed.
    plt.show(block=True)
    return figure
def init(X_train, y_train, X_test, y_test, index=0):
    """Project the data with PCA, classify with k-NN and report the results.

    Prints progress messages, the binary confusion matrix and a
    classification report, and saves the matrix plot to
    ``confusion_matrix<index>.png``.
    """
    from sklearn.metrics import classification_report
    from mlxtend.plotting import plot_confusion_matrix
    from mlxtend.evaluate import confusion_matrix

    # Two whitened principal components, fitted on the training data only.
    pca = PCA(n_components=2, whiten=True).fit(X_train)
    print("Treinando PCA...")
    explained = sum(pca.explained_variance_ratio_)
    print('Explained variance percentage = %0.2f' % explained)
    X_train = pca.transform(X_train)
    X_test = pca.transform(X_test)
    print("Transformando PCA...")

    classifier = KNeighborsClassifier(n_neighbors=19,
                                      weights="uniform",
                                      metric="euclidean",
                                      n_jobs=-1)
    print("Treinando classificador...")
    classifier.fit(X_train, y_train)
    print("Classificando...")
    y_predicted = classifier.predict(X_test)

    cm = confusion_matrix(y_target=y_test,
                          y_predicted=y_predicted,
                          binary=True)
    plot_confusion_matrix(conf_mat=cm)
    print(cm)
    plt.savefig("confusion_matrix" + str(index) + ".png", format='png')

    # Classification report
    c_report = classification_report(y_test, y_predicted)
    print("classification_report")
    print(c_report)
def plot_confusion_matrix(df, figsize=(6, 6), **kwargs):
    """Plot the confusion matrix of the label and prediction columns of ``df``.

    Extra keyword arguments are accepted but not used. Returns the figure.
    """
    from mlxtend.evaluate import confusion_matrix
    from mlxtend.plotting import plot_confusion_matrix as render_matrix

    # get true and prediction data. It needs to be a one hot encoded 2D array
    # [samples, class] where nr_classes >= 2
    label_values = df[LABEL_COLUMN_NAME]._.values
    prediction_values = df[PREDICTION_COLUMN_NAME]._.values
    one_hot_truth, one_hot_pred = clean_one_hot_classification(
        label_values, prediction_values)

    # confusion matrix needs integer encoding
    truth_ids = np.apply_along_axis(np.argmax, 1, one_hot_truth)
    pred_ids = np.apply_along_axis(np.argmax, 1, one_hot_pred)

    # plot the confusion matrix
    matrix = confusion_matrix(truth_ids, pred_ids, binary=truth_ids.max() < 2)
    figure, _ = render_matrix(matrix, figsize=figsize)
    return figure
# Bag-of-words features; the vocabulary is learned from the training text only.
count_vectorizer = CountVectorizer(stop_words='english')
count_train = count_vectorizer.fit_transform(x_train.values)
count_test = count_vectorizer.transform(x_test.values)

# One linear SVM per label column.
pred_test = OneVsRestClassifier(LinearSVC(random_state=0)).fit(
    count_train, y_train).predict(count_test)

# Check the effectiveness of the model.
# The label columns are every column of `data` from the third one onwards.
cols = list(data.columns.values)[2:]
pred_testd = pd.DataFrame(pred_test, columns=list(data.columns.values)[2:])

from mlxtend.evaluate import confusion_matrix
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_confusion_matrix

plt.subplot(3, 3, 3)
# One confusion-matrix figure per label column.
for i, col in enumerate(cols):
    cm = confusion_matrix(y_target=y_test[col], y_predicted=pred_testd[col])
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    plt.title(col)
    plt.show()

# Accuracy per label column.
for i, col in enumerate(cols):
    print(accuracy_score(y_test[col], pred_testd[col]))

# Per label column: share of the positive test samples predicted as positive.
for j, col in enumerate(cols):
    uns = [position for position, v in enumerate(y_test[col]) if v == 1]
    ok = sum(pred_testd[col][uns] == 1) / sum(y_test[col] == 1)
    print(col)
    print(ok)
y_pred = [] y_actual = [] path = TEST for i in os.listdir(path): print(i) for f in os.listdir(os.path.join(path, i)): ext = os.path.splitext(f)[1] if ext == '.jpg' or ext == '.jpeg': y_pred.append( recognise(str(IdentityMetadata(path, i, f)), database, FRmodel)) y_actual.append(i) print(y_pred) print(y_actual) cm = confusion_matrix(y_target=y_actual, y_predicted=y_pred, binary=False) fig, ax = plot_confusion_matrix(conf_mat=cm) plt.show() else: FRmodel = faceRecoModel(input_shape=(3, 96, 96)) load_weights_from_FaceNet(FRmodel) #FRmodel.load_weights("mytraining.h5") FRmodel.summary() fix(FRmodel) FRmodel.summary() in_a = Input(shape=(3, 96, 96)) in_p = Input(shape=(3, 96, 96)) in_n = Input(shape=(3, 96, 96)) emb_a = FRmodel(in_a)
# Layers are appended to a `model` object created before this fragment.
model.add(Dense(5, input_dim=17, activation='relu'))
model.add(Dense(5, activation='sigmoid'))
# NOTE(review): softmax over a single unit always evaluates to 1.0, so this
# output layer cannot discriminate - confirm the intended layer width and
# activation.
model.add(Dense(1, activation='softmax'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10)

# Evaluated on the training data.
scores = model.evaluate(x_train, y_train)
print(model.metrics_names[1], scores[1] * 100)

score = model.predict(x_test)
print(score)
score = score.round()
# NOTE(review): this prints `scores` (the evaluate() result) again, not the
# rounded predictions in `score` - confirm which one was meant.
print(scores)
y_target = list(y_test)

from mlxtend.evaluate import confusion_matrix

# NOTE(review): the matrix is built from hard-coded label lists, not from
# `y_target` and `score` computed above - confirm this is intentional.
cm = confusion_matrix(
    y_target=[
        7, 4, 2, 1, 7, 4, 2, 6, 5, 3,
        3, 4, 1, 1, 2, 1, 6, 1, 7, 2,
    ],
    y_predicted=[
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    ],
    binary=False)

# Disabled pandas alternative, kept for reference:
# y_actu = pd.Series([7, 4, 2, 1, 7, 4, 2, 6, 5, 3, 3, 4, 1, 1, 2, 1, 6, 1, 7, 2],name='Actual')
# y_pred = pd.Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],name='Predicted')
# df_confusion = pd.crosstab(y_actu, y_pred)
print(cm)

import matplotlib.pyplot as plt
from mlxtend.plotting import plot_confusion_matrix

fig, ax = plot_confusion_matrix(conf_mat=cm)
plt.show()
def classification(self, x, y):
    """Fit four linear models on a stratified hold-out split and print metrics.

    Logistic regression, a linear SVC, a perceptron and a linear regression
    are trained in turn. Confusion matrices and scores are printed and two
    plot windows are opened. Nothing is returned.
    """
    # Sampling: three stratified splits are generated, but each iteration
    # overwrites the previous one, so only the last split is used below.
    sss = StratifiedShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
    x_train, x_test, y_train, y_test = [], [], [], []
    for train_index, test_index in sss.split(x, y):
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]

    # ---------------------------- LOGISTIC REGRESSION ----------------------------
    classifier_lr = LogisticRegression()
    classifier_lr.fit(x_train, y_train)
    y_predict_logistic = classifier_lr.predict(x_test)

    cm_logistic = metrics.confusion_matrix(y_test, y_predict_logistic)
    print("CONFUSION MATRIX TEST AND PREDICT")
    print(cm_logistic)
    sns.heatmap(cm_logistic, square=True)
    plt.show()

    print("Coefficient of determination on training set:",
          classifier_lr.score(x_train, y_train))
    acc_logistic = accuracy_score(y_test, y_predict_logistic)
    print("Accuracy Logistic Regression" + str(acc_logistic))
    f1_score_logistic = skl.metrics.f1_score(y_test,
                                             y_predict_logistic,
                                             average='macro')
    print("F1-score Logistic Regression: %f" % f1_score_logistic)
    precision_lr = precision_score(y_test,
                                   y_predict_logistic,
                                   pos_label=3,
                                   average='macro')
    print("Precision Logistic Regression " + str(precision_lr))
    recall_lr = recall_score(y_test,
                             y_predict_logistic,
                             pos_label=3,
                             average='macro')
    print("Recall Logistic Regression " + str(recall_lr))

    # -------------------------------- LINEAR SVN --------------------------------
    classifier_svc = svm.LinearSVC()
    classifier_svc.fit(x_train, y_train)
    y_pred_svc_linear = classifier_svc.predict(x_test)

    cm_svc_linear = confusion_matrix(y_test, y_pred_svc_linear)
    print(cm_svc_linear)
    acc_svc_linear = accuracy_score(y_test, y_pred_svc_linear)
    print("Accuracy SVN" + str(acc_svc_linear))
    f1_score_svn = skl.metrics.f1_score(y_test,
                                        y_pred_svc_linear,
                                        average='macro')
    print("F1-score Linear SVN: %f" % f1_score_svn)
    precision_svn = precision_score(y_test,
                                    y_pred_svc_linear,
                                    pos_label=3,
                                    average='macro')
    print("Precision Linear SVN " + str(precision_svn))
    recall_svn = recall_score(y_test,
                              y_pred_svc_linear,
                              pos_label=3,
                              average='macro')
    print("Recall Linear SVN " + str(recall_svn))
    print(classifier_svc.coef_)
    print("classifier_svc.coef_")

    # -------------------------------- PERCEPTRON --------------------------------
    # NOTE(review): `n_iter` is the pre-0.21 scikit-learn spelling of this
    # option - confirm the installed version still accepts it.
    clf_perceptron = Perceptron(n_iter=2, shuffle=False)
    clf_perceptron.fit(x_train, y_train)
    y_pred_perceptron = clf_perceptron.predict(x_test)

    cm_perceptron = confusion_matrix(y_test, y_pred_perceptron)
    print(cm_perceptron)
    acc_perceptron = accuracy_score(y_test, y_pred_perceptron)
    print("Accuracy Perceptron" + str(acc_perceptron))
    f1_score_perceptron = skl.metrics.f1_score(y_test,
                                               y_pred_perceptron,
                                               average='macro')
    print("F1-score Perceptron: %f" % f1_score_perceptron)
    precision_perceptron = precision_score(y_test,
                                           y_pred_perceptron,
                                           pos_label=3,
                                           average='macro')
    print("Precision Perceptron " + str(precision_perceptron))
    recall_perceptron = recall_score(y_test,
                                     y_pred_perceptron,
                                     pos_label=3,
                                     average='macro')
    print("Recall Perceptron " + str(recall_perceptron))

    # ----------------------------- LINEAR REGRESSION -----------------------------
    regression = linear_model.LinearRegression()
    regression.fit(x_train, y_train)
    y_pred_regression = regression.predict(x_test)
    # NOTE(review): the regression output is passed to the confusion matrix
    # without any rounding or thresholding - confirm this is intended.
    cm_regression = confusion_matrix(y_test, y_pred_regression)
    print(cm_regression)

    print('Coefficients for Linear Regression: \n', regression.coef_)
    plt.figure()
    plt.plot(regression.coef_, color='navy', linestyle='--')
    plt.title('Coefficients for Linear Regression')
    plt.show()
def cnn_recognition():
    """Train, evaluate and save a CNN classifier built from ``chords.csv``.

    For every CSV row, the first column is loaded with librosa (first 1.5 s)
    and turned into a mel spectrogram; the third column is used as the class
    id (ten classes). Only spectrograms of shape (128, 65) are kept.

    Side effects: writes ``images/Test_Loss_and_Test_Accuracy``,
    ``images/model_loss``, ``images/model_accuracy``,
    ``images/confusion_matrix``, ``model_accuracy_and_loss.txt`` and
    ``model.h5``, and opens several plot windows.
    """
    # Load the data from chords.csv
    df = pd.read_csv('chords.csv')
    data = []
    for row in df.itertuples():
        y, sr = librosa.core.load(row[1], duration=1.5)
        mfcc = librosa.feature.melspectrogram(y=y, sr=sr)
        if mfcc.shape == (128, 65):
            data.append((mfcc, row[3]))
    print("number of audio samples : " + str(len(data)))

    # Shuffle the data randomly and load to training and testing sets
    random.shuffle(data)
    train = data[:1405]
    test = data[1405:]
    X_train, y_train = zip(*train)
    X_test, y_test = zip(*test)

    # Add the channel axis expected by Conv2D: (128, 65) -> (128, 65, 1)
    X_train = np.array([x.reshape((128, 65, 1)) for x in X_train])
    X_test = np.array([x.reshape((128, 65, 1)) for x in X_test])

    # One hot encoding to model class_id
    y_train = np.array(to_categorical(y_train, 10))
    y_test = np.array(to_categorical(y_test, 10))

    # Building Sequential Model
    model = Sequential()
    input_shape = (128, 65, 1)
    model.add(Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape))
    model.add(MaxPooling2D((4, 2), strides=(4, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(48, (5, 5), padding="valid"))
    model.add(MaxPooling2D((4, 2), strides=(4, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(48, (5, 5), padding="valid"))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dropout(rate=0.5))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(rate=0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.summary()
    model.compile(optimizer="Adam",
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])

    # Train the Model (the test set doubles as the validation set)
    hist = model.fit(x=X_train,
                     y=y_train,
                     epochs=40,
                     batch_size=30,
                     validation_data=(X_test, y_test))

    # Evaluation of the Model
    score = model.evaluate(x=X_test, y=y_test)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    img = 'Test_Loss_and_Test_Accuracy'
    y_pos = np.arange(2)
    column = ['Test Loss', 'Test Accuracy']
    plt.bar(y_pos, score, label='Loss and Accuracy')
    plt.xticks(y_pos, column)
    plt.ylabel('Percentage')
    plt.legend()
    plt.savefig('images/{}'.format(img))
    plt.show()

    # Saving the accuracy and loss of the model in txt file
    with open('model_accuracy_and_loss.txt', 'w') as f:
        f.write('Test Loss : ' + str(score[0]) + '\n')
        f.write('Test Accuracy : ' + str(score[1]))

    train_loss = hist.history['loss']
    validation_loss = hist.history['val_loss']
    train_acc = hist.history['accuracy']
    validation_acc = hist.history['val_accuracy']
    # Derive the x axis from the recorded history instead of repeating the
    # epoch count, so the plots stay valid if training ends early or the
    # epoch setting changes.
    num_epochs = range(1, len(train_loss) + 1)

    # Plotting Model Loss
    name1 = 'model_loss'
    plt.figure(1, figsize=(8, 6))
    plt.plot(num_epochs, train_loss)
    plt.plot(num_epochs, validation_loss)
    plt.xlabel('Number of Epochs')
    plt.ylabel('Loss')
    plt.title('Training Loss vs Validation Loss')
    plt.grid(True)
    plt.legend(['Training Loss', 'Validation Loss'])
    plt.savefig('images/{}'.format(name1))
    plt.show()

    # Plotting Model Accuracy
    name2 = 'model_accuracy'
    plt.figure(2, figsize=(8, 6))
    plt.plot(num_epochs, train_acc)
    plt.plot(num_epochs, validation_acc)
    plt.xlabel('Epochs')
    # This axis shows accuracy; it was previously mislabelled 'Loss'.
    plt.ylabel('Accuracy')
    plt.title('Training Accuracy vs Validation Accuracy')
    plt.grid(True)
    plt.legend(['Training Accuracy', 'Validation Accuracy'])
    plt.savefig('images/{}'.format(name2))
    plt.show()

    # Predicting the Model. Sequential.predict_classes() no longer exists in
    # current Keras; argmax over the softmax output is its documented
    # equivalent for multi-class models.
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    label_id = np.argmax(y_test, axis=1)
    conf_matrix = confusion_matrix(label_id, y_pred, binary=False)
    print(conf_matrix)

    # Visualizing the performance of the Model
    name3 = 'confusion_matrix'
    plot_confusion_matrix(conf_mat=conf_matrix, class_names=chord_label)
    plt.title('Confusion Matrix')
    plt.savefig('images/{}'.format(name3))
    plt.show()

    # SAVE THE MODEL
    model.save('model.h5')
def plot_confusion_matrix(self, figsize=(12, 12)) -> plt.Figure:
    """Plot the confusion matrix of the stored one-hot labels and predictions.

    Each row is reduced to the position of its largest value before the
    matrix is computed. Only the figure is returned.
    """

    def _row_argmax(row):
        return np.argmax(row)

    y = self.df[LABEL_COLUMN_NAME].apply(_row_argmax, raw=True, axis=1)
    y_hat = self.df[PREDICTION_COLUMN_NAME].apply(_row_argmax, raw=True, axis=1)

    matrix = confusion_matrix(y.values, y_hat.values)
    figure, _ = plot_confusion_matrix(matrix, figsize=figsize)
    return figure
# Split the test matrix into features and the label column.
X_test = test_data[:, 0:size_new - 2]
y_test = test_data[:, size_new - 1]

# Scaling statistics come from the training data only.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Random forest: accuracy is the share of predictions equal to the label.
model_rfc = RandomForestClassifier(n_estimators=40)
model_rfc.fit(X_train, y_train)
pred_rfc = model_rfc.predict(X_test)
acc_rfc = sum(
    1 for predicted, actual in zip(pred_rfc, y_test) if predicted == actual)
acc_rfc = acc_rfc / len(pred_rfc)
print("rfc " + str(acc_rfc))

cm = confusion_matrix(y_target=y_test, y_predicted=pred_rfc, binary=False)
fig, ax = plot_confusion_matrix(conf_mat=cm)
plt.show()

# k-nearest neighbours, scored the same way.
model_knn = KNeighborsClassifier(n_neighbors=15)
model_knn.fit(X_train, y_train)
pred_knn = model_knn.predict(X_test)
acc_knn = sum(
    1 for predicted, actual in zip(pred_knn, y_test) if predicted == actual)
acc_knn = acc_knn / len(pred_knn)
print("knn " + str(acc_knn) + "\n")

datafile.close()
def get_confusion_matrix_one_hot(runname, model_results, truth):
    '''Plot the confusion matrix and ROC curves for a 3-class one-hot model.

    model_results and truth should be for one-hot format, i.e, have >= 2
    columns, where truth is 0/1, and max along each row of model_results
    is model result.

    Figures are saved under /home/clarkr/Figures/ and the ROC data under
    /home/clarkr/confmatdata/, each prefixed with ``runname``.

    Returns the confusion matrix.
    '''
    class_ids = [0, 1, 2]
    n_classes = len(class_ids)
    lw = 2

    print(model_results, truth)
    # mr: predicted class per sample; mr2: the raw score rows;
    # mr3: the predicted classes re-binarized to one-hot.
    mr = [np.argmax(x) for x in model_results]
    mr2 = list(model_results)
    # Binarize once, after all predictions are collected, rather than on
    # every loop iteration.
    mr3 = label_binarize(mr, classes=class_ids)

    # Truncate everything to the shorter of predictions and truth.
    no_ev = min(len(mr), len(truth))
    print(no_ev)
    model_results = np.asarray(mr)[:no_ev]
    truth = np.asarray(truth)[:no_ev]
    print(np.shape(model_results), np.shape(truth))
    mr2 = mr2[:no_ev]
    mr3 = mr3[:no_ev]

    cm = confusion_matrix(y_target=truth,
                          y_predicted=np.rint(np.squeeze(model_results)),
                          binary=False)
    fig, ax = plot_confusion_matrix(conf_mat=cm, figsize=(5, 5))
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig('/home/clarkr/Figures/' + runname + 'confmat.png')

    # Per-class ROC curves from the raw scores.
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    t2 = label_binarize(truth, classes=class_ids)
    print(mr2[:100])
    scores = np.asarray(mr2)
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(t2[:, i], scores[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Macro average: interpolate all ROC curves at the union of their FPR
    # points, then average. np.interp replaces the bare `interp` name (the
    # old scipy alias of the same function).
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Micro average, computed from the binarized hard predictions.
    fpr["micro"], tpr["micro"], _ = roc_curve(t2.ravel(), mr3.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='Micro-average ROC curve (area = {0:0.2f})'.format(
                 roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=4)
    plt.plot(fpr["macro"],
             tpr["macro"],
             label='Macro-average ROC curve (area = {0:0.2f})'.format(
                 roc_auc["macro"]),
             color='navy',
             linestyle=':',
             linewidth=4)
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'.format(
                     i, roc_auc[i]))
    plt.legend(loc="lower right")
    plt.savefig('/home/clarkr/Figures/' + runname + '_roc.png')

    np.save('/home/clarkr/confmatdata/' + runname + '_fp.npy', fpr)
    np.save('/home/clarkr/confmatdata/' + runname + '_tp.npy', tpr)
    return cm
def CNN_function():
    """Train a small CNN on the module-level ``X`` / ``Y`` data and report results.

    ``X`` is categorical-encoded and used as the labels; ``Y`` is reshaped to
    28x28x1 inputs. The data is split into train, validation ("test") and a
    held-out "predict" part. The accuracy curve, the held-out accuracy and
    the confusion-matrix heatmap are displayed. Nothing is returned.
    """
    # Integer class codes for the labels.
    X_label = pd.Categorical(X).codes

    # Split the data into train and test dataset
    x_train_label, x_test_label, x_train_dataset, x_test_dataset = train_test_split(
        X_label, Y, test_size=0.19, random_state=3)
    # Split the train data further into training and prediction datasets
    x_train_label, x_predict_label, x_train_dataset, x_predict_dataset = train_test_split(
        x_train_label, x_train_dataset, test_size=0.23, random_state=3)

    x_train_nmpy = np.array(x_train_label)
    x_test_nmpy = np.array(x_test_label)
    x_predict_nmpy = np.array(x_predict_label)

    # Normalize the pixel values of the train data and test data
    x_train_dataset = tf.keras.utils.normalize(x_train_dataset, axis=1)
    x_test_dataset = tf.keras.utils.normalize(x_test_dataset, axis=1)
    x_predict_dataset = tf.keras.utils.normalize(x_predict_dataset, axis=1)

    # Reshaping the dataset for input to neural network
    image_shape = (28, 28, 1)
    x_train_dataset = x_train_dataset.reshape(
        (x_train_dataset.shape[0], ) + image_shape).astype('float32')
    x_test_dataset = x_test_dataset.reshape(
        (x_test_dataset.shape[0], ) + image_shape).astype('float32')
    x_predict_dataset = x_predict_dataset.reshape(
        (x_predict_dataset.shape[0], ) + image_shape).astype('float32')

    # Sequential model: two convolution layers (30 filters of 6x6, 15 filters
    # of 3x3), each followed by 2x2 max pooling, then dropout, flatten, two
    # dense layers of 128 units and a 10-way softmax output.
    model = Sequential()
    model.add(Conv2D(30, (6, 6), input_shape=(28, 28, 1), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(15, (3, 3), activation='relu'))
    model.add(MaxPooling2D(2, 2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(10, activation='softmax'))

    # Compilation of given model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Training our model
    num_epochs = 30
    Model1 = model.fit(x_train_dataset,
                       x_train_nmpy,
                       epochs=num_epochs,
                       validation_data=(x_test_dataset, x_test_nmpy))
    plt.plot(Model1.history['accuracy'])
    plt.plot(Model1.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

    # Predictions and Testing Part: most probable class per held-out sample.
    predictions = model.predict(x_predict_dataset)
    pred = [
        np.argmax(predictions[i]) for i in range(0, len(x_predict_label))
    ]

    total = len(pred)
    n_p = sum(1 for guess, actual in zip(pred, x_predict_label)
              if int(guess) == int(actual))
    print("The accuracy of test set: " + str((n_p / total) * 100))

    a = x_predict_label.tolist()
    cm = confusion_matrix(a, pred)
    print(cm)

    # Plotting the confusion matrix
    df_cm = pd.DataFrame(cm, range(10), range(10))
    sn.set(font_scale=1.2)  # for label size
    sn.heatmap(df_cm, annot=True, annot_kws={"size": 16})  # font size
    plt.show()