print("Loaded model from disk") # scaling test features x_test /= 255. # getting model predictions test_predictions = model.predict(x_test) labels_ohe_names = pd.get_dummies(target_labels, sparse=True) predictions = pd.DataFrame(test_predictions, columns=labels_ohe_names.columns) predictions = list(predictions.idxmax(axis=1)) test_labels = list(y_test) #evaluate model performance meu.get_metrics(true_labels=test_labels, predicted_labels=predictions) meu.display_classification_report(true_labels=test_labels, predicted_labels=predictions, classes=list(labels_ohe_names.columns)) # print(meu.display_confusion_matrix_pretty(true_labels=test_labels, # predicted_labels=predictions, # classes=list(labels_ohe_names.columns))) font = {'family': 'Times New Roman', 'size': 12} matplotlib.rc('font', **font) mat = confusion_matrix(test_labels, predictions) plot_confusion_matrix(conf_mat=mat, figsize=(4, 4), class_names=list(labels_ohe_names.columns),
random_search = RandomizedSearchCV(SVC(random_state=42), param_distributions=param_grid,
                                   n_iter=50, cv=5)
random_search.fit(X_train, y_train)
print("Best parameters set found on development set:")
random_search.best_params_


# ## Evaluate Randomized Search Tuned Model

# In[35]:


rs_best = random_search.best_estimator_
rs_y_pred = rs_best.predict(X_test)
meu.get_metrics(true_labels=y_test, predicted_labels=rs_y_pred)


# # Model Interpretation

# In[36]:


from skater.core.explanations import Interpretation
from skater.model import InMemoryModel

interpreter = Interpretation(X_test, feature_names=data.feature_names)
model = InMemoryModel(logistic.predict_proba, examples=X_train,
                      target_names=logistic.classes_)


# ## Visualize Feature Importances
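# A minimal sketch of the step the heading above announces, assuming skater's
# Interpretation.feature_importance.plot_feature_importance API applies to the
# interpreter and in-memory model objects built in the previous cell.
interpreter.feature_importance.plot_feature_importance(model, ascending=False)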
                         ('lr', LogisticRegression(penalty='l2', max_iter=100, random_state=42))
                        ])

param_grid = {'tfidf__ngram_range': [(1, 1), (1, 2)],
              'lr__C': [1, 5, 10]
             }

gs_lr = GridSearchCV(lr_pipeline, param_grid, cv=5, verbose=2)
gs_lr = gs_lr.fit(train_corpus, train_label_names)

# evaluate best tuned model on the test dataset
best_lr_test_score = gs_lr.score(test_corpus, test_label_names)
print('Test Accuracy :', best_lr_test_score)


# Tuning the Linear SVM model
svm_pipeline = Pipeline([('tfidf', TfidfVectorizer()),
                         ('svm', LinearSVC(random_state=42))
                        ])

param_grid = {'tfidf__ngram_range': [(1, 1), (1, 2)],
              'svm__C': [0.01, 0.1, 1, 5]
             }

gs_svm = GridSearchCV(svm_pipeline, param_grid, cv=5, verbose=2)
gs_svm = gs_svm.fit(train_corpus, train_label_names)

# evaluate best tuned model on the test dataset
best_svm_test_score = gs_svm.score(test_corpus, test_label_names)
print('Test Accuracy :', best_svm_test_score)


import model_evaluation_utils as meu

mnb_predictions = gs_mnb.predict(test_corpus)
unique_classes = list(set(test_label_names))
meu.get_metrics(true_labels=test_label_names, predicted_labels=mnb_predictions)
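# A natural follow-up, mirroring how the other models in this document are
# evaluated: a per-class report for the tuned Multinomial Naive Bayes model.
# This is a sketch and assumes gs_mnb was fit earlier with the same
# pipeline/grid-search pattern used for the models above.
meu.display_classification_report(true_labels=test_label_names,
                                  predicted_labels=mnb_predictions,
                                  classes=unique_classes)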
# see top predictors in each class
eli5.show_weights(svm_best[1], vec=svm_best[0], top=40)

test_corpus[8]

# inspect the top contributing features for a single test document
eli5.show_prediction(svm_best[1], test_corpus[8], vec=svm_best[0], top=10)


# ### model performance evaluation with Linear SVM

# +
import model_evaluation_utils as meu

svm_predictions = gs_svm.predict(test_corpus)
unique_classes = list(set(test_label_nums))
meu.get_metrics(true_labels=test_label_nums, predicted_labels=svm_predictions)
# -

meu.display_classification_report(true_labels=test_label_nums,
                                  predicted_labels=svm_predictions,
                                  classes=unique_classes)

from confusion_matrices import confusion_matrices
confusion_matrices(test_label_nums, svm_predictions)


# ### checking mismatched values

# Extract test document row numbers
train_idx, test_idx = train_test_split(np.array(range(len(data_df['review']))),
                                       test_size=0.33, random_state=42)
test_idx
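# A minimal sketch of the mismatch check announced above: align each test document's
# row number with its true and predicted label and keep the rows where they disagree.
# Assumes pandas is available as pd (as elsewhere in this document) and that
# test_label_nums and svm_predictions are ordered the same way as test_idx, which
# holds when the same train_test_split call produced the test set.
mismatch_df = pd.DataFrame({'doc_row': test_idx,
                            'true_label': test_label_nums,
                            'predicted_label': svm_predictions})
mismatch_df = mismatch_df[mismatch_df['true_label'] != mismatch_df['predicted_label']]
mismatch_df.head(10)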
l1 = ax1.legend(loc="best")

ax2.plot(epochs, history.history['loss'], label='Train Loss')
ax2.plot(epochs, history.history['val_loss'], label='Validation Loss')
ax2.set_xticks(epochs)
ax2.set_ylabel('Loss Value')
ax2.set_xlabel('Epoch')
ax2.set_title('Loss')
l2 = ax2.legend(loc="best")

#%%
predictions = model.predict(X_test / 255.)

#%%
test_labels = list(y_test.squeeze())
predictions = list(predictions.argmax(axis=1))

#%%
get_metrics(true_labels=test_labels, predicted_labels=predictions)

#%% md
## Visualize Predictions

#%%
label_dict = {0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer',
              5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'}
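#%%
# A minimal sketch of the visualization announced above: plot a few CIFAR-10 test
# images with their actual and predicted class names from label_dict. Assumes
# X_test still holds the raw (unscaled) images and that matplotlib.pyplot is
# available as plt, as in the training-curve plots above.
fig, axes = plt.subplots(2, 5, figsize=(12, 5))
for ax, idx in zip(axes.ravel(), range(10)):
    ax.imshow(X_test[idx].astype('uint8'))
    ax.set_title('Actual: {0}\nPredicted: {1}'.format(label_dict[test_labels[idx]],
                                                      label_dict[predictions[idx]]))
    ax.axis('off')
plt.tight_layout()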