Example #1
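# The loading step itself is not part of this snippet; a typical Keras
# pattern as a minimal sketch -- the file names here are assumptions:
from keras.models import model_from_json

with open('model.json', 'r') as f:
    model = model_from_json(f.read())
model.load_weights('model_weights.h5')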
    print("Loaded model from disk")

    # scaling test features
    x_test /= 255.

    # getting model predictions
    test_predictions = model.predict(x_test)

    labels_ohe_names = pd.get_dummies(target_labels, sparse=True)
    predictions = pd.DataFrame(test_predictions,
                               columns=labels_ohe_names.columns)
    predictions = list(predictions.idxmax(axis=1))
    test_labels = list(y_test)

    #evaluate model performance
    meu.get_metrics(true_labels=test_labels, predicted_labels=predictions)

    meu.display_classification_report(true_labels=test_labels,
                                      predicted_labels=predictions,
                                      classes=list(labels_ohe_names.columns))

    # print(meu.display_confusion_matrix_pretty(true_labels=test_labels,
    #                                 predicted_labels=predictions,
    #                                 classes=list(labels_ohe_names.columns)))

    font = {'family': 'Times New Roman', 'size': 12}
    matplotlib.rc('font', **font)
    mat = confusion_matrix(test_labels, predictions)
    plot_confusion_matrix(conf_mat=mat,
                          figsize=(4, 4),
                          class_names=list(labels_ohe_names.columns),
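# The next cell references `param_grid`, `X_train`, and `y_train` from earlier
# (missing) cells; the search space below is a hypothetical SVC example, not
# the original notebook's:
from scipy.stats import expon
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV

param_grid = {'C': expon(scale=10),
              'gamma': expon(scale=0.1),
              'kernel': ['rbf', 'linear']}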
random_search = RandomizedSearchCV(SVC(random_state=42), param_distributions=param_grid,
                                   n_iter=50, cv=5)
random_search.fit(X_train, y_train)

print("Best parameters set found on development set:")
random_search.best_params_


# ## Evaluate Randomized Search Tuned Model

# In[35]:


rs_best = random_search.best_estimator_
rs_y_pred = rs_best.predict(X_test)
meu.get_metrics(true_labels=y_test, predicted_labels=rs_y_pred)


# # Model Interpretation

# In[36]:


from skater.core.explanations import Interpretation
from skater.model import InMemoryModel

interpreter = Interpretation(X_test, feature_names=data.feature_names)
model = InMemoryModel(logistic.predict_proba, examples=X_train, target_names=logistic.classes_)


# ## Visualize Feature Importances
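# The plotting cell itself is missing; with skater, feature importances can be
# drawn from the Interpretation object -- a minimal sketch:
plots = interpreter.feature_importance.plot_feature_importance(model, ascending=False)
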
Example #3
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

# Tuning the Logistic Regression model
lr_pipeline = Pipeline([('tfidf', TfidfVectorizer()),
                        ('lr', LogisticRegression(penalty='l2', max_iter=100, random_state=42))
                       ])
param_grid = {'tfidf__ngram_range': [(1, 1), (1, 2)],
              'lr__C': [1, 5, 10]
}
gs_lr = GridSearchCV(lr_pipeline, param_grid, cv=5, verbose=2)
gs_lr = gs_lr.fit(train_corpus, train_label_names)

# evaluate best tuned model on the test dataset
best_lr_test_score = gs_lr.score(test_corpus, test_label_names)
print('Test Accuracy :', best_lr_test_score)

# Tuning the Linear SVM model
svm_pipeline = Pipeline([('tfidf', TfidfVectorizer()),
                        ('svm', LinearSVC(random_state=42))
                       ])
param_grid = {'tfidf__ngram_range': [(1, 1), (1, 2)],
              'svm__C': [0.01, 0.1, 1, 5]
}
gs_svm = GridSearchCV(svm_pipeline, param_grid, cv=5, verbose=2)
gs_svm = gs_svm.fit(train_corpus, train_label_names)
# evaluating best tuned model on the test dataset
best_svm_test_score = gs_svm.score(test_corpus, test_label_names)
print('Test Accuracy :', best_svm_test_score)

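# `gs_mnb` (a tuned Multinomial Naive Bayes model) comes from an earlier cell;
# a minimal sketch of how it could be built, mirroring the pipelines above
# (the alpha grid is an assumption):
from sklearn.naive_bayes import MultinomialNB

mnb_pipeline = Pipeline([('tfidf', TfidfVectorizer()),
                         ('mnb', MultinomialNB())
                        ])
param_grid = {'tfidf__ngram_range': [(1, 1), (1, 2)],
              'mnb__alpha': [0.01, 0.1, 1]}
gs_mnb = GridSearchCV(mnb_pipeline, param_grid, cv=5, verbose=2)
gs_mnb = gs_mnb.fit(train_corpus, train_label_names)
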
import model_evaluation_utils as meu
mnb_predictions = gs_mnb.predict(test_corpus)
unique_classes = list(set(test_label_names))
meu.get_metrics(true_labels=test_label_names, predicted_labels=mnb_predictions)
# see top predictors in each class
import eli5

svm_best = gs_svm.best_estimator_  # tuned pipeline: step 0 = tfidf, step 1 = svm
eli5.show_weights(svm_best[1], vec=svm_best[0], top=40)

# inspect a sample test document
test_corpus[8]

# explain which features drive the prediction for this test document
eli5.show_prediction(svm_best[1], test_corpus[8], vec=svm_best[0], top=10)

# ### model performance evaluation with Linear SVM

# +
import model_evaluation_utils as meu

svm_predictions = gs_svm.predict(test_corpus)
unique_classes = list(set(test_label_nums))
meu.get_metrics(true_labels=test_label_nums, predicted_labels=svm_predictions)
# -

meu.display_classification_report(true_labels=test_label_nums,
                                  predicted_labels=svm_predictions,
                                  classes=unique_classes)

from confusion_matrices import confusion_matrices

confusion_matrices(test_label_nums, svm_predictions)

# ### checking mismatched values

# Extract test document row numbers (same split parameters as used for training)
import numpy as np
from sklearn.model_selection import train_test_split

train_idx, test_idx = train_test_split(np.array(range(len(data_df['review']))),
                                       test_size=0.33, random_state=42)
test_idx
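# A minimal sketch of lining the predictions up with the original rows so
# mismatches can be inspected (assumes this split produced test_corpus, and
# that data_df has a label column -- 'sentiment' is an assumed name):
test_df = data_df.iloc[test_idx].copy()
test_df['predicted'] = svm_predictions
mismatched = test_df[test_df['sentiment'] != test_df['predicted']]
mismatched[['review', 'sentiment', 'predicted']].head()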
Example #6
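# The start of this plotting cell is missing; a sketch of the likely setup,
# mirroring the loss subplot below (the 'accuracy'/'val_accuracy' history
# keys and the figure layout are assumptions):
import matplotlib.pyplot as plt

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
epochs = list(range(1, len(history.history['loss']) + 1))
ax1.plot(epochs, history.history['accuracy'], label='Train Accuracy')
ax1.plot(epochs, history.history['val_accuracy'], label='Validation Accuracy')
ax1.set_xticks(epochs)
ax1.set_ylabel('Accuracy Value')
ax1.set_xlabel('Epoch')
ax1.set_title('Accuracy')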
l1 = ax1.legend(loc="best")

ax2.plot(epochs, history.history['loss'], label='Train Loss')
ax2.plot(epochs, history.history['val_loss'], label='Validation Loss')
ax2.set_xticks(epochs)
ax2.set_ylabel('Loss Value')
ax2.set_xlabel('Epoch')
ax2.set_title('Loss')
l2 = ax2.legend(loc="best")
#%%
predictions = model.predict(X_test / 255.)
#%%
test_labels = list(y_test.squeeze())
predictions = list(predictions.argmax(axis=1))
#%%
from model_evaluation_utils import get_metrics
get_metrics(true_labels=test_labels, predicted_labels=predictions)
#%% md
## Visualize Predictions
#%%
label_dict = {
    0: 'airplane',
    1: 'automobile',
    2: 'bird',
    3: 'cat',
    4: 'deer',
    5: 'dog',
    6: 'frog',
    7: 'horse',
    8: 'ship',
    9: 'truck'
}
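#%%
# A minimal sketch of the visualization this section leads up to (assumes
# X_test holds the raw CIFAR-10 images and reuses the test_labels /
# predictions lists built above):
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 5, figsize=(12, 3))
for idx, ax in enumerate(axes):
    ax.imshow(X_test[idx])
    ax.set_title('true: {}\npred: {}'.format(label_dict[test_labels[idx]],
                                             label_dict[predictions[idx]]))
    ax.axis('off')
plt.show()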