def plot_tnk(mrs):
    """Scatter-plot the PCA-transformed 'tnk' test set on the current axes.

    Labelled outliers (label == -1) are drawn as red triangles, everything
    else as blue crosses. No legend entries are added.
    """
    transformed = data_utils.get_test_transformed('tnk', mrs)
    outlier_mask = transformed['label'] == -1
    outliers = transformed[outlier_mask]
    normals = transformed[~outlier_mask]
    plt.scatter(outliers[['pca_1']], outliers[['pca_2']],
                linewidth=0, c='red', marker='^')
    plt.scatter(normals[['pca_1']], normals[['pca_2']],
                linewidth=0, c='blue', marker='X')
def plot_new_points(test_dataset, mrs):
    """Overlay a PCA-transformed test set on the current axes with a legend.

    Parameters
    ----------
    test_dataset : dataset identifier forwarded to
        ``data_utils.get_test_transformed`` (e.g. ``'tnk'``, ``'nih'``).
    mrs : second argument forwarded to ``data_utils.get_test_transformed``
        (semantics defined by data_utils — not visible here).

    Points with label == -1 are plotted in yellow as test outliers, all
    others in blue as test data points.
    """
    test_bi_pca_all = data_utils.get_test_transformed(test_dataset, mrs)
    # see what happened
    ot = test_bi_pca_all[test_bi_pca_all['label'] == -1]
    plt.scatter(ot[['pca_1']], ot[['pca_2']], s=50, linewidth=0,
                c='yellow', alpha=1, label='Test outliers')
    noot = test_bi_pca_all[test_bi_pca_all['label'] != -1]
    plt.scatter(noot[['pca_1']], noot[['pca_2']], s=50, linewidth=0,
                c='blue', alpha=1, label='Test data points')
    legend = plt.legend(loc='upper left')
    # BUG FIX: the original indexed legendHandles[2] and [3], which only works
    # when two labeled artists already exist on the axes (e.g. training data
    # plotted beforehand) and raises IndexError on a fresh figure. Index the
    # last two handles instead — these are always this function's own scatters,
    # and the result is identical in the original four-handle scenario.
    legend.legendHandles[-2]._sizes = [30]
    legend.legendHandles[-1]._sizes = [40]
def plot_nih(mrs):
    """Scatter-plot the PCA-transformed 'nih' test set on the current axes.

    Labelled outliers (label == -1) are drawn as red triangles and tagged
    'testing outliers'; all other rows are blue crosses tagged
    'testing normal data'. (Legend creation is left to the caller.)
    """
    transformed = data_utils.get_test_transformed('nih', mrs)
    outlier_mask = transformed['label'] == -1
    outliers = transformed[outlier_mask]
    normals = transformed[~outlier_mask]
    plt.scatter(outliers[['pca_1']], outliers[['pca_2']], linewidth=0,
                c='red', marker='^', label='testing outliers')
    plt.scatter(normals[['pca_1']], normals[['pca_2']], linewidth=0,
                c='blue', marker='X', label='testing normal data')
def predict_new_points(test_dataset, clusterer, mrs):
    """Score a test set against a fitted HDBSCAN clusterer and plot it.

    Parameters
    ----------
    test_dataset : dataset identifier forwarded to
        ``data_utils.get_test_transformed``.
    clusterer : fitted HDBSCAN model accepted by
        ``hdbscan.approximate_predict``.
    mrs : second argument forwarded to ``data_utils.get_test_transformed``
        (semantics defined by data_utils — not visible here).

    Returns
    -------
    (sensitivity, specificity, accuracy) as computed by
    ``data_utils.show_performance`` on binary labels (-1 outlier, 0 normal).

    NOTE(review): a later, identically-named ``predict_new_points`` in this
    module shadows this definition at import time — confirm which one callers
    actually get.
    """
    test_bi_pca_all = data_utils.get_test_transformed(test_dataset, mrs)
    labels = test_bi_pca_all[['label']]
    test_bi_pca = test_bi_pca_all.drop(['label'], axis=1)
    # see what happened
    ot = test_bi_pca_all[test_bi_pca_all['label'] == -1]
    plt.scatter(ot[['pca_1']], ot[['pca_2']], s=50, linewidth=0,
                c='yellow', alpha=1, label='Test outliers')
    noot = test_bi_pca_all[test_bi_pca_all['label'] != -1]
    plt.scatter(noot[['pca_1']], noot[['pca_2']], s=50, linewidth=0,
                c='blue', alpha=1, label='Test data points')
    legend = plt.legend(loc='upper left')
    # BUG FIX: hard-coded legendHandles[2]/[3] assumed two labeled artists
    # already on the axes and raises IndexError otherwise; the last two
    # handles are always this function's own scatters.
    legend.legendHandles[-2]._sizes = [30]
    legend.legendHandles[-1]._sizes = [40]
    #
    # Predicted strengths are unused — discard them explicitly.
    test_labels, _ = hdbscan.approximate_predict(clusterer, test_bi_pca)
    # Collapse every real cluster id (>= 0) to 0 so the prediction becomes a
    # binary outlier label matching the ground truth: -1 outlier, 0 normal.
    test_labels[test_labels > -1] = 0
    sensitivity, specificity, accuracy = data_utils.show_performance(
        labels, test_labels)
    return sensitivity, specificity, accuracy
def predict_new_points(bi_df_pca_unique, test_dataset, mrs, cutoff):
    """Flag test-set outliers with a Local Outlier Factor novelty model.

    Fits LOF (novelty mode) on the PCA-transformed training data, scores the
    test set, thresholds the scores at ``cutoff``, plots the test points, and
    reports classification performance against the ground-truth labels.

    Parameters
    ----------
    bi_df_pca_unique : training DataFrame containing a 'label' column plus
        PCA feature columns.
    test_dataset : dataset identifier forwarded to
        ``data_utils.get_test_transformed``.
    mrs : second argument forwarded to ``data_utils.get_test_transformed``
        (semantics defined by data_utils — not visible here).
    cutoff : float threshold on the (negated) LOF score above which a point
        is labelled an outlier.

    Returns
    -------
    (sensitivity, specificity, accuracy) as computed by
    ``data_utils.show_performance``.
    """
    test_bi_pca_all = data_utils.get_test_transformed(test_dataset, mrs)
    true_labels = test_bi_pca_all[['label']]
    test_bi_pca = test_bi_pca_all.drop(['label'], axis=1)
    train_bi_pca = bi_df_pca_unique.drop(['label'], axis=1)
    # see what happened
    ot = test_bi_pca_all[test_bi_pca_all['label'] == -1]
    plt.scatter(ot[['pca_1']], ot[['pca_2']], s=50, linewidth=0,
                c='yellow', alpha=1, label='Test outliers')
    noot = test_bi_pca_all[test_bi_pca_all['label'] != -1]
    plt.scatter(noot[['pca_1']], noot[['pca_2']], s=50, linewidth=0,
                c='blue', alpha=1, label='Test data points')
    legend = plt.legend(loc='upper left')
    # BUG FIX: hard-coded legendHandles[2]/[3] assumed two labeled artists
    # already on the axes and raises IndexError otherwise; the last two
    # handles are always this function's own scatters.
    legend.legendHandles[-2]._sizes = [30]
    legend.legendHandles[-1]._sizes = [40]
    #
    # NOTE(review): k_neighbors and outliers_fraction are free variables here —
    # presumably module-level globals; confirm they are defined before calling.
    clf_predict = LocalOutlierFactor(n_neighbors=k_neighbors,
                                     contamination=outliers_fraction,
                                     novelty=True)
    clf_predict.fit(train_bi_pca)
    # score_samples returns the opposite of LOF, so negate: higher = more anomalous.
    outlier_scores = -clf_predict.score_samples(test_bi_pca)
    # Binary prediction matching the ground-truth convention: -1 outlier, 0 normal.
    test_labels = (outlier_scores > cutoff).astype(int) * -1
    sensitivity, specificity, accuracy = data_utils.show_performance(
        true_labels, test_labels)
    return sensitivity, specificity, accuracy