def perform_clustering(seed, m_data, labels, n_clusters):
    # Single-view spherical k-means clustering
    # Cluster each view separately
    s_kmeans = SphericalKMeans(n_clusters=n_clusters, random_state=seed,
                               n_init=100)
    s_clusters_v1 = s_kmeans.fit_predict(m_data[0])
    s_clusters_v2 = s_kmeans.fit_predict(m_data[1])

    # Concatenate the multiple views into a single view
    s_data = np.hstack(m_data)
    s_clusters = s_kmeans.fit_predict(s_data)

    # Compute NMI between true class labels and single-view cluster labels
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)
    print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multi-view spherical k-means clustering
    # Use the MultiviewSphericalKMeans instance to cluster the data
    m_kmeans = MultiviewSphericalKMeans(n_clusters=n_clusters, n_init=100,
                                        random_state=seed)
    m_clusters = m_kmeans.fit_predict(m_data)

    # Compute NMI between true class labels and multi-view cluster labels
    m_nmi = nmi_score(labels, m_clusters)
    print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
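
# Usage sketch (not from the original example): run the single- vs multi-view
# spherical k-means comparison above on two views of the UCI multiple features
# data, loaded the same way as elsewhere in these snippets. RANDOM_SEED and
# n_class are placeholder values chosen for illustration.
from mvlearn.datasets import load_UCImultifeature

RANDOM_SEED = 5
n_class = 5
Xs, labels = load_UCImultifeature(
    select_labeled=list(range(n_class)), views=[0, 1])
m_clusters = perform_clustering(RANDOM_SEED, Xs, labels, n_class)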
def perform_clustering(seed, m_data, labels, n_clusters, kernel='rbf'):
    # Single-view spectral clustering
    # Cluster each view separately
    s_spectral = SpectralClustering(n_clusters=n_clusters, random_state=seed,
                                    affinity=kernel, n_init=100)
    s_clusters_v1 = s_spectral.fit_predict(m_data[0])
    s_clusters_v2 = s_spectral.fit_predict(m_data[1])

    # Concatenate the multiple views into a single view
    s_data = np.hstack(m_data)
    s_clusters = s_spectral.fit_predict(s_data)

    # Compute NMI between true class labels and single-view cluster labels
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)
    print('Single-view View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Single-view View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multi-view spectral clustering
    # Use the MultiviewSpectralClustering instance to cluster the data
    m_spectral = MultiviewSpectralClustering(n_clusters=n_clusters,
                                             random_state=seed,
                                             affinity=kernel, n_init=100)
    m_clusters = m_spectral.fit_predict(m_data)

    # Compute NMI between true class labels and multi-view cluster labels
    m_nmi = nmi_score(labels, m_clusters)
    print('Multi-view NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
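
# Usage sketch (an assumption, not part of the original tutorial): the `kernel`
# argument is passed through as the `affinity` of both clustering objects, so a
# graph-based affinity such as 'nearest_neighbors' can be compared against the
# default 'rbf' kernel on the same data and seed used in the sketch above.
m_clusters_rbf = perform_clustering(RANDOM_SEED, Xs, labels, n_class,
                                    kernel='rbf')
m_clusters_knn = perform_clustering(RANDOM_SEED, Xs, labels, n_class,
                                    kernel='nearest_neighbors')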
###############################################################################
# Multiview spherical KMeans clustering on 2 views
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Here we will demonstrate the performance of multiview spherical k-means
# clustering. We will evaluate the purity of the resulting clusters with
# respect to the class labels using the normalized mutual information metric.

# Use the MultiviewSphericalKMeans instance to cluster the data
m_kmeans = MultiviewSphericalKMeans(n_clusters=n_class,
                                    random_state=RANDOM_SEED)
m_clusters = m_kmeans.fit_predict(Xs)

# Compute NMI between true class labels and multiview cluster labels
m_nmi = nmi_score(labels, m_clusters)
print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))

###############################################################################
# Multiview spherical KMeans clustering results and the true clusters
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# We will display the clustering results of the multiview spherical k-means
# clustering algorithm below, along with the true class labels.

# Run t-SNE to display clustering results via a low-dimensional embedding
tsne = TSNE()
new_data_1 = tsne.fit_transform(Xs[0])
new_data_2 = tsne.fit_transform(Xs[1])
new_data = [new_data_1, new_data_2]

display_plots('True Labels', new_data, labels)
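
# The true-label plot above can be paired with the multiview spherical k-means
# assignments on the same t-SNE embedding. `display_plots` is the plotting
# helper used earlier with the true labels; calling it with the cluster labels
# is a sketch of how the results would be visualised, not additional output
# guaranteed by the original script.
display_plots('Multiview Spherical KMeans Clusters', new_data, m_clusters)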
n_class = 5
Xs, labels = load_UCImultifeature(
    select_labeled=list(range(n_class)), views=[0, 1])

###############################################################################
# Singleview spectral clustering
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Cluster each view separately and compute NMI

s_spectral = SpectralClustering(
    n_clusters=n_class, random_state=RANDOM_SEED, n_init=100)

for i in range(len(Xs)):
    s_clusters = s_spectral.fit_predict(Xs[i])
    s_nmi = nmi_score(labels, s_clusters, average_method='arithmetic')
    print('Single-view View {0:d} NMI Score: {1:.3f}\n'.format(i + 1, s_nmi))

# Concatenate the multiple views into a single view and produce clusters
s_data = np.hstack(Xs)
s_clusters = s_spectral.fit_predict(s_data)
s_nmi = nmi_score(labels, s_clusters)
print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

###############################################################################
# Co-Regularized multiview spectral clustering
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Use the MultiviewCoRegSpectralClustering instance to cluster the data

m_spectral1 = MultiviewCoRegSpectralClustering(n_clusters=n_class,
                                               random_state=RANDOM_SEED,
                                               n_init=100)
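
# A sketch of the remaining steps, assumed to mirror the other spectral
# clustering snippets in this file: fit the co-regularized model on both views
# and score its assignments against the true labels with NMI.
m_clusters1 = m_spectral1.fit_predict(Xs)
m_nmi1 = nmi_score(labels, m_clusters1)
print('Co-Regularized Multi-view NMI Score: {0:.3f}\n'.format(m_nmi1))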
# clustering the two views concatenated together.

# Singleview kmeans clustering

# Cluster each view separately
s_kmeans = KMeans(n_clusters=n_class, random_state=RANDOM_SEED)
s_clusters_v1 = s_kmeans.fit_predict(Xs[0])
s_clusters_v2 = s_kmeans.fit_predict(Xs[1])

# Concatenate the multiple views into a single view
s_data = np.hstack(Xs)
s_clusters = s_kmeans.fit_predict(s_data)

# Compute NMI between true class labels and singleview cluster labels
s_nmi_v1 = nmi_score(labels, s_clusters_v1)
s_nmi_v2 = nmi_score(labels, s_clusters_v2)
s_nmi = nmi_score(labels, s_clusters)
print('Singleview View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
print('Singleview View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
print('Singleview Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

# Multiview kmeans clustering

# Use the MultiviewKMeans instance to cluster the data
m_kmeans = MultiviewKMeans(n_clusters=n_class, random_state=RANDOM_SEED)
m_clusters = m_kmeans.fit_predict(Xs)

# Compute NMI between true class labels and multiview cluster labels
m_nmi = nmi_score(labels, m_clusters)
print('Multiview NMI Score: {0:.3f}\n'.format(m_nmi))
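
# Optional visual comparison (a sketch, not part of the original example):
# plot the four NMI scores computed above side by side so the single-view,
# concatenated and multi-view results can be compared at a glance. Assumes
# matplotlib is available.
import matplotlib.pyplot as plt

nmi_names = ['View 1', 'View 2', 'Concatenated', 'Multiview']
nmi_values = [s_nmi_v1, s_nmi_v2, s_nmi, m_nmi]
plt.bar(nmi_names, nmi_values)
plt.ylabel('NMI score')
plt.title('Single-view vs. multi-view k-means clustering')
plt.show()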
def accuracy_scores_binary(y_true, y_pred):
    '''
    A function to calculate accuracy measures for a binary classification.

    Function written by Osian Roberts.

    Parameters:
    :param y_true: observed binary labels, where 0 is absence and 1 is presence.
    :param y_pred: predicted binary labels, where 0 is absence and 1 is presence.

    :returns: a list containing two numpy.arrays -
        (metrics: name of test metrics, scores: test scores for each metric).

    Reference: See pages 253 - 255 in:
        Guisan et al. (2017). Habitat suitability and distribution models:
        with applications in R.
    '''
    import numpy

    # check inputs:
    if not isinstance(y_true, numpy.ndarray):
        y_true = numpy.array(y_true)
    if not isinstance(y_pred, numpy.ndarray):
        y_pred = numpy.array(y_pred)
    if y_true.ndim != 1:
        raise SystemExit('ERROR: the true labels are not in a 1D array.')
    if y_pred.ndim != 1:
        raise SystemExit('ERROR: the predicted labels are not in a 1D array.')
    if y_true.size != y_pred.size:
        raise SystemExit('ERROR: unequal number of binary labels.')

    # ensure that y_true, y_pred contain binary labels (i.e. 0 or 1 values):
    y_true = y_true.astype('uint8')
    y_pred = y_pred.astype('uint8')
    if numpy.min(y_true) != 0 or numpy.max(y_true) != 1:
        raise SystemExit('ERROR: the true labels are not binary (zero or one values).')
    if numpy.min(y_pred) != 0 or numpy.max(y_pred) != 1:
        raise SystemExit('ERROR: the predicted labels are not binary (zero or one values).')

    metrics = numpy.array(['Prevalence', 'Overall Diagnostic Power',
                           'Correct Classification Rate', 'Misclassification Rate',
                           'Presence Predictive Power', 'Absence Predictive Power',
                           'Accuracy', 'Balanced Accuracy', 'Sensitivity',
                           'Specificity', 'Precision', 'F1 Score',
                           'Matthews Correlation', 'Cohen Kappa',
                           'Normalised Mutual Information', 'Hanssen-Kuiper skill'])

    try:
        n_presence = numpy.where(y_true == 1)[0].size
        n_absence = numpy.where(y_true == 0)[0].size

        # calculate true-presence, true-absence, false-presence and false-absence:
        TP = numpy.where((y_true == 1) & (y_pred == 1))[0].size
        TA = numpy.where((y_true == 0) & (y_pred == 0))[0].size
        FP = numpy.where((y_true == 0) & (y_pred == 1))[0].size  # predicted presence, observed absence
        FA = numpy.where((y_true == 1) & (y_pred == 0))[0].size  # predicted absence, observed presence (aka sweet FA!)

        # proportion of presence records:
        prevalence = (TP + FA) / y_true.size

        # proportion of absence records:
        ODP = 1 - prevalence

        # correct classification & misclassification rate
        CCR = (TP + TA) / y_true.size
        MR = (FP + FA) / y_true.size

        # Sensitivity (aka Recall or True Positive Rate):
        sensitivity = TP / n_presence

        # false absence rate - inverse of sensitivity (redundant?)
        # FAR = 1 - sensitivity

        # Presence and absence predictive power:
        PPP = TP / (TP + FP)
        APP = TA / (TA + FA)

        # Specificity (aka True Negative Rate):
        specificity = TA / n_absence

        # false presence rate - inverse of specificity (redundant?)
        # FPR = 1 - specificity

        # Accuracy scores:
        accuracy = (TP + TA) / (n_presence + n_absence)
        balanced_accuracy = ((TP / n_presence) + (TA / n_absence)) / 2

        # precision:
        precision = TP / (TP + FP)

        # F1 score:
        f1_score = 2 * TP / ((2 * TP) + FP + FA)

        # Matthews Correlation Coefficient:
        MCC = ((TP * TA) - (FP * FA)) / (((TP + FP) * (TP + FA) * (TA + FP) * (TA + FA))**0.5)

        # Hanssen-Kuiper skill (unreliable when TA is very large):
        TSS = sensitivity + specificity - 1
        del n_presence, n_absence, TP, TA, FP, FA

        from sklearn.metrics import normalized_mutual_info_score as nmi_score
        nmi = nmi_score(y_true, y_pred)

        # Cohen's Kappa (caution: sensitive to sample size and proportion of presence records):
        from sklearn.metrics import cohen_kappa_score as kappa
        kappa = kappa(y_true, y_pred)

        scores = numpy.array([prevalence, ODP, CCR, MR, PPP, APP, accuracy,
                              balanced_accuracy, sensitivity, specificity,
                              precision, f1_score, MCC, kappa, nmi,
                              TSS]).round(decimals=6)
        del prevalence, ODP, CCR, MR, PPP, APP, accuracy, balanced_accuracy, sensitivity
        del specificity, precision, f1_score, MCC, kappa, nmi, TSS
    except Exception:
        scores = numpy.zeros(len(metrics))

    if metrics.size == scores.size:
        return [metrics, scores]
    else:
        raise SystemExit('ERROR: unable to calculate accuracy metrics.')
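
# Example usage (a sketch with made-up labels, not results from any dataset):
# score a small set of predictions and print each metric next to its value.
if __name__ == '__main__':
    observed = [1, 1, 0, 0, 1, 0, 1, 0]
    predicted = [1, 0, 0, 0, 1, 1, 1, 0]
    metric_names, metric_scores = accuracy_scores_binary(observed, predicted)
    for name, score in zip(metric_names, metric_scores):
        print('{0}: {1}'.format(name, score))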