def local_and_global_consistency(G, alpha=0.99, max_iter=30,
                                 label_name='label'):
    """Node classification by Local and Global Consistency

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor
    max_iter : int
        Maximum number of iterations allowed
    label_name : string
        Name of target labels to predict

    Returns
    -------
    predicted : array, shape = [n_samples]
        Array of predicted labels

    Raises
    ------
    NetworkXError
        If no nodes on `G` has `label_name`.
    ImportError
        If numpy or scipy cannot be imported.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]['label'] = 'A'
    >>> G.nodes[3]['label'] = 'B'
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.local_and_global_consistency(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    # The message is a single string: passing two positional arguments to
    # ImportError would render the message as a tuple.
    try:
        import numpy as np
    except ImportError as err:
        raise ImportError(
            "local_and_global_consistency() requires numpy: "
            "http://scipy.org/ ") from err
    try:
        from scipy import sparse
    except ImportError as err:
        raise ImportError(
            "local_and_global_consistency() requires scipy: "
            "http://scipy.org/ ") from err

    def _build_propagation_matrix(X, alpha):
        """Build propagation matrix of Local and global consistency

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        alpha : float
            Clamping factor

        Returns
        -------
        S : scipy sparse matrix, shape = [n_samples, n_samples]
            Propagation matrix
        """
        # Column sums of the adjacency matrix, flattened to a 1-D vector.
        degrees = np.asarray(X.sum(axis=0)).ravel()
        degrees[degrees == 0] = 1  # Avoid division by 0
        # Take the square root on the dense vector before building the
        # diagonal matrix, instead of calling np.sqrt on a sparse object.
        D2 = sparse.diags(np.sqrt(1.0 / degrees), offsets=0)
        return alpha * D2.dot(X).dot(D2)

    def _build_base_matrix(X, labels, alpha, n_classes):
        """Build base matrix of Local and global consistency

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        alpha : float
            Clamping factor
        n_classes : integer
            The number of classes (distinct labels) on the input graph

        Returns
        -------
        B : array, shape = [n_samples, n_classes]
            Base matrix
        """
        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        # Only the (node id, label id) cells of labeled nodes are non-zero.
        B[labels[:, 0], labels[:, 1]] = 1 - alpha
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            "No node on the input graph is labeled by '" + label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]

    F = _init_label_matrix(n_samples, n_classes)
    P = _build_propagation_matrix(X, alpha)
    B = _build_base_matrix(X, labels, alpha, n_classes)

    # Always runs the full number of iterations; there is no convergence
    # check in this function.
    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)

    return predicted
def harmonic_function(G, max_iter=30, label_name="label"):
    """Node classification by Harmonic function

    Parameters
    ----------
    G : NetworkX Graph
    max_iter : int
        maximum number of iterations allowed
    label_name : string
        name of target labels to predict

    Returns
    -------
    predicted : array, shape = [n_samples]
        Array of predicted labels

    Raises
    ------
    NetworkXError
        If no nodes on `G` has `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.harmonic_function(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
    Semi-supervised learning using gaussian fields and harmonic functions.
    In ICML (Vol. 3, pp. 912-919).
    """
    import numpy as np
    from scipy import sparse as sp_sparse

    adjacency = nx.to_scipy_sparse_matrix(G)
    labels, label_dict = _get_label_info(G, label_name)

    # Nothing to propagate from when the graph carries no labels at all.
    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            "No node on the input graph is labeled by '" + label_name + "'."
        )

    n_samples = adjacency.shape[0]
    n_classes = label_dict.shape[0]
    labeled_ids = labels[:, 0]  # IDs of labeled nodes
    class_ids = labels[:, 1]  # label id paired with each labeled node

    scores = _init_label_matrix(n_samples, n_classes)

    # Propagation matrix: adjacency scaled by inverse degree, with the
    # rows of labeled nodes zeroed out.
    degrees = adjacency.sum(axis=0).A[0]
    degrees[degrees == 0] = 1  # Avoid division by 0
    inverse_degree = sp_sparse.diags((1.0 / degrees), offsets=0)
    propagation = (inverse_degree @ adjacency).tolil()
    propagation[labeled_ids] = 0

    # Base matrix: 1 in the (node id, label id) cell of each labeled node.
    base = np.zeros((n_samples, n_classes))
    base[labeled_ids, class_ids] = 1

    # Run the propagation step max_iter times.
    step = 0
    while step < max_iter:
        scores = _propagate(propagation, scores, base)
        step += 1

    return _predict(scores, label_dict)
def local_and_global_consistency(G, alpha=0.99, max_iter=30,
                                 label_name='label'):
    """Node classification by Local and Global Consistency

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor
    max_iter : int
        Maximum number of iterations allowed
    label_name : string
        Name of target labels to predict

    Returns
    -------
    predicted : array, shape = [n_samples]
        Array of predicted labels

    Raises
    ------
    NetworkXError
        If no nodes on `G` has `label_name`.
    ImportError
        If numpy or scipy cannot be imported.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]['label'] = 'A'
    >>> G.nodes[3]['label'] = 'B'
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.local_and_global_consistency(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    # Build each message as one string; two positional arguments to
    # ImportError would be displayed as a tuple.
    try:
        import numpy as np
    except ImportError as err:
        raise ImportError(
            "local_and_global_consistency() requires numpy: "
            "http://scipy.org/ ") from err
    try:
        from scipy import sparse
    except ImportError as err:
        raise ImportError(
            "local_and_global_consistency() requires scipy: "
            "http://scipy.org/ ") from err

    def _build_propagation_matrix(X, alpha):
        """Build propagation matrix of Local and global consistency

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        alpha : float
            Clamping factor

        Returns
        -------
        S : scipy sparse matrix, shape = [n_samples, n_samples]
            Propagation matrix
        """
        # Column sums of the adjacency matrix as a flat 1-D vector.
        degrees = np.asarray(X.sum(axis=0)).ravel()
        degrees[degrees == 0] = 1  # Avoid division by 0
        # sqrt is taken on the dense vector so the result does not depend
        # on numpy dispatching a ufunc onto a sparse matrix object.
        D2 = sparse.diags(np.sqrt(1.0 / degrees), offsets=0)
        return alpha * D2.dot(X).dot(D2)

    def _build_base_matrix(X, labels, alpha, n_classes):
        """Build base matrix of Local and global consistency

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        alpha : float
            Clamping factor
        n_classes : integer
            The number of classes (distinct labels) on the input graph

        Returns
        -------
        B : array, shape = [n_samples, n_classes]
            Base matrix
        """
        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        # Labeled nodes get weight (1 - alpha) in their own label column.
        B[labels[:, 0], labels[:, 1]] = 1 - alpha
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            "No node on the input graph is labeled by '" + label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]

    F = _init_label_matrix(n_samples, n_classes)
    P = _build_propagation_matrix(X, alpha)
    B = _build_base_matrix(X, labels, alpha, n_classes)

    # Fixed number of propagation steps; no early stopping is performed
    # in this function.
    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)

    return predicted
def harmonic_function(G, max_iter=30, label_name='label'):
    """Node classification by Harmonic function

    Parameters
    ----------
    G : NetworkX Graph
    max_iter : int
        maximum number of iterations allowed
    label_name : string
        name of target labels to predict

    Returns
    -------
    predicted : array, shape = [n_samples]
        Array of predicted labels

    Raises
    ------
    NetworkXError
        If no nodes on `G` has `label_name`.
    ImportError
        If numpy or scipy cannot be imported.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]['label'] = 'A'
    >>> G.nodes[3]['label'] = 'B'
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.harmonic_function(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
    Semi-supervised learning using gaussian fields and harmonic functions.
    In ICML (Vol. 3, pp. 912-919).
    """
    # Chain the original error so the real import failure stays visible.
    try:
        import numpy as np
    except ImportError as err:
        raise ImportError(
            "harmonic_function() requires numpy: http://scipy.org/ "
        ) from err
    try:
        from scipy import sparse
    except ImportError as err:
        raise ImportError(
            "harmonic_function() requires scipy: http://scipy.org/ "
        ) from err

    def _build_propagation_matrix(X, labels):
        """Build propagation matrix of Harmonic function

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id

        Returns
        -------
        P : scipy sparse matrix, shape = [n_samples, n_samples]
            Propagation matrix
        """
        # Column sums of the adjacency matrix as a flat 1-D vector.
        degrees = np.asarray(X.sum(axis=0)).ravel()
        degrees[degrees == 0] = 1  # Avoid division by 0
        D = sparse.diags((1.0 / degrees), offsets=0)
        # LIL format is used for the row assignment below.
        P = D.dot(X).tolil()
        P[labels[:, 0]] = 0  # labels[:, 0] indicates IDs of labeled nodes
        return P

    def _build_base_matrix(X, labels, n_classes):
        """Build base matrix of Harmonic function

        Parameters
        ----------
        X : scipy sparse matrix, shape = [n_samples, n_samples]
            Adjacency matrix
        labels : array, shape = [n_samples, 2]
            Array of pairs of node id and label id
        n_classes : integer
            The number of classes (distinct labels) on the input graph

        Returns
        -------
        B : array, shape = [n_samples, n_classes]
            Base matrix
        """
        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        # One-hot entry in the label column of every labeled node.
        B[labels[:, 0], labels[:, 1]] = 1
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            "No node on the input graph is labeled by '" + label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]

    F = _init_label_matrix(n_samples, n_classes)
    P = _build_propagation_matrix(X, labels)
    B = _build_base_matrix(X, labels, n_classes)

    # Fixed number of propagation steps; no early stopping is performed
    # in this function.
    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)

    return predicted
def local_and_global_consistency(G, alpha=0.99, max_iter=30,
                                 label_name='label', return_prob=True):
    """Node classification by Local and Global Consistency

    Variant of the NetworkX routine, modified so that it can also return
    the final label-score matrix ``F`` (called "probabilities" by the
    original author of the modification; no normalization is applied in
    this function).

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor
    max_iter : int
        Maximum number of iterations allowed
    label_name : string
        Name of target labels to predict
    return_prob : bool
        If true (the default), also return the score matrix as a pandas
        DataFrame.

    Returns
    -------
    predicted
        Result of ``_predict(F, label_dict)``. Returned alone when
        `return_prob` is false.
    (predicted, scores) : tuple
        Returned when `return_prob` is true. ``scores`` is a
        ``pandas.DataFrame`` with one row per node of `G` (indexed by
        ``list(G.nodes())``) and one column per label, holding the
        corresponding column of ``F``.

    Raises
    ------
    NetworkXError
        If no nodes on `G` has `label_name`.

    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    from networkx.algorithms.node_classification.utils import (
        _get_label_info,
        _init_label_matrix,
        _propagate,
        _predict,
    )
    import numpy as np
    from scipy import sparse

    def _build_propagation_matrix(X, alpha):
        """Return ``alpha * D^(-1/2) X D^(-1/2)`` for adjacency matrix X."""
        # Column sums of the adjacency matrix as a flat 1-D vector.
        degrees = np.asarray(X.sum(axis=0)).ravel()
        degrees[degrees == 0] = 1  # Avoid division by 0
        # sqrt is taken on the dense vector so the result does not depend
        # on numpy dispatching a ufunc onto a sparse matrix object.
        D2 = sparse.diags(np.sqrt(1.0 / degrees), offsets=0)
        return alpha * D2.dot(X).dot(D2)

    def _build_base_matrix(X, labels, alpha, n_classes):
        """Return the base matrix: (1 - alpha) at each (node id, label id)."""
        n_samples = X.shape[0]
        B = np.zeros((n_samples, n_classes))
        B[labels[:, 0], labels[:, 1]] = 1 - alpha
        return B

    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            "No node on the input graph is labeled by '" + label_name + "'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]

    F = _init_label_matrix(n_samples, n_classes)
    P = _build_propagation_matrix(X, alpha)
    B = _build_base_matrix(X, labels, alpha, n_classes)

    # Fixed number of propagation steps; no early stopping is performed
    # in this function.
    remaining_iter = max_iter
    while remaining_iter > 0:
        F = _propagate(P, F, B)
        remaining_iter -= 1

    predicted = _predict(F, label_dict)

    if return_prob:
        # Imported here, like the other dependencies of this function, so
        # the branch does not rely on a module-level ``pd`` name.
        import pandas as pd

        scores = pd.DataFrame(
            {label_dict[i]: F[:, i] for i in range(F.shape[1])},
            index=list(G.nodes()))
        return (predicted, scores)

    return predicted