Пример #1
0
 def normalize_adj(self, mx: sp.coo_matrix) -> sp.coo_matrix:
     """Symmetrically normalize a sparse matrix by its row sums.

     Computes ``D^-1/2 * mx^T * D^-1/2`` where ``D`` is the diagonal matrix
     holding the row sums of ``mx``. For a symmetric ``mx`` this is the same
     as ``D^-1/2 * mx * D^-1/2``. Note that this is *not* a plain row
     normalization: rows of the result do not sum to 1 in general.

     Rows whose sum is zero get a scaling factor of 0, so the corresponding
     rows and columns of the result are all zero. Negative row sums are not
     handled specially (their inverse square root is NaN).

     Args:
         mx: Square sparse matrix (e.g. an adjacency matrix).

     Returns:
         The normalized matrix in COO format.
     """
     rowsum = np.array(mx.sum(1))
     # A zero row sum yields inf here; that case is reset to 0 right below,
     # so NumPy's divide-by-zero warning would only be noise.
     with np.errstate(divide="ignore"):
         r_inv_sqrt = np.power(rowsum, -0.5).flatten()
     r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0.
     r_mat_inv_sqrt = sp.diags(r_inv_sqrt)
     # (mx * D)^T * D == D * mx^T * D because D is diagonal.
     return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt).tocoo()
Пример #2
0
def normalize_sp(mx: sp.coo_matrix) -> sp.spmatrix:
    """Row-normalize a sparse matrix.

    Every row is divided by its sum, so each row with a non-zero sum adds up
    to 1 afterwards. Rows whose sum is zero are scaled by 0 and therefore
    come out as all-zero rows.

    Args:
        mx: Sparse matrix to normalize. Integer data is fine; the row sums
            are converted to float before being inverted.

    Returns:
        The row-normalized sparse matrix. The result is returned in whatever
        format SciPy's sparse product yields (typically CSR); it is not
        converted back to COO.
    """
    # Sum of every row, as floats so the negative power is well defined.
    rows_sum = np.array(mx.sum(1)).astype('float')
    # Reciprocal of each row sum. An all-zero row gives inf, which is reset
    # to 0 below, so the divide-by-zero warning is deliberately silenced.
    with np.errstate(divide="ignore"):
        rows_inv = np.power(rows_sum, -1).flatten()
    rows_inv[np.isinf(rows_inv)] = 0
    # Diagonal matrix of the reciprocals; left-multiplying scales the rows.
    rows_mat_inv = sp.diags(rows_inv)
    return rows_mat_inv.dot(mx)
Пример #3
0
 def normalize(self, mx: sp.coo_matrix) -> sp.coo_matrix:
     """Row-normalize a sparse matrix.

     Every row is divided by its sum, so each row with a non-zero sum adds
     up to 1 afterwards. Rows whose sum is zero are scaled by 0 and come
     out as all-zero rows.

     Args:
         mx: Sparse matrix to normalize.

     Returns:
         The row-normalized matrix in COO format.
     """
     rowsum = np.array(mx.sum(1))
     # A zero row sum yields inf here; it is reset to 0 on the next line,
     # so NumPy's divide-by-zero warning would only be noise.
     with np.errstate(divide="ignore"):
         r_inv = np.power(rowsum, -1.0).flatten()
     r_inv[np.isinf(r_inv)] = 0.
     # Left-multiplying by diag(1 / rowsum) scales each row.
     r_mat_inv = sp.diags(r_inv)
     return r_mat_inv.dot(mx).tocoo()
Пример #4
0
def laplacian_score(X: np.ndarray, W: sparse.coo_matrix) -> np.ndarray:
    """
    Compute the Laplacian score of every feature for a given affinity matrix.

    With fr = X(:,r), D = diag(W*ones), ones = [1,...,1]' and L = D - W:
    1. Let fr_hat = fr - (fr'*D*ones)*ones/(ones'*D*ones)
    2. Laplacian score for the r-th feature is
       score = (fr_hat'*L*fr_hat)/(fr_hat'*D*fr_hat)

    The code evaluates the equivalent form
       score = 1 - (fr_hat'*W*fr_hat)/(fr_hat'*D*fr_hat)
    where the W term is computed as fr'*W*fr - (fr'*D*ones)^2/(ones'*D*ones),
    which equals fr_hat'*W*fr_hat when W is symmetric.

    Only sparse-times-dense products are used, so neither W nor D is ever
    expanded into a dense (n_samples, n_samples) matrix.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    W: {sparse matrix}, shape (n_samples, n_samples)
        input affinity matrix

    Output
    ------
    score: {numpy array}, shape (n_features,)
        laplacian score for each feature; features whose denominator
        fr_hat'*D*fr_hat is below 1e-12 (e.g. constant features) have that
        denominator replaced by 10000 to avoid dividing by zero

    Reference
    ---------
    He, Xiaofei et al. "Laplacian Score for Feature Selection." NIPS 2005.
    """
    X = np.asarray(X)

    # degree of every sample, i.e. the diagonal of D = diag(W*ones),
    # kept as a flat vector instead of a dense diagonal matrix
    degrees = np.asarray(W.sum(axis=1)).ravel()
    total_degree = degrees.sum()

    # (fr'*D*ones)^2 / (ones'*D*ones) for every feature: the centring term
    # shared by the numerator and the denominator
    weighted_sums = degrees.dot(X)
    centering = weighted_sums * weighted_sums / total_degree

    # denominator of the score: fr_hat'*D*fr_hat
    D_prime = degrees.dot(X * X) - centering
    # W part of the numerator: fr'*W*fr minus the centring term
    L_prime = np.sum(X * np.asarray(W.dot(X)), axis=0) - centering

    # avoid a zero denominator (e.g. for constant features)
    D_prime[D_prime < 1e-12] = 10000

    # compute laplacian score for all features
    return 1 - L_prime / D_prime
Пример #5
0
 def getDegrees(mtx: sp.coo_matrix) -> np.ndarray:
     """Return the column sums of a sparse matrix as a NumPy array.

     For a symmetric adjacency matrix these sums are the node degrees.

     Args:
         mtx: Sparse matrix of shape (n_rows, n_cols).

     Returns:
         Array with one entry per column. Length-1 axes are squeezed away,
         so a matrix with several columns gives a 1-D array of shape
         (n_cols,), while a single-column matrix gives a 0-d array.
     """
     # mtx.sum(axis=0) adds up each column; squeeze() drops the length-1 axes.
     return np.array(mtx.sum(axis=0)).squeeze()