def _log_normalize(X): """Normalize ``X`` according to Kluger's log-interactions scheme.""" X = make_nonnegative(X, min_value=1) if issparse(X): raise ValueError( "Cannot compute log of a sparse matrix," " because log(x) diverges to -infinity as x" " goes to 0." ) L = np.log(X) row_avg = L.mean(axis=1)[:, np.newaxis] col_avg = L.mean(axis=0) avg = L.mean() return L - row_avg - col_avg + avg
def _log_normalize(X): """Normalize ``X`` according to Kluger's log-interactions scheme.""" X = make_nonnegative(X, min_value=1) if issparse(X): raise ValueError("Cannot compute log of a sparse matrix," " because log(x) diverges to -infinity as x" " goes to 0.") L = np.log(X) row_avg = L.mean(axis=1)[:, np.newaxis] col_avg = L.mean(axis=0) avg = L.mean() return L - row_avg - col_avg + avg
def _scale_normalize(X): """Normalize ``X`` by scaling rows and columns independently. Returns the normalized matrix and the row and column scaling factors. """ X = make_nonnegative(X) row_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=1))).squeeze() col_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=0))).squeeze() row_diag = np.where(np.isnan(row_diag), 0, row_diag) col_diag = np.where(np.isnan(col_diag), 0, col_diag) if issparse(X): n_rows, n_cols = X.shape r = dia_matrix((row_diag, [0]), shape=(n_rows, n_rows)) c = dia_matrix((col_diag, [0]), shape=(n_cols, n_cols)) an = r * X * c else: an = row_diag[:, np.newaxis] * X * col_diag return an, row_diag, col_diag
def _bistochastic_normalize(X, max_iter=1000, tol=1e-5): """Normalize rows and columns of ``X`` simultaneously so that all rows sum to one constant and all columns sum to a different constant. """ # According to paper, this can also be done more efficiently with # deviation reduction and balancing algorithms. X = make_nonnegative(X) X_scaled = X dist = None for _ in range(max_iter): X_new, _, _ = _scale_normalize(X_scaled) if issparse(X): dist = norm(X_scaled.data - X.data) else: dist = norm(X_scaled - X_new) X_scaled = X_new if dist is not None and dist < tol: break return X_scaled