def test_geigh_Lsym_bp():
    """
    Tests geigh_Lsym_bp against a direct eigen-decomposition of
    Lsym(A_bp(X)), for several shapes, every rank, both ends of the
    spectrum, and a matrix with zeroed-out rows/columns.
    """
    Xs = [np.random.uniform(size=(4, 5)),
          np.random.uniform(size=(5, 5)),
          np.random.uniform(size=(5, 1))]

    for X in Xs:
        true_evals = eigh_wrapper(get_sym_laplacian_bp(X), rank=None)[0]

        for rank in range(1, sum(X.shape) + 1):
            # largest end should match the top of the true spectrum
            gevals, gevecs = geigh_Lsym_bp(X, rank=rank,
                                           zero_tol=1e-10, end='largest')
            check_geigh_Lsym_internal_no_zeros(X, gevals, gevecs, rank)
            assert np.allclose(gevals[:rank], true_evals[:rank])

            # smallest end should match the bottom of the true spectrum
            gevals, gevecs = geigh_Lsym_bp(X, rank=rank,
                                           zero_tol=1e-10, end='smallest')
            check_geigh_Lsym_internal_no_zeros(X, gevals, gevecs, rank)
            assert np.allclose(gevals[:rank], true_evals[-rank:])

        # rank=None picks the default rank inside geigh_Lsym_bp
        for which_end in ['largest', 'smallest']:
            gevals, gevecs = geigh_Lsym_bp(X, rank=None,
                                           zero_tol=1e-10, end=which_end)
            check_geigh_Lsym_internal_no_zeros(X, gevals, gevecs, None)

    # test with zero rows/cols
    X = deepcopy(Xs)[0]
    X[0, :] = 0
    X[:, 0] = 0

    true_gevals, true_zero_mask = true_gevals_Lsym(X)
    for rank in range(1, 7 + 1):
        # make sure gen evals are correct
        gevals, gevecs = geigh_Lsym_bp(X, rank=rank,
                                       zero_tol=1e-10, end='largest')
        assert np.allclose(gevals, true_gevals[:rank])

        # gen evecs must vanish on the isolated (zero) vertices
        assert np.allclose(abs(gevecs[true_zero_mask]).sum(), 0)
def eigh_Lsym_bp(X, rank=None):
    """
    Computes the largest eigenvectors of Lsym(A_bp(X)) directly
    using scipy.linalg.eigh.

    Parameters
    ----------
    X: array-like, (n_rows, n_cols)
        The data matrix.

    rank: None, int
        The rank to compute.

    Output
    ------
    evals, evecs

    evals: array-like, (rank, )
        The largest evals Lsym(A_bp(X)).

    evecs: array-like, (n_row + n_cols, rank)
        The corresponding eigenvectors.
    """
    Lsym = get_sym_laplacian_bp(X)
    return eigh_wrapper(Lsym, rank=rank)
def summarize_bd(D, n_blocks, zero_thresh=None, lap='sym'):
    """
    Prints a community summary of D and plots the Laplacian spectrum
    (all eigenvalues and the n_blocks at the tail) plus a heatmap of
    the community matrix Pi_comm.

    Parameters
    ----------
    D: array-like, (n_rows, n_cols)
        The data matrix to summarize.

    n_blocks: int
        How many tail eigenvalues to plot in the second subplot.

    zero_thresh: None, float
        Passed through to community_summary.

    lap: str
        Must be one of ['sym', 'un']. Which Laplacian's spectrum to plot.
    """
    assert lap in ['sym', 'un']

    comm_summary, Pi_comm = community_summary(D, zero_thresh=zero_thresh)
    print(comm_summary)

    plt.figure(figsize=(8, 4))
    if lap == 'sym':
        evals = eigh_Lsym_bp(D)[0]
    else:
        Lun = get_unnorm_laplacian_bp(D)
        evals = eigh_wrapper(Lun)[0]

    plt.subplot(1, 2, 1)
    plt.plot(evals, marker='.')
    plt.title('all evals of L_{}'.format(lap))

    plt.subplot(1, 2, 2)
    # tail of the spectrum; assumes eigh_wrapper/eigh_Lsym_bp return
    # eigenvalues in descending order so the tail is the smallest
    # -- TODO confirm against eigh_wrapper
    plt.plot(evals[-n_blocks:], marker='.')
    plt.title('smallest {} evals'.format(n_blocks))
    print('evals', evals)

    # print('found {} communities of sizes {}'.format(summary['n_communities'], summary['comm_shapes']))

    plt.figure()
    sns.heatmap(Pi_comm, cmap='Blues', square=True, cbar=False, vmin=0)
    plt.xlabel('View 1 clusters')
    plt.ylabel('View 2 clusters')
def _e_step(self, X): """ Parameters ---------- X: The observed data. Output ------ E_out: dict E_out['log_resp']: array-like E_out['obs_nll']: float E_out['evals']: array-like, (n_blocks, ) E_out['eig_var']: array-like """ # standard E-step log_prob = self.log_probs(X) log_resp = self.log_resps(log_prob) obs_nll = - logsumexp(log_prob, axis=1).mean() if self.n_blocks is not None: B = self.n_blocks else: B = len(self.eval_weights) assert self.__mode in ['lap_pen', 'fine_tune_bd'] if self.__mode == 'lap_pen' and self.n_blocks != 1: if self.lap == 'sym': evals, eig_var = geigh_Lsym_bp_smallest(X=self.bd_weights_, rank=B, zero_tol=1e-10, method='tsym') elif self.lap == 'un': Lun = get_unnorm_laplacian_bp(self.bd_weights_) all_evals, all_evecs = eigh_wrapper(Lun) eig_var = all_evecs[:, -B:] evals = all_evals[-B:] else: # if self.__mode == 'fine_tune_bd': evals = None eig_var = None return {'log_resp': log_resp, 'obs_nll': obs_nll, 'evals': evals, 'eig_var': eig_var}
def check_geigh_Lsym_bp_from_Tsym(X, rank=None, method='direct'):
    """
    Checks the output of geigh_Lsym_bp_from_Tsym against a direct
    generalized eigen-decomposition of [Lun(A_bp(X)), deg(A_bp(X))].
    """
    D = np.diag(get_deg_bp(X))
    true_gevals, true_gevecs = eigh_wrapper(A=get_unnorm_laplacian_bp(X), B=D)

    _rank = min(X.shape) if rank is None else rank

    # ---- check largest eigenvectors ----
    gevals, gevecs = geigh_sym_laplacian_bp(X=X, rank=rank,
                                            method=method, end='largest')
    for k, gv in enumerate(gevals):
        # gen eval matches the direct computation
        assert np.allclose(gv, true_gevals[k])
        # skip 1-eigenvalues: their subspace is non-unique
        if not np.allclose(gv, 1):
            assert angle(gevecs[:, k], true_gevecs[:, k], subspace=True) < 1e-4
    # proper normalization: gevecs.T @ D @ gevecs == I
    assert np.allclose(gevecs.T @ D @ gevecs, np.eye(gevecs.shape[1]))

    # ---- check smallest eigenvectors ----
    gevals, gevecs = geigh_sym_laplacian_bp(X=X, rank=rank,
                                            method=method, end='smallest')
    # index of the first returned geval within the full spectrum
    offset = sum(X.shape) - min(X.shape) + (min(X.shape) - _rank)
    for k, gv in enumerate(gevals):
        assert np.allclose(gv, true_gevals[offset + k])
        if not np.allclose(gv, 1):
            assert angle(gevecs[:, k], true_gevecs[:, offset + k],
                         subspace=True) < 1e-4
    assert np.allclose(gevecs.T @ D @ gevecs, np.eye(gevecs.shape[1]))
def true_gevals_Lsym(X, zero_tol=1e-10):
    """
    Ground-truth generalized eigenvalues of Lsym for X, plus a mask
    flagging the isolated (all-zero) rows/columns of X.

    Returns (true_gevals, true_zero_mask): the gevals sorted in
    descending order and a boolean mask over the concatenated
    row/column vertices.
    """
    evals_all = eigh_wrapper(get_sym_laplacian_bp(X), rank=None)[0]

    row_is_zero = np.linalg.norm(X, axis=1) < zero_tol
    col_is_zero = np.linalg.norm(X, axis=0) < zero_tol
    n_isolated = sum(row_is_zero) + sum(col_is_zero)

    # how many evals each end of the spectrum contributes
    n_end = max(X.shape) - n_isolated

    gevals = np.concatenate([evals_all[:n_end],
                             [1] * (max(X.shape) - min(X.shape)),
                             evals_all[-n_end:]])
    gevals = np.sort(gevals)[::-1]  # descending

    zero_mask = np.concatenate([row_is_zero, col_is_zero])
    return gevals, zero_mask
def check_eigh_Lsym_bp_from_Tsym(X, rank=None):
    """
    Checks the output of eigh_Lsym_bp_from_Tsym against a direct
    eigen-decomposition of Lsym(A_bp(X)).
    """
    true_evals, true_evecs = eigh_wrapper(get_sym_laplacian_bp(X))

    _rank = min(X.shape) if rank is None else rank

    # ---- check largest eigenvectors ----
    evals, evecs = eigh_Lsym_bp_from_Tsym(X, end='largest', rank=rank)
    for k, ev in enumerate(evals):
        # eval matches the direct computation
        assert np.allclose(ev, true_evals[k])
        # skip 1-eigenvalues: their subspace is non-unique
        if not np.allclose(ev, 1):
            assert angle(true_evecs[:, k], evecs[:, k], subspace=True) < 1e-4
    # orthonormality
    assert np.allclose(evecs.T @ evecs, np.eye(evecs.shape[1]))

    # ---- check smallest eigenvectors ----
    evals, evecs = eigh_Lsym_bp_from_Tsym(X, end='smallest', rank=rank)
    # index of the first returned eval within the full spectrum
    offset = sum(X.shape) - min(X.shape) + (min(X.shape) - _rank)
    for k, ev in enumerate(evals):
        assert np.allclose(ev, true_evals[offset + k])
        if not np.allclose(ev, 1):
            assert angle(true_evecs[:, offset + k], evecs[:, k],
                         subspace=True) < 1e-4
    assert np.allclose(evecs.T @ evecs, np.eye(evecs.shape[1]))
def check_vs_truth_smallest_eigh_Lsym_bp_from_Tsym_no_zeros(X, rank):
    """
    Check smallest_eigh_Lsym_bp_from_Tsym_no_zeros against ground truth
    computed directly from Lsym(A_bp(X)).
    """
    evals, evecs = smallest_eigh_Lsym_bp_from_Tsym_no_zeros(X, rank=rank)

    if rank is None:
        rank = min(X.shape)

    truth_evals, truth_evecs = eigh_wrapper(A=get_sym_laplacian_bp(X))
    truth_evals = truth_evals[-rank:]
    truth_evecs = truth_evecs[:, -rank:]

    # eigenvalues must match the true eigenvalues
    assert np.allclose(evals, truth_evals)

    # eigenvectors span the correct spaces; 1-eigenvalues are skipped
    # since their eigenspace is non-unique
    for k in range(rank):
        if not np.allclose(evals[k], 1):
            assert angle(evecs[:, k], truth_evecs[:, k], subspace=True) < 1e-4
def geigh_Lsym_bp(X, rank=None, zero_tol=1e-10, end='smallest'):
    """
    Computes the largest or smallest generalized eigenvectors of
    [Lun(A_bp(X)), deg(A_bp(X))] directly using scipy.linalg.eigh.

    Parameters
    ----------
    X: array-like, (n_rows, n_cols)
        The data matrix.

    rank: None, int
        The rank to compute. If None, will compute as many gevals as
        possible; this depends on the number of zero rows/columns of X.

    zero_tol: float
        Tolerance to identify zero rows/columns by their norm.

    end: str
        Must be one of ['smallest', 'largest']. Compute the smallest
        or largest generalized eigenvectors.

    Output
    ------
    gevals, gevecs

    gevals: array-like, (rank, )
        The smallest or largest generalized eigenvalues.

    gevecs: array-like, (n_rows + n_cols, rank)
        The corresponding generalized eigenvectors. Normalized such
        that gevecs.T @ deg(A_bp(X)) @ gevecs = I
    """
    assert end in ['smallest', 'largest']

    # drop the zero rows/columns of X before forming the Laplacian
    row_zero = np.linalg.norm(X, axis=1) < zero_tol
    col_zero = np.linalg.norm(X, axis=0) < zero_tol
    X_woz = X[~row_zero, :][:, ~col_zero]

    if rank is None:
        rank = min(X_woz.shape)
    assert 1 <= rank <= sum(X.shape)
    if rank > sum(X_woz.shape):
        raise ValueError("X has too many zero rows/columns.")

    # generalized eigen-decomposition on the zero-free submatrix
    Lun = get_unnorm_laplacian_bp(X_woz)
    D = np.diag(get_deg_bp(X_woz))

    if end == 'largest':
        gevals, gevecs_woz = eigh_wrapper(A=Lun, B=D, rank=rank)
    else:  # 'smallest': negate to grab the other end, then undo the flip
        gevals, gevecs_woz = eigh_wrapper(A=-Lun, B=D, rank=rank)
        gevals = -gevals[::-1]
        gevecs_woz = gevecs_woz[:, ::-1]

    # re-insert zeros into the gen evecs for the isolated vertices
    gevecs = np.zeros((sum(X.shape), rank))
    keep = ~np.concatenate([row_zero, col_zero])
    gevecs[keep, :] = gevecs_woz

    return gevals, gevecs
def compute_tracking_data(self, X, E_out=None):
    """
    Optimization history to keep track of.

    Parameters
    ----------
    X: The observed data.

    E_out: None, dict
        Output of self._e_step(X); recomputed here when not provided.

    Output
    ------
    out: dict
        Always contains 'obs_nll' and 'loss_val'; in penalized mode
        also 'raw_eval_sum', 'eval_sum', 'eval_loss' and the current
        penalty value.
    """
    out = {}
    if E_out is None:
        E_out = self._e_step(X)

    # maybe track model history
    if self.history_tracking >= 2:
        out['model'] = deepcopy(self._get_parameters())

    if 'obs_nll' in E_out.keys():
        out['obs_nll'] = E_out['obs_nll']
    else:
        out['obs_nll'] = - self.score(X)

    # if we are fine tuning with a fixed zero mask the loss function
    # is just the observed negative log likelihood
    if self.__mode == 'fine_tune_bd' or self.n_blocks == 1:
        out['loss_val'] = out['obs_nll']
        return out

    # number of blocks: explicit, else implied by the eigenvalue weights
    if self.n_blocks is not None:
        B = self.n_blocks
    else:
        B = len(self.eval_weights)

    # evals of current step
    if 'evals' in E_out.keys():
        evals = E_out['evals']
    else:
        if self.lap == 'sym':
            evals, _ = geigh_Lsym_bp_smallest(X=self.bd_weights_,
                                              rank=B,
                                              zero_tol=1e-10,
                                              method='tsym')
        elif self.lap == 'un':
            Lun = get_unnorm_laplacian_bp(self.bd_weights_)
            all_evals, all_evecs = eigh_wrapper(Lun)
            # tail B evals; assumes eigh_wrapper returns eigenvalues
            # in descending order -- TODO confirm
            evals = all_evals[-B:]

    out['raw_eval_sum'] = sum(evals)

    if self.eval_weights is not None:
        # weighted eigenvalue sum
        eval_sum = evals.T @ asc_sort(self.eval_weights)
    else:  # vanilla sum
        assert len(evals) == B
        eval_sum = sum(evals)

    out['eval_sum'] = eval_sum
    out['eval_loss'] = self.eval_pen_ * eval_sum
    # NOTE(review): key 'evan_pen' looks like a typo for 'eval_pen';
    # kept as-is since downstream readers may depend on this key
    out['evan_pen'] = deepcopy(self.eval_pen_)

    # overall loss
    out['loss_val'] = out['obs_nll'] + out['eval_loss']

    return out