def fit(self, data, alg='mult'):
    """
    Fit a CNMF model to the data.

    Parameters
    ----------
    data : array-like, 2-D
        Non-negative training data to fit. Converted with ``np.asarray``
        so lists and other array-likes are accepted. Historically
        documented as shape (n_time, n_features).
        NOTE(review): the first axis sizes `W` and the second axis sizes
        `H` below -- confirm the intended orientation.
    alg : string {'mult', 'bcd_backtrack', 'bcd_const'}, optional
        Algorithm used to fit the data. 'mult' dispatches to `fit_mult`;
        the 'bcd_*' options dispatch to `fit_bcd` with a 'backtrack' or
        'constant' step type respectively.

    Returns
    -------
    self : object
        Returns the instance itself.

    Raises
    ------
    ValueError
        If `data` contains negative values or `alg` is not one of the
        supported names.
    """
    # Check input. Coerce first so that plain lists support the
    # element-wise comparison below.
    data = np.asarray(data)
    if (data < 0).any():
        raise ValueError('Negative values in data to fit')

    # Largest entry sets the scale of the random initial factors.
    mag = np.amax(data)
    data = ShiftMatrix(data, self.maxlag)
    m, n = data.shape

    # initialize W and H
    self.W = mag * np.abs(
        np.random.rand(self.maxlag * 2 + 1, m, self.n_components))
    self.H = ShiftMatrix(
        mag * np.abs(np.random.rand(self.n_components, n)), self.maxlag)

    # optimize
    if alg == 'bcd_backtrack':
        fit_bcd(data, self, step_type='backtrack')
    elif alg == 'bcd_const':
        fit_bcd(data, self, step_type='constant')
    elif alg == 'mult':
        fit_mult(data, self)
    else:
        raise ValueError('No such algorithm found.')

    # compute explanatory power of each factor
    loadings = compute_loadings(data, self.W, self.H, self._shifts)

    # sort factors by power (ascending order of the loading values)
    ind = np.argsort(loadings)
    self.W = self.W[:, :, ind]
    self.H.assign(self.H.shift(0)[ind, :])

    return self
def agg_data(self):
    """
    Aggregate the NMF data into a single file for ease of loading.

    Also calculate reconstruction and regularization errors for each
    index and variable.

    For every normalization step `iR` and variable `iV` the per-pattern
    `W` slices, `H` rows and per-pattern reconstructions `X` are stacked
    into arrays whose leading axes are
    (n_steps, num_vars, num_max_patterns). Entries that are never written
    stay NaN. `errs[iR, iV, 0]` holds the reconstruction error relative to
    the norm of the variable's data and `errs[iR, iV, 1]` the
    `compute_scfo_reg` value divided by that norm squared. Everything is
    handed to `save_all_NMF_data`.
    """
    cwt_matrix = load_cwt_matrix(self.exp_dir, self.exp_name)
    n_steps = int(self.metadata['NMF']['seqnmf_norm_steps'])

    # Output arrays are allocated lazily because their trailing shapes are
    # only known once the first model list has been loaded.
    Ws = None
    Hs = None
    Xs = None
    errs = np.full((n_steps, self.num_vars, 2), np.nan)

    for iR in range(n_steps):
        print(iR)
        NMF_model_list = load_NMF_factors_single_norm(
            self.exp_dir, self.exp_name, iR)

        if Ws is None:
            lead_shape = (n_steps, self.num_vars, self.num_max_patterns)
            first_model = NMF_model_list[0]
            W_shape = lead_shape + first_model.W[:, :, 0].shape
            H_shape = lead_shape + first_model.H.shift(0)[0].shape
            X_shape = lead_shape + (W_shape[-1], H_shape[-1])
            Ws = np.full(W_shape, np.nan)
            Hs = np.full(H_shape, np.nan)
            Xs = np.full(X_shape, np.nan)

        for iV in range(self.num_vars):
            # The model depends only on the variable, so look it up once
            # here; it is also needed after the pattern loop.
            model = NMF_model_list[iV]
            var_data = cwt_matrix[:, :, iV]

            # Get W, H, X for each pattern. Full X is sum over patterns.
            for iP in range(self.num_max_patterns):
                Ws[iR, iV, iP] = model.W[:, :, iP]
                Hs[iR, iV, iP] = model.H.shift(0)[iP]
                Xs[iR, iV, iP] = vector_conv(
                    Ws[iR, iV, iP],
                    shiftVector(model.H.shift(0)[iP], model.H.L),
                    model._shifts)

            data_norm = np.linalg.norm(var_data)
            reconstruct_err = np.linalg.norm(
                var_data - np.sum(Xs[iR, iV], axis=0)) / data_norm
            regularize_err = compute_scfo_reg(
                ShiftMatrix(var_data, self.pattern_length),
                model.W, model.H, model._shifts,
                model._kernel) / data_norm**2
            errs[iR, iV, 0] = reconstruct_err
            errs[iR, iV, 1] = regularize_err

    save_all_NMF_data(self.exp_dir, self.exp_name, Ws, Hs, Xs, errs)
def compute_scfo_reg(data, W, H, shifts, kernel):
    """
    Evaluate the soft cross-factor orthogonality (scfo) penalty term.

    The unshifted `H` is transposed and smoothed with `kernel`, the
    unshifted data is multiplied by that result, and the product is passed
    through `tensor_transconv` together with `W`. The norm of the outcome
    is returned.

    Parameters
    ----------
    data, H : objects exposing ``shift(0)``
        presumably ShiftMatrix instances -- TODO confirm against callers.
    W : array
        Factor tensor handed to `tensor_transconv`.
    shifts : sequence
        Lag offsets; its length determines the lag used for wrapping.
    kernel : object
        Smoothing kernel forwarded to `_smooth`.
    """
    # Lag recovered from the number of shifts (len == 2 * lag + 1).
    half_width = int((len(shifts) - 1) / 2)

    # Smooth the transposed temporal factors.
    H_smoothed = _smooth(H.shift(0).T, kernel)

    # Project the data onto the smoothed factors and wrap for shifting.
    projected = np.dot(data.shift(0), H_smoothed)
    wrapped = ShiftMatrix(projected, half_width)

    # Transpose-convolve with W and reduce to a scalar magnitude.
    return norm(tensor_transconv(W, wrapped, shifts))
def compute_scfo_gH(data, W, H, shifts, kernel):
    """
    Compute the scfo term with respect to `H` (presumably its gradient,
    judging by the name -- TODO confirm).

    The unshifted data is smoothed with `kernel`, transpose-convolved with
    `W`, and then left-multiplied by a (K, K) matrix of ones with a zero
    diagonal, where K is the first dimension of `H`. `H` is used only for
    its shape.
    """
    n_factors, _ = H.shape

    # Lag recovered from the number of shifts (len == 2 * lag + 1).
    half_width = int((len(shifts) - 1) / 2)
    smoothed = ShiftMatrix(_smooth(data.shift(0), kernel), half_width)

    # Ones everywhere except the diagonal: each row sums the contributions
    # of all *other* factors.
    off_diagonal = np.ones((n_factors, n_factors)) - np.eye(n_factors)

    transconv = tensor_transconv(W, smoothed, shifts)
    return off_diagonal.dot(transconv)
def _backtrack(data, grad_W, grad_H, model, beta=0.8, alpha=0.00001,
               max_iters=500):
    """
    Backtracking line search to find a step length.

    Starting from a step of 1.0, the step is repeatedly multiplied by
    `beta` until the loss at the projected (clipped at zero) candidate
    factors is no greater than ``initial_loss - alpha * step * |grad|^2``,
    or until `max_iters` shrink steps have been made.

    Parameters
    ----------
    data : object passed to `compute_loss` / `compute_scfo_reg`
    grad_W, grad_H : arrays
        Gradients for `model.W` and the unshifted `model.H`.
    model : object
        Provides `W`, `H`, `maxlag`, `l2_scfo`, `_shifts` and `_kernel`.
    beta : float, optional
        Multiplicative shrink factor applied to the step each iteration.
    alpha : float, optional
        Weight of the required decrease in the acceptance test.
    max_iters : int, optional
        Upper bound on the number of shrink iterations.

    Returns
    -------
    float
        The final step length. The model itself is not modified here.
    """
    shifts = model._shifts

    def _objective(W_cand, H_cand):
        # Reconstruction loss, plus the scfo regularizer when enabled.
        value = compute_loss(data, W_cand, H_cand, shifts)
        if model.l2_scfo != 0:
            value += model.l2_scfo * compute_scfo_reg(
                data, W_cand, H_cand, shifts, model._kernel)
        return value

    # Reference loss and squared gradient magnitude at the current point.
    baseline = _objective(model.W, model.H)
    grad_sq = la.norm(grad_W)**2 + la.norm(grad_H)**2

    # The candidate loss starts equal to the baseline, so the loop body
    # runs at least once whenever alpha * grad_sq is positive.
    candidate_loss = baseline
    step = 1.0
    n_done = 0
    H_trial = ShiftMatrix(model.H.shift(0), model.maxlag)

    while ((candidate_loss > baseline - alpha * step * grad_sq)
           and (n_done < max_iters)):
        step = beta * step
        # Projected gradient step: clip negatives to keep factors >= 0.
        H_trial.assign(np.maximum(model.H.shift(0) - step * grad_H, 0))
        W_trial = np.maximum(model.W - step * grad_W, 0)
        candidate_loss = _objective(W_trial, H_trial)
        n_done += 1

    return step
def compute_loadings(data, W, H, shifts):
    """
    Score each factor by how well it alone reconstructs the data.

    For every factor index, the single-factor reconstruction is built with
    `tensor_conv` and the norm of its residual against the unshifted data
    is divided by the norm of the data (plus `EPSILON`).

    Returns
    -------
    list
        One relative residual per factor, in factor order.
    """
    n_factors, _ = H.shape

    # Lag recovered from the number of shifts (len == 2 * lag + 1).
    half_width = int((len(shifts) - 1) / 2)
    scale = norm(data.shift(0))

    def _relative_residual(k):
        # Slices (rather than integer indexing) keep the factor axis.
        W_k = W[:, :, k:k + 1]
        H_k = ShiftMatrix(H.shift(0)[k:k + 1, :], half_width)
        approx = tensor_conv(W_k, H_k, shifts)
        return norm(approx - data.shift(0)) / (scale + EPSILON)

    return [_relative_residual(k) for k in range(n_factors)]
def compute_gH(data, W, H, shifts):
    """
    Compute the gradient of H.

    Returns
    -------
    loss : norm of the residual between the `tensor_conv` estimate and the
        unshifted data.
    Hgrad : result of `tensor_transconv` applied to `W` and the wrapped
        residual.
    """
    # Residual between the current reconstruction and the data.
    residual = tensor_conv(W, H, shifts) - data.shift(0)
    loss = norm(residual)

    # Wrap the residual so it can be shifted; lag is recovered from the
    # number of shifts (len == 2 * lag + 1).
    half_width = int((len(shifts) - 1) / 2)
    wrapped = ShiftMatrix(residual, half_width)

    grad_H = tensor_transconv(W, wrapped, shifts)
    return loss, grad_H
class CNMF(object):
    """Convolutive non-negative matrix factorization model (see `__init__`)."""

    def __init__(self, n_components, maxlag, tol=1e-5, n_iter_max=100,
                 l2_scfo=0, l1_W=0.0, l1_H=0.0):
        """
        Convolutive Non-Negative Matrix Factorization (CNMF)

        Factors a matrix into a convolution between a tensor `W` and a
        matrix `H`.

        Parameters
        ----------
        n_components : int
            Number of components to fit.
        maxlag : int
            Maximum time lag in each sequence. A single sequence can lag up
            to `maxlag` entries left or right and has length `2*maxlag+1`.
        tol : float, optional
            Tolerance for convergence. If the change in cost is less than
            the `tol`, the algorithm will terminate early.
        n_iter_max : int, optional
            Maximum number of iterations during algorithm fitting.
        l2_scfo : float, optional
            Weight of the soft cross-factors orthogonality regularizer. See
            references for details.
        l1_W : float, optional
            Weight of the L1 regularizer for the entries of `W`.
        l1_H : float, optional
            Weight of the L1 regularizer for the entries of `H`.

        References
        ----------
        See Mackevicius, Bahle, et al., *Unsupervised discovery of temporal
        sequences in high-dimensional datasets, with applications to
        neuroscience.*
        """
        self.n_components = n_components
        self.maxlag = maxlag

        # Factors are populated by `fit`.
        self.W = None
        self.H = None

        self.seq_norm = np.zeros(n_iter_max)

        # Honour the caller-supplied tolerance instead of a fixed constant.
        self.tol = tol
        self.n_iter_max = n_iter_max

        self.l2_scfo = l2_scfo
        self.l1_W = l1_W
        self.l1_H = l1_H

        # Lag offsets -maxlag .. +maxlag and the matching smoothing kernel.
        self._shifts = np.arange(maxlag * 2 + 1) - maxlag
        self._kernel = compute_smooth_kernel(maxlag)

        self.loss_hist = None

    def fit(self, data, alg='mult'):
        """
        Fit a CNMF model to the data.

        Parameters
        ----------
        data : array-like, 2-D
            Non-negative training data to fit. Converted with
            ``np.asarray`` so lists and other array-likes are accepted.
            Historically documented as shape (n_time, n_features).
            NOTE(review): the first axis sizes `W` and the second axis
            sizes `H` below -- confirm the intended orientation.
        alg : string {'mult', 'bcd_backtrack', 'bcd_const'}, optional
            Algorithm used to fit the data. 'mult' dispatches to
            `fit_mult`; the 'bcd_*' options dispatch to `fit_bcd` with a
            'backtrack' or 'constant' step type respectively.

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If `data` contains negative values or `alg` is not one of the
            supported names.
        """
        # Check input. Coerce first so that plain lists support the
        # element-wise comparison below.
        data = np.asarray(data)
        if (data < 0).any():
            raise ValueError('Negative values in data to fit')

        # Largest entry sets the scale of the random initial factors.
        mag = np.amax(data)
        data = ShiftMatrix(data, self.maxlag)
        m, n = data.shape

        # initialize W and H
        self.W = mag * np.abs(
            np.random.rand(self.maxlag * 2 + 1, m, self.n_components))
        self.H = ShiftMatrix(
            mag * np.abs(np.random.rand(self.n_components, n)),
            self.maxlag)

        # optimize
        if alg == 'bcd_backtrack':
            fit_bcd(data, self, step_type='backtrack')
        elif alg == 'bcd_const':
            fit_bcd(data, self, step_type='constant')
        elif alg == 'mult':
            fit_mult(data, self)
        else:
            raise ValueError('No such algorithm found.')

        # compute explanatory power of each factor
        loadings = compute_loadings(data, self.W, self.H, self._shifts)

        # sort factors by power (ascending order of the loading values)
        ind = np.argsort(loadings)
        self.W = self.W[:, :, ind]
        self.H.assign(self.H.shift(0)[ind, :])

        return self

    def predict(self):
        """
        Return low-rank reconstruction of data.

        Returns
        -------
        est : array-like
            Reconstruction of the data using `W` and `H`, as produced by
            `tensor_conv`.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        # check that W and H are fit
        self._check_is_fitted()
        return tensor_conv(self.W, self.H, self._shifts)

    def _check_is_fitted(self):
        """
        Check if `W`, `H` have been fitted.

        Raises
        ------
        ValueError
            If either `W` or `H` is still None.
        """
        if self.W is None or self.H is None:
            raise ValueError("This ConvNMF instance is not fitted yet. "
                             "Call 'fit' with appropriate arguments "
                             "before using this method.")