Пример #1
0
    def fit(self, data, alg='mult'):
        """
        Fit a CNMF model to the data.

        Parameters
        ----------
        data : numpy array, 2-D
            Non-negative training data. It is wrapped in a `ShiftMatrix`
            and the resulting `.shape` is unpacked as `(m, n)`; `W` is
            initialised with shape `(2 * maxlag + 1, m, n_components)` and
            `H` with shape `(n_components, n)`.
        alg : string {'mult', 'bcd_backtrack', 'bcd_const'}, optional
            Algorithm used to fit the data. 'mult' dispatches to
            `fit_mult`; the two 'bcd_*' options dispatch to `fit_bcd` with
            `step_type` 'backtrack' and 'constant' respectively.

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If `data` contains negative values or `alg` is not one of the
            supported names.
        """
        # Validate every argument before touching `self`, so that a bad
        # call cannot overwrite the factors of a previously fitted model.
        if (data < 0).any():
            raise ValueError('Negative values in data to fit')
        if alg not in ('mult', 'bcd_backtrack', 'bcd_const'):
            raise ValueError('No such algorithm found.')

        mag = np.amax(data)
        data = ShiftMatrix(data, self.maxlag)
        m, n = data.shape

        # Random non-negative initialisation scaled to the data maximum.
        # `np.random.rand` already samples from [0, 1), so no abs is needed.
        self.W = mag * np.random.rand(
            self.maxlag * 2 + 1, m, self.n_components)
        self.H = ShiftMatrix(
            mag * np.random.rand(self.n_components, n), self.maxlag)

        # optimize (alg was validated above, so the last branch is 'bcd_const')
        if alg == 'mult':
            fit_mult(data, self)
        elif alg == 'bcd_backtrack':
            fit_bcd(data, self, step_type='backtrack')
        else:
            fit_bcd(data, self, step_type='constant')

        # Reorder the factors by ascending value of the per-factor score
        # returned by `compute_loadings`, keeping W and H aligned.
        loadings = compute_loadings(data, self.W, self.H, self._shifts)
        ind = np.argsort(loadings)
        self.W = self.W[:, :, ind]
        self.H.assign(self.H.shift(0)[ind, :])

        return self
Пример #2
0
    def agg_data(self):
        """
        Aggregate the NMF data into a single file for ease of loading.

        For every normalization index `iR` and variable `iV` this collects
        the per-pattern `W` slices, `H` rows and per-pattern
        reconstructions `X` into dense arrays, and records two error
        values per (index, variable) pair:

        * ``errs[iR, iV, 0]`` -- norm of the difference between
          ``cwt_matrix[:, :, iV]`` and the sum of the per-pattern
          reconstructions, divided by the norm of ``cwt_matrix[:, :, iV]``.
        * ``errs[iR, iV, 1]`` -- value of `compute_scfo_reg` divided by
          the squared norm of ``cwt_matrix[:, :, iV]``.

        Entries that are never written stay NaN. The result is handed to
        `save_all_NMF_data`; nothing is returned.
        """
        cwt_matrix = load_cwt_matrix(self.exp_dir, self.exp_name)
        NMF_idxs = range(int(self.metadata['NMF']['seqnmf_norm_steps']))
        n_idxs = len(NMF_idxs)

        # The factor arrays are allocated lazily, once the first model list
        # has been loaded and the per-pattern shapes are known.
        Ws = None
        Hs = None
        Xs = None
        errs = np.full((n_idxs, self.num_vars, 2), np.nan)

        for iR in NMF_idxs:
            print(iR)
            NMF_model_list = load_NMF_factors_single_norm(
                self.exp_dir, self.exp_name, iR)

            if Ws is None:
                leading = (n_idxs, self.num_vars, self.num_max_patterns)
                first_model = NMF_model_list[0]
                W_shape = leading + first_model.W[:, :, 0].shape
                H_shape = leading + first_model.H.shift(0)[0].shape
                Ws = np.full(W_shape, np.nan)
                Hs = np.full(H_shape, np.nan)
                Xs = np.full(leading + (W_shape[-1], H_shape[-1]), np.nan)

            for iV in range(self.num_vars):
                # Looked up once per variable: it does not depend on the
                # pattern index and is needed after the pattern loop even
                # when that loop runs zero times.
                model = NMF_model_list[iV]
                target = cwt_matrix[:, :, iV]

                # Get W, H, X for each pattern. Full X is sum over patterns.
                for iP in range(self.num_max_patterns):
                    h_row = model.H.shift(0)[iP]
                    Ws[iR, iV, iP] = model.W[:, :, iP]
                    Hs[iR, iV, iP] = h_row
                    Xs[iR, iV, iP] = vector_conv(
                        Ws[iR, iV, iP],
                        shiftVector(h_row, model.H.L),
                        model._shifts)

                data_norm = np.linalg.norm(target)
                reconstruct_err = np.linalg.norm(
                    target - np.sum(Xs[iR, iV], axis=0)) / data_norm
                regularize_err = compute_scfo_reg(
                    ShiftMatrix(target, self.pattern_length),
                    model.W, model.H, model._shifts,
                    model._kernel) / data_norm**2
                errs[iR, iV, 0] = reconstruct_err
                errs[iR, iV, 1] = regularize_err

        save_all_NMF_data(self.exp_dir, self.exp_name, Ws, Hs, Xs, errs)
Пример #3
0
def compute_scfo_reg(data, W, H, shifts, kernel):
    """
    Evaluate the cross-factor regularization term.

    `H` is transposed and smoothed with `kernel`, the unshifted data is
    multiplied by the result, and that product is passed through
    `tensor_transconv` together with `W`. The `norm` of the outcome is
    returned.
    """
    # half-width of the lag window implied by the number of shifts
    half_width = int((len(shifts) - 1) / 2)

    # smooth the transposed H, then project the unshifted data onto it
    H_smoothed = _smooth(H.shift(0).T, kernel)
    projected = np.dot(data.shift(0), H_smoothed)

    # transpose-convolve the wrapped projection with W and take its norm
    wrapped = ShiftMatrix(projected, half_width)
    return norm(tensor_transconv(W, wrapped, shifts))
Пример #4
0
def compute_scfo_gH(data, W, H, shifts, kernel):
    """
    Compute the cross-factor regularizer term for the `H` update.

    The unshifted data is smoothed with `kernel`, transpose-convolved with
    `W`, and the result is left-multiplied by a (K, K) matrix that is zero
    on the diagonal and one elsewhere, where K is the first dimension of
    `H.shape`.
    """
    n_factors, _ = H.shape

    # smooth the data and wrap it so it can be shifted
    half_width = int((len(shifts) - 1) / 2)
    smoothed = _smooth(data.shift(0), kernel)
    wrapped = ShiftMatrix(smoothed, half_width)

    # ones everywhere except the diagonal: each row sums the *other* rows
    off_diagonal = 1.0 - np.eye(n_factors)

    return off_diagonal.dot(tensor_transconv(W, wrapped, shifts))
Пример #5
0
def _backtrack(data,
               grad_W,
               grad_H,
               model,
               beta=0.8,
               alpha=0.00001,
               max_iters=500):
    """
    Backtracking line search to find a step length.

    Starting from 1.0, the step is multiplied by `beta` until the loss at
    the projected point ``max(param - step * grad, 0)`` is no greater than
    ``initial_loss - alpha * step * (|grad_W|^2 + |grad_H|^2)``, or until
    `max_iters` shrinks have been made. When `model.l2_scfo` is non-zero
    the loss includes ``model.l2_scfo * compute_scfo_reg(...)``.

    Returns the final step length. If the iteration cap is hit, the last
    step tried is returned even though the decrease test was not met.
    """
    shifts = model._shifts

    def _objective(W, H):
        # data-fit loss, plus the weighted regularizer when it is enabled
        value = compute_loss(data, W, H, shifts)
        if model.l2_scfo != 0:
            value += model.l2_scfo * compute_scfo_reg(
                data, W, H, shifts, model._kernel)
        return value

    baseline = _objective(model.W, model.H)
    grad_sq = la.norm(grad_W)**2 + la.norm(grad_H)**2

    # The trial loss starts equal to the baseline, so with a non-zero
    # gradient the step is shrunk at least once before being accepted.
    trial_loss = baseline
    step = 1.0
    n_shrinks = 0
    trial_H = ShiftMatrix(model.H.shift(0), model.maxlag)

    while n_shrinks < max_iters and \
            trial_loss > baseline - alpha * step * grad_sq:
        step = beta * step

        # projected gradient step for both factors
        trial_H.assign(np.maximum(model.H.shift(0) - step * grad_H, 0))
        trial_W = np.maximum(model.W - step * grad_W, 0)
        trial_loss = _objective(trial_W, trial_H)

        n_shrinks += 1

    return step
Пример #6
0
def compute_loadings(data, W, H, shifts):
    """
    Score each factor by how well it reconstructs the data on its own.

    For factor `i`, the estimate is the convolution of ``W[:, :, i:i+1]``
    with row `i` of `H`. The score is the `norm` of ``estimate - data``
    divided by the `norm` of the data (plus `EPSILON`), so it is a
    relative residual.

    Returns a list with one score per row of `H`.
    """
    n_factors, _ = H.shape
    half_width = int((len(shifts) - 1) / 2)
    data_norm = norm(data.shift(0))

    def _relative_residual(i):
        # keep the factor axis (slice, not index) so shapes stay 3-D / 2-D
        single_W = W[:, :, i:i + 1]
        single_H = ShiftMatrix(H.shift(0)[i:i + 1, :], half_width)
        estimate = tensor_conv(single_W, single_H, shifts)
        return norm(estimate - data.shift(0)) / (data_norm + EPSILON)

    return [_relative_residual(i) for i in range(n_factors)]
Пример #7
0
def compute_gH(data, W, H, shifts):
    """
    Compute the loss and the gradient with respect to `H`.

    Returns a tuple ``(loss, Hgrad)`` where `loss` is the `norm` of the
    residual ``tensor_conv(W, H, shifts) - data`` and `Hgrad` is the
    transpose convolution of `W` with that residual.
    """
    # residual between the current estimate and the unshifted data
    residual = tensor_conv(W, H, shifts) - data.shift(0)
    loss = norm(residual)

    # the transpose convolution needs the residual wrapped in a ShiftMatrix
    half_width = int((len(shifts) - 1) / 2)
    wrapped_residual = ShiftMatrix(residual, half_width)

    return loss, tensor_transconv(W, wrapped_residual, shifts)
Пример #8
0
class CNMF(object):
    def __init__(self,
                 n_components,
                 maxlag,
                 tol=1e-5,
                 n_iter_max=100,
                 l2_scfo=0,
                 l1_W=0.0,
                 l1_H=0.0):
        """
        Convolutive Non-Negative Matrix Factorization (CNMF)

        Factors a matrix into a convolution between a tensor `W` and a
        matrix `H`.

        Parameters
        ----------
        n_components : int
            Number of components to fit.
        maxlag : int
            Maximum time lag in each sequence. A single sequence can lag up
            to `maxlag` entries left or right and has length `2*maxlag+1`.
        tol : float, optional
            Tolerance for convergence. If the change in cost is less than
            the `tol`, the algorithm will terminate early.
        n_iter_max : int, optional
            Maximum number of iterations during algorithm fitting.
        l2_scfo : float, optional
            Weight of the soft cross-factors orthogonality regularizer.
            See references for details.
        l1_W : float, optional
            Weight of the L1 regularizer for the entries of `W`.
        l1_H : float, optional
            Weight of the L1 regularizer for the entries of `H`.

        References
        ----------
        See Mackevicius, Bahle, et al., *Unsupervised discovery of temporal
        sequences in high-dimensional datasets, with applications to
        neuroscience.*
        """
        self.n_components = n_components
        self.maxlag = maxlag

        # Factors; stay None until `fit` has been called.
        self.W = None
        self.H = None

        # One slot per iteration, zero-initialised.
        self.seq_norm = np.zeros(n_iter_max)

        # Honour the caller-supplied tolerance (it used to be overwritten
        # with a hard-coded 1e-4, silently ignoring the argument).
        self.tol = tol
        self.n_iter_max = n_iter_max

        self.l2_scfo = l2_scfo
        self.l1_W = l1_W
        self.l1_H = l1_H

        # Integer lags -maxlag, ..., 0, ..., +maxlag.
        self._shifts = np.arange(maxlag * 2 + 1) - maxlag
        self._kernel = compute_smooth_kernel(maxlag)
        self.loss_hist = None

    def fit(self, data, alg='mult'):
        """
        Fit a CNMF model to the data.

        Parameters
        ----------
        data : numpy array, 2-D
            Non-negative training data. It is wrapped in a `ShiftMatrix`
            and the resulting `.shape` is unpacked as `(m, n)`; `W` is
            initialised with shape `(2 * maxlag + 1, m, n_components)` and
            `H` with shape `(n_components, n)`.
        alg : string {'mult', 'bcd_backtrack', 'bcd_const'}, optional
            Algorithm used to fit the data. 'mult' dispatches to
            `fit_mult`; the two 'bcd_*' options dispatch to `fit_bcd` with
            `step_type` 'backtrack' and 'constant' respectively.

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If `data` contains negative values or `alg` is not one of the
            supported names.
        """
        # Validate every argument before touching the instance, so that a
        # bad call cannot overwrite the factors of a previously fitted model.
        if (data < 0).any():
            raise ValueError('Negative values in data to fit')
        if alg not in ('mult', 'bcd_backtrack', 'bcd_const'):
            raise ValueError('No such algorithm found.')

        mag = np.amax(data)
        data = ShiftMatrix(data, self.maxlag)
        m, n = data.shape

        # Random non-negative initialisation scaled to the data maximum.
        # `np.random.rand` already samples from [0, 1), so no abs is needed.
        self.W = mag * np.random.rand(
            self.maxlag * 2 + 1, m, self.n_components)
        self.H = ShiftMatrix(
            mag * np.random.rand(self.n_components, n), self.maxlag)

        # optimize (alg was validated above, so the last branch is 'bcd_const')
        if alg == 'mult':
            fit_mult(data, self)
        elif alg == 'bcd_backtrack':
            fit_bcd(data, self, step_type='backtrack')
        else:
            fit_bcd(data, self, step_type='constant')

        # Reorder the factors by ascending value of the per-factor score
        # returned by `compute_loadings`, keeping W and H aligned.
        loadings = compute_loadings(data, self.W, self.H, self._shifts)
        ind = np.argsort(loadings)
        self.W = self.W[:, :, ind]
        self.H.assign(self.H.shift(0)[ind, :])

        return self

    def predict(self):
        """
        Return low-rank reconstruction of data.

        Returns
        -------
        est : array-like
            Result of `tensor_conv(W, H, shifts)` for the fitted factors.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        # check that W and H are fit
        self._check_is_fitted()

        return tensor_conv(self.W, self.H, self._shifts)

    def _check_is_fitted(self):
        """
        Check if `W`, `H` have been fitted.

        Raises
        ------
        ValueError
            If either `W` or `H` is still None.
        """
        if self.W is None or self.H is None:
            raise ValueError('This ConvNMF instance is not fitted yet. '
                             'Call \'fit\' with appropriate arguments '
                             'before using this method.')