Example #1
def global_error(X, D, sparse_coder, n_class_atoms, n_jobs=1):
    """
    computes the approximation error of the dataset to
    each class-specific dictionary. The dataset is first encoded over the
    joint dictionary.
    """

    Z = sparse_coder(X, D)
    n_samples = X.shape[1]
    n_classes = len(n_class_atoms)
    E = np.zeros((n_classes, n_samples))

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    for c in range(n_classes):
        c_idx = get_class_atoms(c, n_class_atoms)
        E[c, :] = np.sum(np.power(fast_dot(D[:, c_idx], Z[c_idx, :]) - X, 2), axis=0)

    if n_jobs > 1:
        set_openblas_threads(1)

    return E
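
A typical use of the error matrix E returned above is reconstruction-based classification: each datapoint is assigned to the class whose sub-dictionary approximates it best. A minimal sketch of that step (classify_by_reconstruction_error is an illustrative helper, not part of lyssa):

import numpy as np

def classify_by_reconstruction_error(E):
    # E has shape (n_classes, n_samples); pick, for every column,
    # the class whose sub-dictionary yields the smallest residual
    return np.argmin(E, axis=0)

# hypothetical usage:
# E = global_error(X, D, sparse_coder, n_class_atoms, n_jobs=4)
# y_pred = classify_by_reconstruction_error(E)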
Example #2
    def __call__(self, X, D):
        from lyssa.utils import set_openblas_threads
        if self.verbose:
            msg = "feature encoding"
        else:
            msg = None

        n_atoms = D.shape[1]
        n_samples = X.shape[1]

        n_batches = 100
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        if self.algorithm == 'soft_thresholding':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = None
            batched_args = None
            func = partial(
                soft_thresholding,
                nonzero_percentage=self.params.get('nonzero_percentage'),
                n_nonzero_coefs=self.params.get('n_nonzero_coefs'))

        if self.n_jobs > 1:
            # disable OpenBLAS to
            # avoid hanging problem
            set_openblas_threads(1)

        Z = run_parallel(func=func,
                         data=data,
                         args=args,
                         batched_args=batched_args,
                         result_shape=(n_atoms, n_samples),
                         n_batches=n_batches,
                         mmap=self.mmap,
                         msg=msg,
                         n_jobs=self.n_jobs)

        # restore the previous setting
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        return Z
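
The encoder above only precomputes the correlations Alpha = D.T X and delegates the shrinkage to soft_thresholding inside run_parallel. As a point of reference, the classic elementwise soft-thresholding operator looks as follows; lyssa's soft_thresholding is instead parameterized by n_nonzero_coefs / nonzero_percentage, so it presumably derives the threshold per column rather than taking an explicit lam:

import numpy as np

def soft_threshold(alpha, lam):
    # shrink every coefficient towards zero by lam and zero out
    # whatever falls below the threshold
    return np.sign(alpha) * np.maximum(np.abs(alpha) - lam, 0.0)

# hypothetical usage on the correlations computed above:
# Z = soft_threshold(Alpha, lam=0.1)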
Example #3
    def __call__(self, X, D):
        from lyssa.utils import set_openblas_threads
        if self.verbose:
            msg = "feature encoding"
        else:
            msg = None

        n_atoms = D.shape[1]
        n_samples = X.shape[1]

        n_batches = 100
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        if self.algorithm == 'soft_thresholding':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = None
            batched_args = None
            func = partial(soft_thresholding, nonzero_percentage=self.params.get('nonzero_percentage'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))

        if self.n_jobs > 1:
            # disable OpenBLAS to
            # avoid hanging problem
            set_openblas_threads(1)

        Z = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                         result_shape=(n_atoms, n_samples), n_batches=n_batches,
                         mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)

        # restore the previous setting
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        return Z
Example #4
def ksvd_dict_learn(X, n_atoms, init_dict='data', sparse_coder=None,
                    max_iter=20, non_neg=False, approx=False, eta=None,
                    n_cycles=1, n_jobs=1, mmap=False, verbose=True):
    """
    The K-SVD algorithm

    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    approx: if true, invokes the approximate KSVD algorithm
    max_iter: the maximum number of iterations
    non_neg: if set to True, it uses non-negativity constraints
    n_cycles: the number of updates per atom (Dictionary Update Cycles)
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory
    """
    n_features, n_samples = X.shape
    shape = (n_atoms, n_samples)
    Z = np.zeros(shape)
    # dictionary initialization
    # track the datapoints that are not used as atoms
    unused_data = []
    if init_dict == 'data':
        from .utils import init_dictionary
        D, unused_data = init_dictionary(X, n_atoms, method=init_dict, return_unused_data=True)
    else:
        D = np.copy(init_dict)

    if mmap:
        D = get_mmap(D)
        sparse_coder.mmap = True

    print "dictionary initialized"
    max_patience = 10
    error_curr = 0
    error_prev = 0
    it = 0
    patience = 0
    approx_errors = []

    while it < max_iter and patience < max_patience:
        print "----------------------------"
        print "iteration", it
        print ""
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()
        # sparse coding
        Z = sparse_coder(X, D)
        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "sparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        # ksvd to learn the dictionary
        set_openblas_threads(n_jobs)
        if approx:
            D, _, unused_atoms = approx_ksvd(X, D, Z, n_cycles=n_cycles)
        elif non_neg:
            D, _, unused_atoms = nn_ksvd(X, D, Z, n_cycles=n_cycles)
        else:
            D, _, unused_atoms = ksvd(X, D, Z, n_cycles=n_cycles)
        set_openblas_threads(1)
        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "K-SVD took", t_dict_duration, "seconds"
            print ""
        if verbose:
            print "number of unused atoms:", len(unused_atoms)
        # replace the unused atoms in the dictionary
        for j in range(len(unused_atoms)):
            # no datapoint available to be used as atom
            if len(unused_data) == 0:
                break
            _idx = np.random.choice(unused_data, size=1)
            idx = _idx[0]
            D[:, unused_atoms[j]] = X[:, idx]
            D[:, unused_atoms[j]] = normalize(D[:, unused_atoms[j]])
            unused_data.remove(idx)

        if eta is not None:
            # do not force incoherence in the last iteration
            if it < max_iter - 1:
                # force Mutual Incoherence
                D, unused_data = force_mi(D, X, Z, unused_data, eta)
        if verbose:
            amc = average_mutual_coherence(D)
            print "average mutual coherence:", amc

        it_duration = time.time() - it_start
        # calculate the approximation error
        error_curr = approx_error(D, Z, X, n_jobs=n_jobs)
        approx_errors.append(error_curr)
        if verbose:
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
        print "duration:", it_duration, "seconds"
        # compare against the previous iteration's error before updating it
        if (it > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
            patience += 1
        error_prev = error_curr
        it += 1
    print ""
    return D, Z
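
For context, the dictionary-update step that ksvd/approx_ksvd perform for one atom can be sketched as follows: restrict attention to the datapoints whose codes use atom k, remove that atom's contribution from the reconstruction, and replace the atom (and its coefficients) with the best rank-1 approximation of the residual. This is an illustrative sketch of the standard K-SVD update, not the lyssa implementation:

import numpy as np

def ksvd_atom_update_sketch(X, D, Z, k):
    # datapoints whose sparse code uses atom k
    omega = np.flatnonzero(Z[k, :])
    if omega.size == 0:
        return D, Z
    # residual with atom k's contribution removed, restricted to those datapoints
    E_k = X[:, omega] - np.dot(D, Z[:, omega]) + np.outer(D[:, k], Z[k, omega])
    # best rank-1 approximation of the restricted residual
    U, s, Vt = np.linalg.svd(E_k, full_matrices=False)
    D[:, k] = U[:, 0]
    Z[k, omega] = s[0] * Vt[0, :]
    return D, Z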
Example #5
def projected_grad_desc(X,
                        n_atoms=None,
                        sparse_coder=None,
                        batch_size=None,
                        D_init=None,
                        eta=None,
                        mu=None,
                        n_epochs=None,
                        non_neg=False,
                        verbose=False,
                        n_jobs=1,
                        mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly
            selected datapoints.
    eta: the learning rate
    mu:  the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a large batch_size implies faster execution but high memory
    overhead, while a smaller batch_size implies slower execution but low
    memory overhead.
    """

    # don't monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape
    # initialize the dictionary
    # with the dataset
    if D_init is None:
        D, unused_data = init_dictionary(X,
                                         n_atoms,
                                         method='data',
                                         return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"
    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    I = np.eye(n_atoms)

    if n_batches > n_iter:
        print "will iterate on only {0:.2f}% of the dataset".format(
            (float(n_iter) / n_batches) * 100)

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    max_patience = 10
    error_curr = 0
    error_prev = 0
    patience = 0
    approx_errors = []
    incs = []
    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)

            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" %
                                 (progress * 100))
                sys.stdout.flush()

            # the gradient of the approximation error
            grad_approx = np.dot(np.dot(D, Z_batch) - X_batch, Z_batch.T)
            # the gradient of the incoherence penalty
            if mu is not None and mu > 0:
                grad_incoh = 2 * mu * np.dot(D, np.dot(D.T, D) - I)
            else:
                grad_incoh = 0

            grad = grad_approx + grad_incoh
            D = D - eta * grad
            # enforce non-negativity
            if non_neg:
                D[D < 0] = 0
            # project to l2 unit sphere
            D = norm_cols(D)
            # sparse coding
            Z = sparse_coder(X, D)
            from lyssa.dict_learning.utils import average_mutual_coherence
            approx_errors.append(approx_error(D, Z, X, n_jobs=n_jobs))
        # replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            print ""
            print "end of epoch {0}".format(e)
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)
            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)
            # compare against the previous epoch's error before updating it
            if (e > 0) and (error_curr > 0.9 * error_prev
                            or error_curr > error_prev):
                patience += 1
            error_prev = error_curr
            if patience >= max_patience:
                return D
    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""
    return D
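
The inner loop above amounts to one projected gradient step per mini-batch. A self-contained sketch of a single step on made-up data, with the column normalization written out explicitly in place of norm_cols (Z_batch stands in for the output of the sparse coding step):

import numpy as np

rng = np.random.RandomState(0)
D = rng.randn(20, 10)                                    # (n_features, n_atoms)
D /= np.linalg.norm(D, axis=0)                           # project columns to the l2 unit sphere
X_batch = rng.randn(20, 32)                              # one mini-batch
Z_batch = rng.randn(10, 32) * (rng.rand(10, 32) < 0.2)   # stand-in sparse codes
eta, mu = 1e-3, 0.1
I = np.eye(D.shape[1])

grad_approx = np.dot(np.dot(D, Z_batch) - X_batch, Z_batch.T)
grad_incoh = 2 * mu * np.dot(D, np.dot(D.T, D) - I)
D = D - eta * (grad_approx + grad_incoh)                 # gradient step
D /= np.linalg.norm(D, axis=0)                           # project back onto the sphere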
Example #6
    def __call__(self, X, D):
        # assume X has datapoints in columns
        # use self.params.get('key') because it does not throw an exception
        # when the key does not exist; it just returns None.
        from lyssa.utils import set_openblas_threads

        n_samples = X.shape[1]
        n_atoms = D.shape[1]
        n_batches = 100

        if self.params.get('lambda') is not None:
            assert self.params.get('lambda') <= n_atoms

        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        batched_args = None

        if self.algorithm == 'omp':
            Gram = fast_dot(D.T, D)
            args = [D, Gram]
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            data = X
            func = partial(omp,
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           tol=self.params.get('tol'))

        elif self.algorithm == 'bomp':
            Gram = fast_dot(D.T, D)
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            args = [D, Gram]
            data = X
            func = partial(batch_omp,
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           tol=self.params.get('tol'))

        elif self.algorithm == 'thresh':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = []
            func = partial(
                thresholding,
                n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                nonzero_percentage=self.params.get('nonzero_percentage'))

        elif self.algorithm == "nnomp":
            args = [D]
            data = X
            func = partial(nn_omp,
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           tol=self.params.get('tol'))

        elif self.algorithm == 'group_omp':
            Gram = fast_dot(D.T, D)
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            data = X
            func = partial(group_omp,
                           groups=self.params.get('groups'),
                           n_groups=self.params.get('n_groups'))
            args = [D, Gram]

        elif self.algorithm == 'sparse_group_omp':
            # group_omp(X,D,Gram,groups=None,n_groups=None)
            Gram = fast_dot(D.T, D)
            data = X
            # sparse_group_omp(X,D,Gram,groups=None,n_groups=None,n_nonzero_coefs=None)
            func = partial(sparse_group_omp,
                           groups=self.params.get('groups'),
                           n_groups=self.params.get('n_groups'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
            args = [D, Gram]

        elif self.algorithm == 'somp':
            Gram = fast_dot(D.T, D)
            data = X
            func = partial(somp,
                           data_groups=self.params.get('data_groups'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
            args = [D, Gram]

        elif self.algorithm == 'iht':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = []
            func = partial(
                thresholding,
                n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                nonzero_percentage=self.params.get('nonzero_percentage'))

            Z0 = run_parallel(func=func,
                              data=data,
                              args=args,
                              batched_args=batched_args,
                              result_shape=(n_atoms, n_samples),
                              n_batches=n_batches,
                              mmap=self.mmap,
                              n_jobs=self.n_jobs)

            R0 = fast_dot(D, Z0) - X
            data = X
            batched_args = [Z0, R0]
            args = [D]
            # iterative_hard_thresh(X,Z0,Alpha,D,eta=None,n_nonzero_coefs=None,n_iter=None)
            func = partial(iterative_hard_thresh,
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           eta=self.params.get('eta'),
                           n_iter=self.params.get('n_iter'))
            """params = sparse_coder['iterative_hard_thresh']
            learning_rate = params[0]
            threshold = params[1]
            max_iter = params[2]
            Z = iterative_hard_thresh(X,D,Z,learning_rate=learning_rate,threshold = threshold,max_iter = max_iter)
            """

        elif self.algorithm == 'lasso':
            return lasso(self.params.get('lambda'), self.n_jobs)(X, D)

        elif self.algorithm == 'llc':
            func = partial(llc, knn=self.params.get('knn'))
            data = X
            args = [D]
        else:
            raise ValueError("Sparse optimizer not found.")

        if self.verbose:
            msg = "sparse coding"
        else:
            msg = None

        if self.n_jobs > 1:
            # disable OpenBLAS to
            # avoid the hanging problem
            set_openblas_threads(1)

        Z = run_parallel(func=func,
                         data=data,
                         args=args,
                         batched_args=batched_args,
                         result_shape=(n_atoms, n_samples),
                         n_batches=n_batches,
                         mmap=self.mmap,
                         msg=msg,
                         n_jobs=self.n_jobs)

        # restore the previous setting
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        return Z
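
Several branches above ('thresh', and the initialization of 'iht') rely on a thresholding function that keeps only the strongest correlations in each column of Alpha = D.T X. A minimal sketch under the assumption that exactly n_nonzero_coefs entries are kept per column (lyssa's thresholding may differ in detail, e.g. when nonzero_percentage is given):

import numpy as np

def hard_threshold_columns(Alpha, n_nonzero_coefs):
    # keep, in every column, only the n_nonzero_coefs entries with the
    # largest magnitude and zero out the rest
    Z = np.zeros_like(Alpha)
    for i in range(Alpha.shape[1]):
        idx = np.argsort(np.abs(Alpha[:, i]))[-n_nonzero_coefs:]
        Z[idx, i] = Alpha[idx, i]
    return Z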
Example #7
def lc_ksvd(X,
            y,
            D,
            Q,
            alpha=1,
            beta=1,
            lambda1=1,
            lambda2=1,
            init_dict='data',
            sparse_coder=None,
            max_iter=2,
            approx=False,
            mmap=False,
            verbose=False,
            n_jobs=1):
    """
    X: the data matrix with shape (n_features,n_samples)
    y: the vector that contains the label of each datapoint
    Q: a matrix with shape (n_atoms,n_samples). The element Q_{k,i} is 1 if the ith datapoint and the k atom belong to the same class
    lambda1: the regularizer for the W matrix i.e lambda1 * ||W||_{2}
    lambda2: the regularizer for the transformation matrix G i.e lambda2 * ||G||_{2}
    alpha: the weight we assign for sparse code discrimination
    beta: is the weight we assign for correct classification: beta*||H - WZ||_{2}
    """

    n_classes = len(set(y))
    n_atoms = D.shape[1]
    n_features, n_samples = X.shape
    Z = np.zeros((n_atoms, n_samples))

    # create the class label matrix
    # H is the class label matrix which has a
    # datapoint in each column with H_{c,i}=1 if
    # the ith datapoint belongs to the cth class
    H = np.zeros((n_classes, n_samples)).astype(int)

    for i in xrange(n_samples):
        H[y[i], i] = 1

    if n_jobs > 1:
        set_openblas_threads(n_jobs)
    # classifier parameter initialization
    I = np.eye(n_atoms)

    # W_{c,:} are the parameters of the linear classifier for the cth class
    W = np.dot(inv(np.dot(Z, Z.T) + lambda1 * I), np.dot(Z, H.T)).T
    # The matrix G forces the sparse codes to be discriminative and approximate the matrix Q,
    # and has shape (n_atoms,n_atoms)
    G = np.dot(inv(np.dot(Z, Z.T) + lambda2 * I), np.dot(Z, Q.T)).T

    # stack the data matrix X with class label matrix H
    # and matrix Q
    _X = np.vstack((X, np.sqrt(alpha) * Q))
    _X = np.vstack((_X, np.sqrt(beta) * H))

    if mmap:
        _X = get_mmap(_X)

    _normalizer = np.array(
        [np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])
    D = D / _normalizer
    G = G / _normalizer
    W = W / _normalizer

    # stack the dictionary D with the weight matrix W
    # and matrix G
    _D = np.vstack((D, np.sqrt(alpha) * G))
    _D = np.vstack((_D, np.sqrt(beta) * W))

    if mmap:
        _D = get_mmap(_D)

    if verbose:
        error_curr = 0
        error_prev = 0

    for it in range(max_iter):

        print "iteration", it
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()
        # sparse coding
        Z = sparse_coder(X, D)
        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "\nsparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        _D, _, unused_atoms = ksvd(_X, _D, Z, verbose=True)

        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "\nK-SVD took", t_dict_duration, "seconds"
        if verbose:
            print "number of unused atoms:", len(unused_atoms)

        D = _D[:n_features, :]
        G = _D[n_features:n_features + n_atoms, :]
        W = _D[n_features + n_atoms:, :]

        _normalizer = np.array(
            [np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])

        D = D / _normalizer
        G = G / _normalizer
        W = W / _normalizer
        # stack the dictionary D with the weight matrix W
        # and matrix G
        _D = np.vstack((D, np.sqrt(alpha) * G))
        _D = np.vstack((_D, np.sqrt(beta) * W))

        it_duration = time.time() - it_start
        if verbose:
            # calculate the approximation error
            error_curr = approx_error(D, Z, X, n_jobs=n_jobs)
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
            n_correct = np.array([
                y[i] == np.argmax(np.dot(W, Z[:, i]))
                for i in range(Z.shape[1])
            ]).nonzero()[0].size
            class_acc = n_correct / float(n_samples)
            print "classification accuracy", class_acc
            error_prev = error_curr
        print "duration:", it_duration, "seconds"
        print "----------------------"

    return D, Z, W
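
The matrices H and Q that lc_ksvd expects can be built directly from the labels and an atom-to-class assignment, and classification with the learned W is a matrix product followed by an argmax. A short sketch assuming the atoms are split evenly across classes (in practice the assignment should match the one used to build the dictionary); build_H_Q and predict are illustrative helpers, not part of lyssa:

import numpy as np

def build_H_Q(y, n_classes, n_atoms):
    # H[c, i] = 1 if datapoint i belongs to class c
    # Q[k, i] = 1 if atom k is assigned to the class of datapoint i
    n_samples = len(y)
    H = np.zeros((n_classes, n_samples), dtype=int)
    Q = np.zeros((n_atoms, n_samples), dtype=int)
    atoms_per_class = n_atoms // n_classes
    for i, label in enumerate(y):
        H[label, i] = 1
        start = label * atoms_per_class
        Q[start:start + atoms_per_class, i] = 1
    return H, Q

def predict(W, Z):
    # assign each datapoint to the class with the largest linear score
    return np.argmax(np.dot(W, Z), axis=0)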
Example #8
def online_dict_learn(X, n_atoms, sparse_coder=None, batch_size=None, A=None, B=None, D_init=None,
                      beta=None, n_epochs=1, verbose=False, n_jobs=1, non_neg=False, mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly
            selected datapoints.
    eta: the learning rate
    mu:  the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a	large batch_size implies
    faster execution but high memory overhead, while
    a smaller batch_size implies
    slower execution but low memory overhead
    """

    # don't monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape
    # initialize using the data
    if D_init is None:
        D, unused_data = init_dictionary(X, n_atoms, method='data', return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"
    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    _eps = np.finfo(float).eps

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    if A is None and B is None:
        A = np.zeros((n_atoms, n_atoms))
        B = np.zeros((n_features, n_atoms))

    if beta is None:
        # create a sequence that converges to one
        beta = np.linspace(0, 1, num=n_iter)
    else:
        beta = np.zeros(n_iter) + beta

    max_patience = 10
    error_curr = 0
    error_prev = 0
    patience = 0
    approx_errors = []
    incs = []
    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)
            # update A and B
            A = beta[i] * A + fast_dot(Z_batch, Z_batch.T)
            B = beta[i] * B + fast_dot(X_batch, Z_batch.T)
            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (progress * 100))
                sys.stdout.flush()

            DA = fast_dot(D, A)
            # this part could also be parallelized w.r.t the atoms
            for k in xrange(n_atoms):
                D[:, k] = (1 / (A[k, k] + _eps)) * (B[:, k] - DA[:, k]) + D[:, k]
            # enforce non-negativity constraints
            if non_neg:
                D[D < 0] = 0
            D = norm_cols(D)
        # replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            if patience >= max_patience:
                return D, A, B
            print ""
            print "end of epoch {0}".format(e)
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)
            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)
            # compare against the previous epoch's error before updating it
            if (e > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
                patience += 1
            error_prev = error_curr

    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""
    return D, A, B
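
Since sparse_coder is only required to be a callable taking (X, D) and returning the code matrix Z (plus a verbose attribute that online_dict_learn toggles), the function can be exercised with a stand-in coder. A hedged usage sketch with a trivial hard-thresholding coder; normally an instance of lyssa's sparse_encoder would be passed instead:

import numpy as np

class ToyCoder(object):
    # stand-in sparse coder: correlate with the dictionary and keep the
    # top-k entries per column; verbose is needed because online_dict_learn
    # sets sparse_coder.verbose = False
    def __init__(self, n_nonzero_coefs=3):
        self.n_nonzero_coefs = n_nonzero_coefs
        self.verbose = False

    def __call__(self, X, D):
        Alpha = np.dot(D.T, X)
        Z = np.zeros_like(Alpha)
        for i in range(Alpha.shape[1]):
            idx = np.argsort(np.abs(Alpha[:, i]))[-self.n_nonzero_coefs:]
            Z[idx, i] = Alpha[idx, i]
        return Z

# hypothetical usage:
# X = np.random.randn(64, 1000)
# D, A, B = online_dict_learn(X, n_atoms=128, sparse_coder=ToyCoder(),
#                             batch_size=100, n_epochs=2, verbose=True)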
Example #9
def ksvd_dict_learn(X, n_atoms, init_dict='data', sparse_coder=None,
                    max_iter=20, non_neg=False, approx=False, eta=None,
                    n_cycles=1, n_jobs=1, mmap=False, verbose=True):
    """
    The K-SVD algorithm

    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    approx: if true, invokes the approximate KSVD algorithm
    max_iter: the maximum number of iterations
    non_neg: if set to True, it uses non-negativity constraints
    n_cycles: the number of updates per atom (Dictionary Update Cycles)
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory
    """
    n_features, n_samples = X.shape
    shape = (n_atoms, n_samples)
    Z = np.zeros(shape)
    # dictionary initialization
    # track the datapoints that are not used as atoms
    unused_data = []
    if init_dict == 'data':
        from .utils import init_dictionary
        D, unused_data = init_dictionary(X, n_atoms, method=init_dict, return_unused_data=True)
    else:
        D = np.copy(init_dict)

    if mmap:
        D = get_mmap(D)
        sparse_coder.mmap = True

    print "dictionary initialized"
    max_patience = 10
    error_curr = 0
    error_prev = 0
    it = 0
    patience = 0
    approx_errors = []

    while it < max_iter and patience < max_patience:
        print "----------------------------"
        print "iteration", it
        print ""
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()
        # sparse coding
        Z = sparse_coder(X, D)
        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "sparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        # ksvd to learn the dictionary
        set_openblas_threads(n_jobs)
        if approx:
            D, _, unused_atoms = approx_ksvd(X, D, Z, n_cycles=n_cycles)
        elif non_neg:
            D, _, unused_atoms = nn_ksvd(X, D, Z, n_cycles=n_cycles)
        else:
            D, _, unused_atoms = ksvd(X, D, Z, n_cycles=n_cycles)
        set_openblas_threads(1)
        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "K-SVD took", t_dict_duration, "seconds"
            print ""
        if verbose:
            print "number of unused atoms:", len(unused_atoms)
        # replace the unused atoms in the dictionary
        for j in range(len(unused_atoms)):
            # no datapoint available to be used as atom
            if len(unused_data) == 0:
                break
            _idx = np.random.choice(unused_data, size=1)
            idx = _idx[0]
            D[:, unused_atoms[j]] = X[:, idx]
            D[:, unused_atoms[j]] = normalize(D[:, unused_atoms[j]])
            unused_data.remove(idx)

        if eta is not None:
            # do not force incoherence in the last iteration
            if it < max_iter - 1:
                # force Mutual Incoherence
                D, unused_data = force_mi(D, X, Z, unused_data, eta)
        if verbose:
            amc = average_mutual_coherence(D)
            print "average mutual coherence:", amc

        it_duration = time.time() - it_start
        # calculate the approximation error
        error_curr = approx_error(D, Z, X, n_jobs=n_jobs)
        approx_errors.append(error_curr)
        if verbose:
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
        print "duration:", it_duration, "seconds"
        # compare against the previous iteration's error before updating it
        if (it > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
            patience += 1
        error_prev = error_curr
        it += 1
    print ""
    return D, Z
Example #10
def approx_error(D, Z, X, n_jobs=1):
    """computes the approximation error ||X-DZ||_{F}^{2} """
    if n_jobs > 1:
        set_openblas_threads(n_jobs)
    error = frobenius_squared(X - fast_dot(D, Z))
    return error
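
In plain numpy the same quantity is just the sum of squared entries of the residual; an equivalent one-liner, assuming frobenius_squared computes the squared Frobenius norm:

import numpy as np

def approx_error_numpy(D, Z, X):
    # squared Frobenius norm of the residual, ||X - DZ||_{F}^{2}
    R = X - np.dot(D, Z)
    return np.sum(R * R)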
Example #11
    def __call__(self, X, D):
        # assume X has datapoints in columns
        # use self.params.get('key') because it does not throw an exception
        # when the key does not exist; it just returns None.
        from lyssa.utils import set_openblas_threads

        n_samples = X.shape[1]
        n_atoms = D.shape[1]
        n_batches = 100

        if self.params.get('lambda') is not None:
            assert self.params.get('lambda') <= n_atoms

        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        batched_args = None

        if self.algorithm == 'omp':
            Gram = fast_dot(D.T, D)
            args = [D, Gram]
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            data = X
            func = partial(omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'), tol=self.params.get('tol'))

        elif self.algorithm == 'bomp':
            Gram = fast_dot(D.T, D)
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            args = [D, Gram]
            data = X
            func = partial(batch_omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'), tol=self.params.get('tol'))

        elif self.algorithm == 'thresh':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = []
            func = partial(thresholding, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           nonzero_percentage=self.params.get('nonzero_percentage'))

        elif self.algorithm == "nnomp":
            args = [D]
            data = X
            func = partial(nn_omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'), tol=self.params.get('tol'))

        elif self.algorithm == 'group_omp':
            Gram = fast_dot(D.T, D)
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            data = X
            func = partial(group_omp, groups=self.params.get('groups'), n_groups=self.params.get('n_groups'))
            args = [D, Gram]

        elif self.algorithm == 'sparse_group_omp':
            # group_omp(X,D,Gram,groups=None,n_groups=None)
            Gram = fast_dot(D.T, D)
            data = X
            # sparse_group_omp(X,D,Gram,groups=None,n_groups=None,n_nonzero_coefs=None)
            func = partial(sparse_group_omp, groups=self.params.get('groups'), n_groups=self.params.get('n_groups'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
            args = [D, Gram]

        elif self.algorithm == 'somp':
            Gram = fast_dot(D.T, D)
            data = X
            func = partial(somp, data_groups=self.params.get('data_groups'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
            args = [D, Gram]

        elif self.algorithm == 'iht':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = []
            func = partial(thresholding, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           nonzero_percentage=self.params.get('nonzero_percentage'))

            Z0 = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                              result_shape=(n_atoms, n_samples), n_batches=n_batches,
                              mmap=self.mmap, n_jobs=self.n_jobs)

            R0 = fast_dot(D, Z0) - X
            data = X
            batched_args = [Z0, R0]
            args = [D]
            # iterative_hard_thresh(X,Z0,Alpha,D,eta=None,n_nonzero_coefs=None,n_iter=None)
            func = partial(iterative_hard_thresh, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           eta=self.params.get('eta'), n_iter=self.params.get('n_iter'))
            """params = sparse_coder['iterative_hard_thresh']
            learning_rate = params[0]
            threshold = params[1]
            max_iter = params[2]
            Z = iterative_hard_thresh(X,D,Z,learning_rate=learning_rate,threshold = threshold,max_iter = max_iter)
            """

        elif self.algorithm == 'lasso':
            return lasso(self.params.get('lambda'), self.n_jobs)(X, D)

        elif self.algorithm == 'llc':
            func = partial(llc, knn=self.params.get('knn'))
            data = X
            args = [D]
        else:
            raise ValueError("Sparse optimizer not found.")

        if self.verbose:
            msg = "sparse coding"
        else:
            msg = None

        if self.n_jobs > 1:
            # disable OpenBLAS to
            # avoid the hanging problem
            set_openblas_threads(1)

        Z = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                         result_shape=(n_atoms, n_samples), n_batches=n_batches,
                         mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)

        # restore the previous setting
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        return Z
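
For context, the 'omp' and 'bomp' branches above dispatch to (batch) Orthogonal Matching Pursuit. A minimal single-column OMP sketch of the textbook algorithm, assuming the columns of D are l2-normalized; lyssa's batch implementation works on the precomputed Gram and Alpha matrices instead:

import numpy as np

def omp_single(x, D, n_nonzero_coefs):
    # greedy OMP on one datapoint: repeatedly pick the atom most correlated
    # with the residual, then refit the selected atoms by least squares
    support = []
    residual = x.copy()
    z = np.zeros(D.shape[1])
    for _ in range(n_nonzero_coefs):
        correlations = np.abs(np.dot(D.T, residual))
        correlations[support] = 0.0                       # do not reselect atoms
        support.append(int(np.argmax(correlations)))
        coefs = np.dot(np.linalg.pinv(D[:, support]), x)  # least-squares refit
        residual = x - np.dot(D[:, support], coefs)
    z[support] = coefs
    return z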
Example #12
def approx_error(D, Z, X, n_jobs=1):
    """computes the approximation error ||X-DZ||_{F}^{2} """
    if n_jobs > 1:
        set_openblas_threads(n_jobs)
    error = frobenius_squared(X - fast_dot(D, Z))
    return error
Example #13
def online_dict_learn(X, n_atoms, sparse_coder=None, batch_size=None, A=None, B=None, D_init=None,
                      beta=None, n_epochs=1, verbose=False, n_jobs=1, non_neg=False, mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly
            selected datapoints.
    eta: the learning rate
    mu:  the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a	large batch_size implies
    faster execution but high memory overhead, while
    a smaller batch_size implies
    slower execution but low memory overhead
    """

    # don't monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape
    # initialize using the data
    if D_init is None:
        D, unused_data = init_dictionary(X, n_atoms, method='data', return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"
    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    _eps = np.finfo(float).eps

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    if A is None and B is None:
        A = np.zeros((n_atoms, n_atoms))
        B = np.zeros((n_features, n_atoms))

    if beta is None:
        # create a sequence that converges to one
        beta = np.linspace(0, 1, num=n_iter)
    else:
        beta = np.zeros(n_iter) + beta

    max_patience = 10
    error_curr = 0
    error_prev = 0
    patience = 0
    approx_errors = []
    incs = []
    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)
            # update A and B
            A = beta[i] * A + fast_dot(Z_batch, Z_batch.T)
            B = beta[i] * B + fast_dot(X_batch, Z_batch.T)
            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (progress * 100))
                sys.stdout.flush()

            DA = fast_dot(D, A)
            # this part could also be parallelized w.r.t the atoms
            for k in xrange(n_atoms):
                D[:, k] = (1 / (A[k, k] + _eps)) * (B[:, k] - DA[:, k]) + D[:, k]
            # enforce non-negativity constraints
            if non_neg:
                D[D < 0] = 0
            D = norm_cols(D)
        # replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            if patience >= max_patience:
                return D, A, B
            print ""
            print "end of epoch {0}".format(e)
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)
            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)
            # compare against the previous epoch's error before updating it
            if (e > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
                patience += 1
            error_prev = error_curr

    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""
    return D, A, B
Example #14
def lc_ksvd(X, y, D, Q, alpha=1, beta=1, lambda1=1, lambda2=1,
            sparse_coder=None, max_iter=2, approx=False, mmap=False, verbose=False, n_jobs=1):
    """
    X: the data matrix with shape (n_features,n_samples)
    y: the vector that contains the label of each datapoint
    Q: a matrix with shape (n_atoms,n_samples). The element Q_{k,i} is 1 if the ith datapoint and the k atom belong to the same class
    lambda1: the regularizer for the W matrix i.e lambda1 * ||W||_{2}
    lambda2: the regularizer for the transformation matrix G i.e lambda2 * ||G||_{2}
    alpha: the weight we assign for sparse code discrimination
    beta: is the weight we assign for correct classification: beta*||H - WZ||_{2}
    """

    n_classes = len(set(y))
    n_atoms = D.shape[1]
    n_features, n_samples = X.shape
    Z = np.zeros((n_atoms, n_samples))

    # create the class label matrix
    # H is the class label matrix which has a
    # datapoint in each column with H_{c,i}=1 if
    # the ith datapoint belongs to the cth class
    H = np.zeros((n_classes, n_samples)).astype(int)

    for i in xrange(n_samples):
        H[y[i], i] = 1

    if n_jobs > 1:
        set_openblas_threads(n_jobs)
    # classifier parameter initialization
    I = np.eye(n_atoms)

    # W_{c,:} are the parameters of the linear classifier for the cth class
    W = np.dot(inv(np.dot(Z, Z.T) + lambda1 * I), np.dot(Z, H.T)).T
    # The matrix G forces the sparse codes to be discriminative and approximate the matrix Q,
    # and has shape (n_atoms,n_atoms)
    G = np.dot(inv(np.dot(Z, Z.T) + lambda2 * I), np.dot(Z, Q.T)).T

    # stack the data matrix X with class label matrix H
    # and matrix Q
    _X = np.vstack((X, np.sqrt(alpha) * Q))
    _X = np.vstack((_X, np.sqrt(beta) * H))

    if mmap:
        _X = get_mmap(_X)

    _normalizer = np.array([np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])
    D = D / _normalizer
    G = G / _normalizer
    W = W / _normalizer

    # stack the dictionary D with the weight matrix W
    # and matrix G
    _D = np.vstack((D, np.sqrt(alpha) * G))
    _D = np.vstack((_D, np.sqrt(beta) * W))

    if mmap:
        _D = get_mmap(_D)

    if verbose:
        error_curr = 0
        error_prev = 0

    for it in range(max_iter):

        print "iteration", it
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()
        # sparse coding
        Z = sparse_coder(X, D)
        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "\nsparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        _D, _, unused_atoms = ksvd(_X, _D, Z, verbose=True)

        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "\nK-SVD took", t_dict_duration, "seconds"
        if verbose:
            print "number of unused atoms:", len(unused_atoms)

        D = _D[:n_features, :]
        G = _D[n_features:n_features + n_atoms, :]
        W = _D[n_features + n_atoms:, :]

        _normalizer = np.array([np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])

        D = D / _normalizer
        G = G / _normalizer
        W = W / _normalizer
        # stack the dictionary D with the weight matrix W
        # and matrix G
        _D = np.vstack((D, np.sqrt(alpha) * G))
        _D = np.vstack((_D, np.sqrt(beta) * W))

        it_duration = time.time() - it_start
        if verbose:
            # calculate the approximation error
            error_curr = approx_error(D, Z, X, n_jobs=n_jobs)
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
            n_correct = np.array([y[i] == np.argmax(np.dot(W, Z[:, i]))
                                  for i in range(Z.shape[1])]).nonzero()[0].size
            class_acc = n_correct / float(n_samples)
            print "classification accuracy", class_acc
            error_prev = error_curr
        print "duration:", it_duration, "seconds"
        print "----------------------"

    return D, Z, W
Example #15
def projected_grad_desc(X, n_atoms=None, sparse_coder=None, batch_size=None, D_init=None,
                        eta=None, mu=None, n_epochs=None, non_neg=False, verbose=False, n_jobs=1, mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly
            selected datapoints.
    eta: the learning rate
    mu:  the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a large batch_size implies faster execution but high memory
    overhead, while a smaller batch_size implies slower execution but low
    memory overhead.
    """

    if eta is None:
        raise ValueError('Must specify learning rate.')

    # don't monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape
    # initialize the dictionary
    # with the dataset
    if D_init is None:
        D, unused_data = init_dictionary(X, n_atoms, method='data', return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"
    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    I = np.eye(n_atoms)

    if n_batches > n_iter:
        print "will iterate on only {0:.2f}% of the dataset".format((float(n_iter) / n_batches) * 100)

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    max_patience = 10
    error_prev = 0
    patience = 0
    approx_errors = []
    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)

            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (progress * 100))
                sys.stdout.flush()

            # the gradient of the approximation error
            grad_approx = np.dot(np.dot(D, Z_batch) - X_batch, Z_batch.T)
            # the gradient of the incoherence penalty
            if mu is not None and mu > 0:
                grad_incoh = 2 * mu * np.dot(D, np.dot(D.T, D) - I)
            else:
                grad_incoh = 0

            grad = grad_approx + grad_incoh
            D = D - eta * grad
            # enforce non-negativity
            if non_neg:
                D[D < 0] = 0
            # project to l2 unit sphere
            D = norm_cols(D)
            # sparse coding
            Z = sparse_coder(X, D)
        #replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            print ""
            print "end of epoch {0}".format(e)
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)
            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)
            # compare against the previous epoch's error before updating it
            if (e > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
                patience += 1
            error_prev = error_curr
            if patience >= max_patience:
                return D
    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""
    return D
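
For reference, the incoherence step above is consistent with penalizing (mu/2) * ||D^T D - I||_{F}^{2}: differentiating that term with respect to D gives 2 * mu * D (D^T D - I), which is exactly the grad_incoh expression computed in the inner loop (and zero when mu is None or 0).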