示例#1
0
    def dict_learn(self, imgs, feature_extractor=None, dict_learner=None):
        """
        Obtain the descriptors and the dictionary, reusing workspace caches.

        imgs: handed to feature_extractor when "descriptors.npy" is not
              present in the workspace
        feature_extractor: callable invoked as feature_extractor(imgs); only
                           used when the descriptors are not cached
        dict_learner: object exposing fit(descriptors) and a D attribute;
                      only used when "dict.npy" is not cached

        The results are stored in self.descriptors and self.D; nothing is
        returned.
        """
        descriptors_file = "descriptors.npy"
        dict_file = "dict.npy"

        # descriptors: load the cached copy, otherwise extract and cache them
        if self.workspace.contains(descriptors_file):
            self.descriptors = self.workspace.load(descriptors_file)
        else:
            self.descriptors = feature_extractor(imgs)
            self.workspace.save(descriptors_file, self.descriptors)

        if self.mmap:
            self.descriptors = get_mmap(self.descriptors)

        print("descriptors extracted")

        # dictionary: a cached copy short-circuits the learning step
        if self.workspace.contains(dict_file):
            self.D = self.workspace.load(dict_file)
            return

        dict_learner.fit(self.descriptors)
        self.D = dict_learner.D
        self.workspace.save(dict_file, self.D)
示例#2
0
    def dict_learn(self, imgs, feature_extractor=None, dict_learner=None):
        """
        Obtain the descriptors and the dictionary, reusing workspace caches.

        imgs: handed to feature_extractor when "descriptors.npy" is not
              present in the workspace
        feature_extractor: callable invoked as feature_extractor(imgs); only
                           used when the descriptors are not cached
        dict_learner: object exposing fit(descriptors) and a D attribute;
                      only used when "dict.npy" is not cached

        The results are stored in self.descriptors and self.D; nothing is
        returned.
        """
        descriptors_file = "descriptors.npy"
        dict_file = "dict.npy"

        # descriptors: load the cached copy, otherwise extract and cache them
        if self.workspace.contains(descriptors_file):
            self.descriptors = self.workspace.load(descriptors_file)
        else:
            self.descriptors = feature_extractor(imgs)
            self.workspace.save(descriptors_file, self.descriptors)

        if self.mmap:
            self.descriptors = get_mmap(self.descriptors)

        print("descriptors extracted")

        # dictionary: a cached copy short-circuits the learning step
        if self.workspace.contains(dict_file):
            self.D = self.workspace.load(dict_file)
            return

        dict_learner.fit(self.descriptors)
        self.D = dict_learner.D
        self.workspace.save(dict_file, self.D)
示例#3
0
def ksvd_dict_learn(X, n_atoms, init_dict='data', sparse_coder=None,
                    max_iter=20, non_neg=False, approx=False, eta=None,
                    n_cycles=1, n_jobs=1, mmap=False, verbose=True):
    """
    The K-SVD algorithm

    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    approx: if true, invokes the approximate KSVD algorithm
    max_iter: the maximum number of iterations
    non_neg: if set to True, it uses non-negativity constraints
    n_cycles: the number of updates per atom (Dictionary Update Cycles)
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory
    """
    n_features, n_samples = X.shape
    shape = (n_atoms, n_samples)
    Z = np.zeros(shape)
    # dictionary initialization
    # track the datapoints that are not used as atoms
    unused_data = []
    if init_dict == 'data':
        from .utils import init_dictionary
        D, unused_data = init_dictionary(X, n_atoms, method=init_dict, return_unused_data=True)
    else:
        D = np.copy(init_dict)

    if mmap:
        D = get_mmap(D)
        sparse_coder.mmap = True

    print "dictionary initialized"
    max_patience = 10
    error_curr = 0
    error_prev = 0
    it = 0
    patience = 0
    approx_errors = []

    while it < max_iter and patience < max_patience:
        print "----------------------------"
        print "iteration", it
        print ""
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()
        # sparse coding
        Z = sparse_coder(X, D)
        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "sparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        # ksvd to learn the dictionary
        set_openblas_threads(n_jobs)
        if approx:
            D, _, unused_atoms = approx_ksvd(X, D, Z, n_cycles=n_cycles)
        elif non_neg:
            D, _, unused_atoms = nn_ksvd(X, D, Z, n_cycles=it)
        else:
            D, _, unused_atoms = ksvd(X, D, Z, n_cycles=n_cycles)
        set_openblas_threads(1)
        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "K-SVD took", t_dict_duration, "seconds"
            print ""
        if verbose:
            print "number of unused atoms:", len(unused_atoms)
        # replace the unused atoms in the dictionary
        for j in range(len(unused_atoms)):
            # no datapoint available to be used as atom
            if len(unused_data) == 0:
                break
            _idx = np.random.choice(unused_data, size=1)
            idx = _idx[0]
            D[:, unused_atoms[j]] = X[:, idx]
            D[:, unused_atoms[j]] = normalize(D[:, unused_atoms[j]])
            unused_data.remove(idx)

        if eta is not None:
            # do not force incoherence in the last iteration
            if it < max_iter - 1:
                # force Mutual Incoherence
                D, unused_data = force_mi(D, X, Z, unused_data, eta)
        if verbose:
            amc = average_mutual_coherence(D)
            print "average mutual coherence:", amc

        it_duration = time.time() - it_start
        # calculate the approximation error
        error_curr = approx_error(D, Z, X, n_jobs=2)
        approx_errors.append(error_curr)
        if verbose:
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
            error_prev = error_curr
        print "duration:", it_duration, "seconds"
        if (it > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
            patience += 1
        it += 1
    print ""
    return D, Z
示例#4
0
def projected_grad_desc(X,
                        n_atoms=None,
                        sparse_coder=None,
                        batch_size=None,
                        D_init=None,
                        eta=None,
                        mu=None,
                        n_epochs=None,
                        non_neg=False,
                        verbose=False,
                        n_jobs=1,
                        mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly
            selected datapoints.
    eta: the learning rate
    mu:  the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a	large batch_size implies
    faster execution but high memory overhead, while
    a smaller batch_size implies
    slower execution but low memory overhead
    """

    # dont monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape
    # initialize the dictionary
    # with the dataset
    if D_init is None:
        D, unused_data = init_dictionary(X,
                                         n_atoms,
                                         method='data',
                                         return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"
    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    I = np.eye(n_atoms)

    if n_batches > n_iter:
        print "will iterate on only {0:.2f}% of the dataset".format(
            (float(n_iter) / n_batches) * 100)

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    max_patience = 10
    error_curr = 0
    error_prev = 0
    patience = 0
    approx_errors = []
    incs = []
    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)

            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" %
                                 (progress * 100))
                sys.stdout.flush()

            # the gradient of the approximation error
            grad_approx = np.dot(np.dot(D, Z_batch) - X_batch, Z_batch.T)
            # the gradient of the incoherence penalty
            if mu is not None and mu > 0:
                grad_incoh = 2 * mu * np.dot(D, np.dot(D.T, D) - I)
            else:
                grad_incoh = 0

            grad = grad_approx
            D = D - eta * grad
            # enforce non-negativity
            if non_neg:
                D[D < 0] = 0
            # project to l2 unit sphere
            D = norm_cols(D)
            # sparse coding
            Z = sparse_coder(X, D)
            from lyssa.dict_learning.utils import average_mutual_coherence
            approx_errors.append(approx_error(D, Z, X, n_jobs=n_jobs))
        # replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            print ""
            print "end of epoch {0}".format(e)
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)
            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)
                error_prev = error_curr
            if (e > 0) and (error_curr > 0.9 * error_prev
                            or error_curr > error_prev):
                patience += 1
            if patience >= max_patience:
                return D
    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""
    return D
示例#5
0
def online_dict_learn(X, n_atoms, sparse_coder=None, batch_size=None, A=None, B=None, D_init=None,
                      beta=None, n_epochs=1, verbose=False, n_jobs=1, non_neg=False, mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly
            selected datapoints.
    eta: the learning rate
    mu:  the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a	large batch_size implies
    faster execution but high memory overhead, while
    a smaller batch_size implies
    slower execution but low memory overhead
    """

    # dont monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape
    # initialize using the data
    if D_init is None:
        D, unused_data = init_dictionary(X, n_atoms, method='data', return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"
    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    _eps = np.finfo(float).eps

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    if A is None and B is None:
        A = np.zeros((n_atoms, n_atoms))
        B = np.zeros((n_features, n_atoms))

    if beta is None:
        # create a sequence that converges to one
        beta = np.linspace(0, 1, num=n_iter)
    else:
        beta = np.zeros(n_iter) + beta

    max_patience = 10
    error_curr = 0
    error_prev = 0
    patience = 0
    approx_errors = []
    incs = []
    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)
            # update A and B
            A = beta[i] * A + fast_dot(Z_batch, Z_batch.T)
            B = beta[i] * B + fast_dot(X_batch, Z_batch.T)
            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (progress * 100))
                sys.stdout.flush()

            DA = fast_dot(D, A)
            # this part could also be parallelized w.r.t the atoms
            for k in xrange(n_atoms):
                D[:, k] = (1 / (A[k, k] + _eps)) * (B[:, k] - DA[:, k]) + D[:, k]
            # enforce non-negativity constraints
            if non_neg:
                D[D < 0] = 0
            D = norm_cols(D)
        # replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            if patience >= max_patience:
                return D, A, B
            print ""
            print "end of epoch {0}".format(e)
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)
            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)
                error_prev = error_curr
            if (e > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
                patience += 1

    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""
    return D, A, B
示例#6
0
def ksvd_dict_learn(X, n_atoms, init_dict='data', sparse_coder=None,
                    max_iter=20, non_neg=False, approx=False, eta=None,
                    n_cycles=1, n_jobs=1, mmap=False, verbose=True):
    """
    The K-SVD algorithm

    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    approx: if true, invokes the approximate KSVD algorithm
    max_iter: the maximum number of iterations
    non_neg: if set to True, it uses non-negativity constraints
    n_cycles: the number of updates per atom (Dictionary Update Cycles)
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory
    """
    n_features, n_samples = X.shape
    shape = (n_atoms, n_samples)
    Z = np.zeros(shape)
    # dictionary initialization
    # track the datapoints that are not used as atoms
    unused_data = []
    if init_dict == 'data':
        from .utils import init_dictionary
        D, unused_data = init_dictionary(X, n_atoms, method=init_dict, return_unused_data=True)
    else:
        D = np.copy(init_dict)

    if mmap:
        D = get_mmap(D)
        sparse_coder.mmap = True

    print "dictionary initialized"
    max_patience = 10
    error_curr = 0
    error_prev = 0
    it = 0
    patience = 0
    approx_errors = []

    while it < max_iter and patience < max_patience:
        print "----------------------------"
        print "iteration", it
        print ""
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()
        # sparse coding
        Z = sparse_coder(X, D)
        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "sparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        # ksvd to learn the dictionary
        set_openblas_threads(n_jobs)
        if approx:
            D, _, unused_atoms = approx_ksvd(X, D, Z, n_cycles=n_cycles)
        elif non_neg:
            D, _, unused_atoms = nn_ksvd(X, D, Z, n_cycles=it)
        else:
            D, _, unused_atoms = ksvd(X, D, Z, n_cycles=n_cycles)
        set_openblas_threads(1)
        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "K-SVD took", t_dict_duration, "seconds"
            print ""
        if verbose:
            print "number of unused atoms:", len(unused_atoms)
        # replace the unused atoms in the dictionary
        for j in range(len(unused_atoms)):
            # no datapoint available to be used as atom
            if len(unused_data) == 0:
                break
            _idx = np.random.choice(unused_data, size=1)
            idx = _idx[0]
            D[:, unused_atoms[j]] = X[:, idx]
            D[:, unused_atoms[j]] = normalize(D[:, unused_atoms[j]])
            unused_data.remove(idx)

        if eta is not None:
            # do not force incoherence in the last iteration
            if it < max_iter - 1:
                # force Mutual Incoherence
                D, unused_data = force_mi(D, X, Z, unused_data, eta)
        if verbose:
            amc = average_mutual_coherence(D)
            print "average mutual coherence:", amc

        it_duration = time.time() - it_start
        # calculate the approximation error
        error_curr = approx_error(D, Z, X, n_jobs=2)
        approx_errors.append(error_curr)
        if verbose:
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
            error_prev = error_curr
        print "duration:", it_duration, "seconds"
        if (it > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
            patience += 1
        it += 1
    print ""
    return D, Z
示例#7
0
def lc_ksvd(X,
            y,
            D,
            Q,
            alpha=1,
            beta=1,
            lambda1=1,
            lambda2=1,
            sparse_coder=None,
            max_iter=2,
            approx=False,
            mmap=False,
            verbose=False,
            n_jobs=1):
    """
    X: the data matrix with shape (n_features,n_samples)
    y: the vector that contains the label of each datapoint
    Q: a matrix with shape (n_atoms,n_samples). The element Q_{k,i} is 1 if the ith datapoint and the k atom belong to the same class
    lambda1: the regularizer for the W matrix i.e lambda1 * ||W||_{2}
    lambda2: the regularizer for the transformation matrix G i.e lambda2 * ||G||_{2}
    alpha: the weight we assign for sparse code discrimination
    beta: is the weight we assign for correct classification: beta*||H - WZ||_{2}
    """

    n_classes = len(set(y))
    n_atoms = D.shape[1]
    n_features, n_samples = X.shape
    Z = np.zeros((n_atoms, n_samples))

    # create the class label matrix
    # H is the class label matrix which has a
    # datapoint in each column with H_{c,i}=1 if
    # the ith datapoint belongs to the cth class
    H = np.zeros((n_classes, n_samples)).astype(int)

    for i in xrange(n_samples):
        H[y[i], i] = 1

    if n_jobs > 1:
        set_openblas_threads(n_jobs)
    # classifier parameter initialization
    I = np.eye(n_atoms)

    # W_{c,:} are the parameters of the linear classifier for the cth class
    W = np.dot(inv(np.dot(Z, Z.T) + lambda1 * I), np.dot(Z, H.T)).T
    # The matrix G forces the sparse codes to be discriminative and approximate the matrix Q,
    # and has shape (n_atoms,n_atoms)
    G = np.dot(inv(np.dot(Z, Z.T) + lambda2 * I), np.dot(Z, Q.T)).T

    # stack the data matrix X with class label matrix H
    # and matrix Q
    _X = np.vstack((X, np.sqrt(alpha) * Q))
    _X = np.vstack((_X, np.sqrt(beta) * H))

    if mmap:
        _X = get_mmap(_X)

    _normalizer = np.array(
        [np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])
    D = D / _normalizer
    G = G / _normalizer
    W = W / _normalizer

    # stack the dictionary D with the weight matrix W
    # and matrix G
    _D = np.vstack((D, np.sqrt(alpha) * G))
    _D = np.vstack((_D, np.sqrt(beta) * W))

    if mmap:
        _D = get_mmap(_D)

    if verbose:
        error_curr = 0
        error_prev = 0

    for it in range(max_iter):

        print "iteration", it
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()
        # sparse coding
        Z = sparse_coder(X, D)
        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "\nsparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        _D, _, unused_atoms = ksvd(_X, _D, Z, verbose=True)

        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "\nK-SVD took", t_dict_duration, "seconds"
        if verbose:
            print "number of unused atoms:", len(unused_atoms)

        D = _D[:n_features, :]
        G = _D[n_features:n_features + n_atoms, :]
        W = _D[n_features + n_atoms:, :]

        _normalizer = np.array(
            [np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])

        D = D / _normalizer
        G = G / _normalizer
        W = W / _normalizer
        # stack the dictionary D with the weight matrix W
        # and matrix G
        _D = np.vstack((D, np.sqrt(alpha) * G))
        _D = np.vstack((_D, np.sqrt(beta) * W))

        it_duration = time.time() - it_start
        if verbose:
            # calculate the approximation error
            error_curr = approx_error(D, Z, X, n_jobs=2)
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
            n_correct = np.array([
                y[i] == np.argmax(np.dot(W, Z[:, i]))
                for i in range(Z.shape[1])
            ]).nonzero()[0].size
            class_acc = n_correct / float(n_samples)
            print "classification accuracy", class_acc
            error_prev = error_curr
        print "duration:", it_duration, "seconds"
        print "----------------------"

    return D, Z, W
示例#8
0
def online_dict_learn(X, n_atoms, sparse_coder=None, batch_size=None, A=None, B=None, D_init=None,
                      beta=None, n_epochs=1, verbose=False, n_jobs=1, non_neg=False, mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly
            selected datapoints.
    eta: the learning rate
    mu:  the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a	large batch_size implies
    faster execution but high memory overhead, while
    a smaller batch_size implies
    slower execution but low memory overhead
    """

    # dont monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape
    # initialize using the data
    if D_init is None:
        D, unused_data = init_dictionary(X, n_atoms, method='data', return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"
    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    _eps = np.finfo(float).eps

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    if A is None and B is None:
        A = np.zeros((n_atoms, n_atoms))
        B = np.zeros((n_features, n_atoms))

    if beta is None:
        # create a sequence that converges to one
        beta = np.linspace(0, 1, num=n_iter)
    else:
        beta = np.zeros(n_iter) + beta

    max_patience = 10
    error_curr = 0
    error_prev = 0
    patience = 0
    approx_errors = []
    incs = []
    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)
            # update A and B
            A = beta[i] * A + fast_dot(Z_batch, Z_batch.T)
            B = beta[i] * B + fast_dot(X_batch, Z_batch.T)
            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (progress * 100))
                sys.stdout.flush()

            DA = fast_dot(D, A)
            # this part could also be parallelized w.r.t the atoms
            for k in xrange(n_atoms):
                D[:, k] = (1 / (A[k, k] + _eps)) * (B[:, k] - DA[:, k]) + D[:, k]
            # enforce non-negativity constraints
            if non_neg:
                D[D < 0] = 0
            D = norm_cols(D)
        # replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            if patience >= max_patience:
                return D, A, B
            print ""
            print "end of epoch {0}".format(e)
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)
            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)
                error_prev = error_curr
            if (e > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
                patience += 1

    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""
    return D, A, B
示例#9
0
def lc_ksvd(X, y, D, Q, alpha=1, beta=1, lambda1=1, lambda2=1,
            sparse_coder=None, max_iter=2, approx=False, mmap=False, verbose=False, n_jobs=1):
    """
    X: the data matrix with shape (n_features,n_samples)
    y: the vector that contains the label of each datapoint
    Q: a matrix with shape (n_atoms,n_samples). The element Q_{k,i} is 1 if the ith datapoint and the k atom belong to the same class
    lambda1: the regularizer for the W matrix i.e lambda1 * ||W||_{2}
    lambda2: the regularizer for the transformation matrix G i.e lambda2 * ||G||_{2}
    alpha: the weight we assign for sparse code discrimination
    beta: is the weight we assign for correct classification: beta*||H - WZ||_{2}
    """

    n_classes = len(set(y))
    n_atoms = D.shape[1]
    n_features, n_samples = X.shape
    Z = np.zeros((n_atoms, n_samples))

    # create the class label matrix
    # H is the class label matrix which has a
    # datapoint in each column with H_{c,i}=1 if
    # the ith datapoint belongs to the cth class
    H = np.zeros((n_classes, n_samples)).astype(int)

    for i in xrange(n_samples):
        H[y[i], i] = 1

    if n_jobs > 1:
        set_openblas_threads(n_jobs)
    # classifier parameter initialization
    I = np.eye(n_atoms)

    # W_{c,:} are the parameters of the linear classifier for the cth class
    W = np.dot(inv(np.dot(Z, Z.T) + lambda1 * I), np.dot(Z, H.T)).T
    # The matrix G forces the sparse codes to be discriminative and approximate the matrix Q,
    # and has shape (n_atoms,n_atoms)
    G = np.dot(inv(np.dot(Z, Z.T) + lambda2 * I), np.dot(Z, Q.T)).T

    # stack the data matrix X with class label matrix H
    # and matrix Q
    _X = np.vstack((X, np.sqrt(alpha) * Q))
    _X = np.vstack((_X, np.sqrt(beta) * H))

    if mmap:
        _X = get_mmap(_X)

    _normalizer = np.array([np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])
    D = D / _normalizer
    G = G / _normalizer
    W = W / _normalizer

    # stack the dictionary D with the weight matrix W
    # and matrix G
    _D = np.vstack((D, np.sqrt(alpha) * G))
    _D = np.vstack((_D, np.sqrt(beta) * W))

    if mmap:
        _D = get_mmap(_D)

    if verbose:
        error_curr = 0
        error_prev = 0

    for it in range(max_iter):

        print "iteration", it
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()
        # sparse coding
        Z = sparse_coder(X, D)
        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "\nsparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        _D, _, unused_atoms = ksvd(_X, _D, Z, verbose=True)

        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "\nK-SVD took", t_dict_duration, "seconds"
        if verbose:
            print "number of unused atoms:", len(unused_atoms)

        D = _D[:n_features, :]
        G = _D[n_features:n_features + n_atoms, :]
        W = _D[n_features + n_atoms:, :]

        _normalizer = np.array([np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])

        D = D / _normalizer
        G = G / _normalizer
        W = W / _normalizer
        # stack the dictionary D with the weight matrix W
        # and matrix G
        _D = np.vstack((D, np.sqrt(alpha) * G))
        _D = np.vstack((_D, np.sqrt(beta) * W))

        it_duration = time.time() - it_start
        if verbose:
            # calculate the approximation error
            error_curr = approx_error(D, Z, X, n_jobs=2)
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
            n_correct = np.array([y[i] == np.argmax(np.dot(W, Z[:, i]))
                                  for i in range(Z.shape[1])]).nonzero()[0].size
            class_acc = n_correct / float(n_samples)
            print "classification accuracy", class_acc
            error_prev = error_curr
        print "duration:", it_duration, "seconds"
        print "----------------------"

    return D, Z, W
示例#10
0
def projected_grad_desc(X, n_atoms=None, sparse_coder=None, batch_size=None, D_init=None,
                        eta=None, mu=None, n_epochs=None, non_neg=False, verbose=False, n_jobs=1, mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly
            selected datapoints.
    eta: the learning rate
    mu:  the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a	large batch_size implies
    faster execution but high memory overhead, while
    a smaller batch_size implies
    slower execution but low memory overhead
    """

    if eta is None:
        raise ValueError('Must specify learning rate.')

    # don't monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape
    # initialize the dictionary
    # with the dataset
    if D_init is None:
        D, unused_data = init_dictionary(X, n_atoms, method='data', return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"
    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    I = np.eye(n_atoms)

    if n_batches > n_iter:
        print "will iterate on only {0:.2f}% of the dataset".format((float(n_iter) / n_batches) * 100)

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    max_patience = 10
    error_prev = 0
    patience = 0
    approx_errors = []
    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)

            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (progress * 100))
                sys.stdout.flush()

            # the gradient of the approximation error
            grad_approx = np.dot(np.dot(D, Z_batch) - X_batch, Z_batch.T)
            # the gradient of the incoherence penalty
            if mu is not None and mu > 0:
                grad_incoh = 2 * mu * np.dot(D, np.dot(D.T, D) - I)
            else:
                grad_incoh = 0

            grad = grad_approx
            D = D - (eta * grad) + grad_incoh
            # enforce non-negativity
            if non_neg:
                D[D < 0] = 0
            # project to l2 unit sphere
            D = norm_cols(D)
            # sparse coding
            Z = sparse_coder(X, D)
        #replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            print ""
            print "end of epoch {0}".format(e)
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)
            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)
                error_prev = error_curr
            if (e > 0) and (error_curr > 0.9 * error_prev or error_curr > error_prev):
                patience += 1
            if patience >= max_patience:
                return D
    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""
    return D