Example #1
def lstsq(a, b):
    """
    Fast least-squares solver, from https://gist.github.com/aldro61/5889795
    Returns the least-squares solution to a linear matrix equation.
    Solves the equation `a x = b` by computing a vector `x` that
    minimizes the Euclidean 2-norm `|| b - a x ||^2`.  The equation may
    be under-, well-, or over- determined (i.e., the number of
    linearly independent rows of `a` can be less than, equal to, or
    greater than its number of linearly independent columns).  If `a`
    is square and of full rank, then `x` (but for round-off error) is
    the "exact" solution of the equation.
    Parameters
    ----------
    a : (M, N) array_like
        "Coefficient" matrix.
    b : (M,) array_like
        Ordinate or "dependent variable" values.
    Returns
    -------
    x : (N,) ndarray
        Least-squares solution. The shape of `x` depends on the shape of
        `b`.
    residuals : (M,) ndarray
        Residual vector ``a x - b``.
    """
    a = np.asarray(a, order='c')
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()
    return x, np.dot(a, x) - b
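A minimal usage sketch for the function above (hedged: it assumes `numpy` is imported as `np` and `dgemm` comes from `scipy.linalg.blas`, as the snippet implies; the comparison against `np.linalg.lstsq` is only an illustrative check):

import numpy as np
from scipy.linalg.blas import dgemm

# Small overdetermined system: 50 equations, 2 unknowns
a = np.random.rand(50, 2)
b = a @ np.array([2.0, -3.0]) + 0.01 * np.random.rand(50)

x, res = lstsq(a, b)                          # normal-equations solve via dgemm
x_ref = np.linalg.lstsq(a, b, rcond=None)[0]  # reference solution
print(np.allclose(x, x_ref))                  # True for this well-conditioned case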
Example #2
def Transient_EIEW(x, alpha_start, alpha, Sn, Sn_inv, omega, wis):
    """
    Evaluates the cost function given all parameters.
    Here, we use the FORTRAN dgemm/dgemv functions
    instead of @ for efficient matrix multiplication.
    """

    N = x.shape[0]
    m = alpha.shape[1]

    P_alpha_F = alpha_start
    cost = omega * np.sum(x)

    # cost of clients already entered (only waiting time)
    for i in range(1, wis + 1):

        cost += (omega - 1) * np.sum(
            dgemm(1, P_alpha_F, Sn_inv[0:i * m, 0:i * m]))

        F = 1 - np.sum(P_alpha_F)
        P_alpha_F = np.hstack((np.matrix(P_alpha_F), alpha * F))

    # cost of clients to be scheduled
    for i in range(wis + 1, N + wis + 1):

        exp_Si = expm(Sn[0:i * m, 0:i * m] * x[i - wis - 1])
        cost += float(
            dgemv(1, dgemm(1, P_alpha_F, Sn_inv[0:i * m, 0:i * m]),
                  np.sum(omega * np.eye(i * m) - exp_Si, 1)))

        P = dgemm(1, P_alpha_F, exp_Si)
        F = 1 - np.sum(P)
        P_alpha_F = np.hstack((np.matrix(P), alpha * F))

    return cost
Example #3
def lls_with_blas(x, y, residuals=False):
    """
    https://gist.github.com/aldro61/5889795
    """
    i = dgemm(alpha=1.0, a=x.T, b=x.T, trans_b=True)
    beta = np.linalg.solve(i, dgemm(alpha=1.0, a=x.T, b=y)).flatten()
    return beta
Example #4
    def project_whitened_vector(self, vector_instance):
        """
        Returns a sheared (non-orthogonal) reconstruction of
        ``vector_instance``.

        Parameters
        ----------
        vector_instance : (n_features,) ndarray
            A novel vector.

        Returns
        -------
        sheared_reconstruction : (n_features,) ndarray
            A sheared (non-orthogonal) reconstruction of ``vector_instance``
        """
        whitened_components = self.whitened_components
        weights = dgemm(alpha=1.0,
                        a=vector_instance.T,
                        b=whitened_components.T,
                        trans_a=True)
        return dgemm(alpha=1.0,
                     a=weights.T,
                     b=whitened_components.T,
                     trans_a=True,
                     trans_b=True)
Example #5
def _llsq_solver_fast(A, y):
    """ Return the least-squares solution to a linear matrix equation.
    Solves the equation `A x = y` by computing a vector `x` that
    minimizes the Euclidean 2-norm `|| y - A x ||^2`.  The equation may
    be under-, well-, or over- determined (i.e., the number of
    linearly independent rows of `A` can be less than, equal to, or
    greater than its number of linearly independent columns).  If `A`
    is square and of full rank, then `x` (but for round-off error) is
    the "exact" solution of the equation.

    However, if A is rank-deficient, this solver may fail. In that case, use
    :func:`_llsq_solver_pinv`.

    :param A: (m, d) array_like
    :param y: (m,) array_like
    :returns x: (d,) ndarray, least square solution

    """
    if type(A) != np.ndarray or not A.flags['C_CONTIGUOUS']:
        warn("Matrix a is not a C-contiguous numpy array. " +
             "The solver will create a copy, which will result" +
             " in increased memory usage.")

    A = np.asarray(A, order='c')
    i = dgemm(alpha=1.0, a=A.T, b=A.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=A.T, b=y)).flatten()

    return x
Example #6
    def project_out_vectors(self, vectors):
        """
        Returns a version of `vectors` where all the basis vectors of the
        model have been projected out.

        Parameters
        ----------
        vectors : (n_vectors, n_features) ndarray
            A matrix of novel vectors.

        Returns
        -------
        projected_out : (n_vectors, n_features) ndarray
            A copy of `vectors` with all basis vectors of the model
            projected out.
        """
        weights = dgemm(alpha=1.0,
                        a=vectors.T,
                        b=self.components.T,
                        trans_a=True)
        return (vectors - dgemm(alpha=1.0,
                                a=weights.T,
                                b=self.components.T,
                                trans_a=True,
                                trans_b=True))
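To see what the two dgemm calls above compute, here is a hedged standalone sketch that reproduces the same projection with a small orthonormal basis (all names below are illustrative, not from the original class):

import numpy as np
from scipy.linalg.blas import dgemm

components = np.linalg.qr(np.random.rand(10, 3))[0].T   # (3, 10), orthonormal rows
vectors = np.random.rand(4, 10)

weights = dgemm(alpha=1.0, a=vectors.T, b=components.T, trans_a=True)
projected_out = vectors - dgemm(alpha=1.0, a=weights.T, b=components.T,
                                trans_a=True, trans_b=True)
# The result has no remaining component along any basis vector
print(np.allclose(projected_out @ components.T, 0))      # True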
Example #7
def _solve_matrix(a, b, residuals=False):
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()

    if residuals:
        return x, np.linalg.norm(np.dot(a, x) - b)
    else:
        return x
Example #8
def lsq(a, b, residuals=False):
    if type(a) != np.ndarray or not a.flags['C_CONTIGUOUS']:
        warn('Matrix a is not a C-contiguous numpy array. The solver will create a copy, which will result' + \
             ' in increased memory usage.')
    a = np.asarray(a, order='c')
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()

    if residuals:
        return x, np.linalg.norm(np.dot(a, x) - b)
    else:
        return x
Example #9
 def g09_calculate_fock(c, s, nbf, e):
     # Calculate the Fock matrix
     sc_temp = blas.dgemm(1.0, np.array(c), np.array(s), 1.0)
     sc = np.asmatrix(sc_temp)
     sce = np.matlib.zeros((nbf, nbf))
     for i in range(nbf):
         sce[i, :] = e[0, i] * sc[i, :]
     c_lu, ipiv, info = lapack.dgetrf(c)
     ic_lu, info = lapack.dgetri(c_lu, ipiv)
     f_temp = blas.dgemm(1.0, np.array(ic_lu), np.array(sce), 1.0)
     f = np.asmatrix(f_temp)
     return f
Example #10
 def forward(self, X):
     # forward propagation through our network
     # dot product of X (input) and first set of 3x2 weights
     #self.z = np.dot(X, self.W1)
     self.z = FD.dgemm(alpha=1.0, a=X.T, b=self.W1.T,
                       trans_a=True, trans_b=True)  # MKL fast dot product
     self.z2 = NN.Elu((self.z))  # ACTIVATION FUNCTION
     # dot product of hidden layer (z2) and second set of 3x1 weights
     #self.z3 = np.dot(self.z2, self.W2)
     self.z3 = FD.dgemm(alpha=1.0, a=self.z2.T,
                        b=self.W2.T, trans_a=True, trans_b=True)
     o = expit((self.z3))  # final activation function
     return o
Example #11
def dgemm(a, b, c=None, alpha=1.0, beta=0.0):
    # BLAS dgemm expects Fortran memory layout, so we reorder without copying,
    # form c^T, and finally convert back to C layout without copying.

    if not _is_contiguous(a) or not _is_contiguous(b):
        print('WARNING: DGEMM called on non-contiguous data')

    m, k = a.shape
    n = b.shape[1]
    assert k == b.shape[0]

    a, ta = _reorder_fortran(a)
    b, tb = _reorder_fortran(b)

    if c is None:
        c = np.zeros((m, n), dtype=np.float64, order='C')

    if m == 0 or n == 0 or k == 0:
        return c

    c, tc = _reorder_fortran(c)

    c = blas.dgemm(alpha=alpha,
                   a=b,
                   b=a,
                   c=c,
                   beta=beta,
                   trans_a=not tb,
                   trans_b=not ta)

    c, tc = _reorder_c(c)

    return c
Example #12
def polynomial_kernel_matrix(P, Q, c, degree):
    """
    Calculate kernel matrix using polynomial kernel.
    
    k(p,q) = (p^{T}q + c)^d

    Parameters:
    -----------
    P : `numpy.ndarray`
        (nDataP, nDim) matrix of data. Each row corresponds to a data point.
    Q : `numpy.ndarray`
        (nDataQ, nDim) matrix of data. Each row corresponds to a data point.   
    c : `float`
        Bias term, c >= 0.      
    degree : `int`
        Degree of the polynomial kernel function.

    Returns:
    --------
    K : `numpy.ndarray`
        (nDataP,nDataQ) matrix, the polynomial kernel matrix of the P and Q data matrix.        
    """
    return ne.evaluate('(c + A)**d', {
        'A': dgemm(alpha=1.0, a=P, b=Q, trans_b=True),
        'c': c,
        'd': degree
    })
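A short usage sketch for the kernel above (hedged: it assumes `numexpr` is imported as `ne` and `dgemm` comes from `scipy.linalg.blas`, as the code implies; the plain-NumPy expression is just a reference check):

import numpy as np
import numexpr as ne
from scipy.linalg.blas import dgemm

P = np.random.rand(5, 3)
Q = np.random.rand(4, 3)
K = polynomial_kernel_matrix(P, Q, c=1.0, degree=2)
K_ref = (P @ Q.T + 1.0) ** 2            # same kernel without numexpr/BLAS
print(K.shape, np.allclose(K, K_ref))   # (5, 4) True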
Example #13
def xc_scalar_ni(me, sp1, R1, sp2, R2, xc_code, deriv, **kw):
    from pyscf.dft.libxc import eval_xc
    """
    Computes overlap for an atom pair. The atom pair is given by a pair of species indices
    and the coordinates of the atoms.
    Args:
      sp1,sp2 : species indices, and
      R1,R2 :   respective coordinates in Bohr, atomic units
    Result:
      matrix of orbital overlaps
    The procedure uses numerical integration in coordinate space.
    """
    grids = build_3dgrid(me, sp1, R1, sp2, R2, **kw)
    rho = dens_libnao(grids.coords, me.sv.nspin)
    exc, vxc, fxc, kxc = libxc.eval_xc(xc_code,
                                       rho.T,
                                       spin=me.sv.nspin - 1,
                                       deriv=deriv)

    ao1 = ao_eval(me.ao1, R1, sp1, grids.coords)

    if deriv == 1:
        #print(' vxc[0].shape ', vxc[0].shape)
        ao1 = ao1 * grids.weights * vxc[0]
    elif deriv == 2:
        ao1 = ao1 * grids.weights * fxc[0]
    else:
        print(' deriv ', deriv)
        raise RuntimeError('!deriv!')

    ao2 = ao_eval(me.ao2, R2, sp2, grids.coords)
    overlaps = blas.dgemm(1.0, ao1, ao2.T)

    return overlaps
Example #14
def matrixMult(A,B):
   global initializedMatrix
   global useNumpy  # module based variables

   mprint("A",A)
   mprint("B",B)
   if not initializedMatrix:
       matrix_initialize()

   if cuda.useCUDA:
       return cuda.dot(A,B)

   if useNumpy:
       return np.dot(A,B)

   # If the matrices are in Fortran order then the computations will be faster
   # when using dgemm.  Otherwise, the function will copy the matrix and that takes time.
   if not A.flags['F_CONTIGUOUS']:
      AA = A.T
      transA = True
   else:
      AA = A
      transA = False

   if not B.flags['F_CONTIGUOUS']:
      BB = B.T
      transB = True
   else:
      BB = B
      transB = False

   return dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
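The Fortran-order trick above can be checked in isolation; a minimal sketch (assuming only `numpy` and `scipy.linalg.blas.dgemm`, with illustrative sizes):

import numpy as np
from scipy.linalg.blas import dgemm

A = np.random.rand(200, 300)   # C-contiguous
B = np.random.rand(300, 100)   # C-contiguous
# A.T and B.T are F-contiguous views, so dgemm can use them without copying
C = dgemm(alpha=1.0, a=A.T, b=B.T, trans_a=True, trans_b=True)
print(np.allclose(C, A @ B))   # True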
Example #15
def gemm(alpha, A, B, C, beta, MKLProc):
    from scipy.linalg.blas import dgemm
    from numpy import transpose
    import os
    os.environ['MKL_NUM_THREADS'] = str(MKLProc)
    B = transpose(B)
    C = dgemm(alpha, A, B, c=C, beta=beta)
    return C
Example #16
def project_onto_null_space(a,
                            n,
                            nt,
                            lb,
                            ub,
                            n_steps_projection,
                            reactions,
                            tol=1e-4,
                            verbose=0):
    # Do matrix multiplication
    a = dgemm(1.0, n, dgemm(1.0, nt, a)).transpose()[0]

    i, accept = 0, True
    while any(a < lb - tol) or any(a > ub + tol):
        print '[WARNING] Bounds reset: %d times' % (i + 1)
        print '\t lower bound: ', reactions[a < lb - tol]
        print '\t upper bound: ', reactions[a > ub + tol]

        # Fix bounds violations
        lb_violations = a < lb - tol
        if any(lb_violations):
            a[lb_violations] = lb[lb_violations]

        ub_violations = a > ub
        if any(ub_violations):
            a[ub_violations] = ub[ub_violations]

        # Do matrix multiplication
        a = dgemm(1.0, n, dgemm(1.0, nt, a)).transpose()[0]

        # Discard the point after too many bound resets
        if i > 10:
            accept, n_steps_projection = False, max(25,
                                                    n_steps_projection - 100)
            # Verbose
            if verbose > 1:
                print '[WARNING] Point discarded! lb: %r, ub: %r' % (
                    any(lb_violations), any(ub_violations))
            break

        i += 1

    if i == 0:
        n_steps_projection = min(25, n_steps_projection + 25)

    return accept, n_steps_projection, a.copy()
Example #17
def main():
    lock = int(sys.argv[1])
    size_list = [2**i for i in range(8,16)]
    no_runs = 10
    time_df = pd.DataFrame(columns=["My function (Python)",
                                    "My function (32 Cores Python)",
                                    "matmul (NumPy Python)",
                                    "dgemm (Python)",
                                    "sgemm (Python)"])
    for mat_size in size_list:
        print(f"Mat size: {mat_size}")
        for i in range(no_runs):
            print(f"i: {i}")

            m1 = np.random.rand(mat_size,mat_size).astype(np.float32)
            m2 = np.random.rand(mat_size,mat_size).astype(np.float32)
            new_times=[]

            time.sleep(10)
            
            if mat_size <= 2048:
                my_func_start = time.perf_counter()
                m_myfunc = matrix_mult(m1,m2)
                my_func_finish = time.perf_counter()
                new_times.append(round(my_func_finish-my_func_start,8))
                
                my_func_32cores_start = time.perf_counter()
                time_taken, m_myfunc32 = gen_time_results(mat_size,32,m1,m2)
                my_func_32cores_finish = time.perf_counter()
                new_times.append(round(my_func_32cores_finish-my_func_32cores_start,8))

            else:
                new_times.append(None)
                new_times.append(None)
            
            numpy_start = time.perf_counter()
            mn = np.matmul(m1,m2)
            numpy_finish = time.perf_counter()
            new_times.append(round(numpy_finish-numpy_start,8))
            
            dgemm_start = time.perf_counter()
            md = FB.dgemm(alpha=1.0, a=m1, b=m2)
            dgemm_finish = time.perf_counter()
            new_times.append(round(dgemm_finish-dgemm_start,8))
            
            sgemm_start = time.perf_counter()
            ms = FB.sgemm(alpha=1.0, a=m1, b=m2)
            sgemm_finish = time.perf_counter()
            new_times.append(round(sgemm_finish-sgemm_start,8))

            print(new_times)
            
            time_df = time_df.append(pd.DataFrame([new_times],
                                     columns=["My function (Python)", "My function (32 Cores Python)",
                                              "matmul (NumPy Python)", "dgemm (Python)", "sgemm (Python)"],
                                     index=[mat_size]))
            if lock:
                time_df.to_pickle("time_df_libraries_lock.pkl")
            else:
                time_df.to_pickle("time_df_libraries_no_lock.pkl")
Example #18
def seidel(adj):
    """
    Implements the Seidel algorithm for APSP on unweighted, undirected graphs.
    For a given adjacency matrix, returns a like-sized matrix holding the
    distances of the shortest paths between nodes.
    """
    n = adj.shape[0]

    # Prepare our base case to compare against, an n x n matrix that is 0 on the
    # diagonal and 1 everywhere else.
    base_case = np.ones_like(adj)
    np.fill_diagonal(base_case, 0)
    
    # Let Z = A . A
    # Using DGEMM directly here instead of np.dot because the latter is 10x slower.
    z = dgemm(1, adj, adj)

    # Let B be an n x n binary matrix
    # where b_ij =
    #   1 if it isn't on the diagonal and a_ij = 1 or z_ij > 0
    #   0 otherwise
    b = np.where(
        np.logical_and(
            # Not on the diagonal
            np.logical_not(np.eye(n)),
            np.logical_or(adj == 1, z > 0)),
        1,
        0)

    # If all b_ij not on the diagonal are 1
    if (b == base_case).all():
        return 2 * b - adj

    t = seidel(b)

    # Let X = T . A
    x = dgemm(1, t, adj)

    # Return n x n matrix D
    # where d_ij =
    #   2*t_ij     if x_ij >= t_ij * degree(j)
    #   2*t_ij - 1 if x_ij <  t_ij * degree(j)
    degrees = np.repeat(np.sum(adj, axis=1)[None, :], adj.shape[0], axis=0)
    d = np.where(x >= np.multiply(t, degrees), 2 * t, 2 * t - 1)
    return d.astype('float32')
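A quick sanity check of the routine above on a 4-node path graph (hedged sketch; assumes `numpy` as `np` and `dgemm` from `scipy.linalg.blas`, as the code implies):

import numpy as np
from scipy.linalg.blas import dgemm

# Path graph 0-1-2-3; expected pairwise distances are |i - j|
adj = np.array([[0, 1, 0, 0],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
                [0, 0, 1, 0]], dtype=np.float64)
print(seidel(adj))
# [[0. 1. 2. 3.]
#  [1. 0. 1. 2.]
#  [2. 1. 0. 1.]
#  [3. 2. 1. 0.]]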
Example #19
def calc_XVX(X, VX):

    XVX = np.zeros((VX.shape[1], X.shape[0], VX.shape[2]))

    for i in range(XVX.shape[2]):

        XVX[:, :, i] = blas.dgemm(1.0, VX[:, :, i], X, trans_a=1, trans_b=1)

    return XVX
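A shape check for the helper above (illustrative sizes; assumes `numpy` as `np` and `scipy.linalg.blas` imported as `blas`):

import numpy as np
from scipy.linalg import blas

X = np.random.rand(5, 7)       # (n_rows_X, k)
VX = np.random.rand(7, 3, 2)   # (k, m, n_slices)
out = calc_XVX(X, VX)
print(out.shape)                                        # (3, 5, 2)
print(np.allclose(out[:, :, 0], VX[:, :, 0].T @ X.T))   # True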
Example #20
    def project_out_vectors(self, vectors):
        """
        Returns a version of `vectors` where all the basis vectors of the
        model have been projected out.

        Parameters
        ----------
        vectors : (n_vectors, n_features) ndarray
            A matrix of novel vectors.

        Returns
        -------
        projected_out : (n_vectors, n_features) ndarray
            A copy of `vectors` with all basis vectors of the model
            projected out.
        """
        weights = dgemm(alpha=1.0, a=vectors.T, b=self.components.T, trans_a=True)
        return vectors - dgemm(alpha=1.0, a=weights.T, b=self.components.T, trans_a=True, trans_b=True)
Example #21
def dgemm(a, b, c=None, alpha=1.0, beta=0.0):
    ''' Performs dgemm in Fortran memory alignment without copying.
        Input matrix should be contiguous (either F or C).
    
    Parameters
    ----------
    a : array
        input matrix a
    b : array
        input matrix b
    c : array, optional
        output matrix c, if None then it is allocated inside the
        function, default None
    alpha : float, optional
        scalar factor for matrix a
    beta : float, optional
        scalar factor for matrix c

    Returns
    -------
    c : ndarray
        output matrix
    '''

    #FIXME: this needs testing better - currently this should only be
    # used where it has been tested against np.dot specifically for
    # that use case!

    if (not _is_contiguous(a)) or (not _is_contiguous(b)):
        log.warn('DGEMM called on non-contiguous data')

    m, k = a.shape
    n = b.shape[1]
    assert k == b.shape[0]

    a, ta = _reorder_fortran(a)
    b, tb = _reorder_fortran(b)

    if c is None:
        c = np.zeros((m, n), dtype=np.float64, order='C')

    if m == 0 or n == 0 or k == 0:
        return c

    c, tc = _reorder_fortran(c)

    c = blas.dgemm(alpha=alpha,
                   a=b,
                   b=a,
                   c=c,
                   beta=beta,
                   trans_a=not tb,
                   trans_b=not ta)

    c, tc = _reorder_c(c)

    return c
Example #22
    def project_whitened_vector(self, vector_instance):
        """
        Returns a sheared (non-orthogonal) reconstruction of
        ``vector_instance``.

        Parameters
        ----------
        vector_instance : (n_features,) ndarray
            A novel vector.

        Returns
        -------
        sheared_reconstruction : (n_features,) ndarray
            A sheared (non-orthogonal) reconstruction of ``vector_instance``
        """
        whitened_components = self.whitened_components
        weights = dgemm(alpha=1.0, a=vector_instance.T,
                        b=whitened_components.T, trans_a=True)
        return dgemm(alpha=1.0, a=weights.T, b=whitened_components.T,
                     trans_a=True, trans_b=True)
Example #23
def linear_least_squares(a, b, residuals=False):
    """
    Return the least-squares solution to a linear matrix equation.
    Solves the equation `a x = b` by computing a vector `x` that
    minimizes the Euclidean 2-norm `|| b - a x ||^2`.  The equation may
    be under-, well-, or over- determined (i.e., the number of
    linearly independent rows of `a` can be less than, equal to, or
    greater than its number of linearly independent columns).  If `a`
    is square and of full rank, then `x` (but for round-off error) is
    the "exact" solution of the equation.
    Parameters
    ----------
    a : (M, N) array_like
        "Coefficient" matrix.
    b : (M,) array_like
        Ordinate or "dependent variable" values.
    residuals : bool
        Compute the residuals associated with the least-squares solution
    Returns
    -------
    x : (N,) ndarray
        Least-squares solution. The shape of `x` depends on the shape of
        `b`.
    residuals : float (optional)
        Euclidean 2-norm of the residual ``b - a x``.
    """
    if type(a) != np.ndarray or not a.flags['C_CONTIGUOUS']:
        warn('Matrix a is not a C-contiguous numpy array. The solver will create a copy, which will result' + \
             ' in increased memory usage.')

    a = np.asarray(a, order='c')
    i = dgemm(alpha=1.0, a=a.T, b=a.T, trans_b=True)
    x = np.linalg.solve(i, dgemm(alpha=1.0, a=a.T, b=b)).flatten()

    if residuals:
        return x, np.linalg.norm(np.dot(a, x) - b)
    else:
        return x
Example #24
def applyBND(mat, **kwargs):
    """Return a BND refinement of a symetric matrix.
    If the matrix is not symetric. The calculation for mat+mat.T will be
    performanced.
    It has comparable speed with the original MATLAB code.

    [SUP15] Sun, Hai‐Ping, et al. "Improving accuracy of protein contact
    prediction using balanced network deconvolution." Proteins: Structure,
    Function, and Bioinformatics (2015)."""

    try:
        ndim, shape = mat.ndim, mat.shape
    except AttributeError:
        raise TypeError('Matrix must be a 2D square array')

    from numpy import fill_diagonal
    from ..Application.math import eigh
    from scipy.linalg.blas import dgemm
    if ndim != 2 or shape[0] != shape[1]:
        raise ValueError('Matrix must be a 2D square array')

    if mat.min() != mat.max():
        mat = (mat - mat.min()) / (mat.max() - mat.min())
    else:
        from ..IO.output import printInfo
        printInfo("The input matrix is a constant matrix.")
        return mat
    n = mat.shape[0]
    fill_diagonal(mat, 0.)
    mat_th = (mat + mat.T) / 2.
    # Double symmetric eigenvector relatively robust representation (RRR)
    d, u = eigh(mat_th)
    for i in xrange(n):
        if d[i] != 0:
            d[i] = (-1. + (1 + 4 * d[i] * d[i]) ** .5) / 2. / d[i]
    # Double general matrix multiply
    mat_new1 = dgemm(alpha=1.0, a=(u * d), b=u, trans_b=True)
    # mat_new1 = (u*d).dot(u.T) #old numpy dot,slower
    ind_edges = (mat_th > 0) * 1.0
    ind_nonedges = (mat_th == 0) * 1.0
    m1 = (mat * ind_nonedges).max()
    m2 = mat_new1.min()
    mat_new2 = (mat_new1 + max(m1 - m2, 0)) * ind_edges + (mat * ind_nonedges)
    m1 = mat_new2.min()
    m2 = mat_new2.max()
    if m1 != m2:
        mat_bnd = (mat_new2 - m1) / (m2 - m1)
    else:
        mat_bnd = mat_new2
    return mat_bnd
Example #25
def compute_metric_gmm(direction, criterion, variables, model_cv, samples, labels, idx):
    """
        Function that computes the accuracy of the model_cv using the variables : idx +/- one of variables
        Inputs:
            direction:      'backward' or 'forward' or 'SFFS'
            criterion:      criterion function used to discriminate variables
            variables:      the variable to add or delete to idx
            model_cv:       the model build with all the variables
            samples,labels: the samples/label for testing
            idx:            the pool of retained variables
        Output:
            metric: the estimated metric

        Used in GMM.forward_selection(), GMM.backward_selection()
    """
    # Initialization
    metric     = sp.zeros(variables.size)
    confMatrix = ConfusionMatrix()
    
    # Compute inv of covariance matrix
    if len(idx)==0:
        logdet = None
        Qs     = None
        scores_t   = None            
    else:
        logdet     = sp.empty((model_cv.C))
        Qs         = sp.empty((model_cv.C,len(idx),len(idx)))
        scores_t   = sp.empty((model_cv.C,samples.shape[0]))

        for c in xrange(model_cv.C): # Here -> store Qs and scores_t
            vp,Q,rcond    = model_cv.decomposition(model_cv.cov[c,idx,:][:,idx])
            Qs[c,:,:] = Q/sp.sqrt(vp)
            logdet[c]     = sp.sum(sp.log(vp))
            # Pre compute score
            testSamples_c           = samples[:,idx] - model_cv.mean[c,idx]
            temp = dgemm(1.,Qs[c,:,:].T,testSamples_c,trans_b=True)
            scores_t[c,:] = sp.sum(temp**2,axis=0)

    for i,var in enumerate(variables):
        predLabels = model_cv.predict_gmm_update(direction,samples,logdet,(i,var),Qs,scores_t,featIdx=idx)[0]
    
        confMatrix.compute_confusion_matrix(predLabels,labels)
        if criterion=='accuracy':
            metric[i] = confMatrix.get_OA()
        elif criterion=='F1Mean':
            metric[i] = confMatrix.get_F1Mean()
        elif criterion=='kappa':
            metric[i] = confMatrix.get_kappa()

    return metric
Example #26
    def kronvec_prod(self, x):
        """
		Computes K*x where K is a kronecker product matrix and x is a column 
		vector which is a numpy array

		Inputs:
			self : (N,M) KronMatrix
			x : (M,1) matrix

		Outputs:
			y : (N,1) matrix

		this routine ensures all matrices are in Fortran-contiguous format
		before using blas routines

		it also takes advantage of matrix symmetry (if sym flag is set)
		"""
        if x.shape != (self.shape[1], 1):
            raise ValueError('x is the wrong shape, must be (%d,1), not %s'\
                 % (self.shape[1],repr(x.shape)))

        y = x
        for i, Ki in reversed(list(enumerate(self.K))):
            y = np.reshape(y, (self.sshape[i, 1], -1), order='F')
            if isinstance(Ki, np.ndarray):  # use optimized blas routines
                if np.isfortran(Ki):
                    a = Ki
                else:
                    a = Ki.T
                if self.sym:
                    if np.isfortran(y):
                        y = blas.dsymm(alpha=1, a=a, b=y, side=0).T
                    else:
                        # just switch the order
                        y = blas.dsymm(alpha=1, a=a, b=y.T, side=1)
                else:
                    if np.isfortran(y):
                        b = y
                    else:
                        b = y.T
                    y = blas.dgemm(alpha=1,
                                   a=a,
                                   b=b,
                                   trans_a=(not np.isfortran(Ki)),
                                   trans_b=(not np.isfortran(y))).T
            else:  # use __mul__ routine
                y = (Ki * y).T
        y = y.reshape((-1, 1), order='F')  # reshape to a column vector
        return y
Example #27
def project_onto_null_space(a, n, nt, lb, ub, n_steps_projection, reactions, tol=1e-4, verbose=0):
    # Do matrix multiplication
    a = dgemm(1.0, n, dgemm(1.0, nt, a)).transpose()[0]

    i, accept = 0, True
    while any(a < lb - tol) or any(a > ub + tol):
        print '[WARNING] Bounds reset: %d times' % (i+1)
        print '\t lower bound: ', reactions[a < lb - tol]
        print '\t upper bound: ', reactions[a > ub + tol]

        # Fix bounds violations
        lb_violations = a < lb - tol
        if any(lb_violations):
            a[lb_violations] = lb[lb_violations]

        ub_violations = a > ub
        if any(ub_violations):
            a[ub_violations] = ub[ub_violations]

        # Do matrix multiplication
        a = dgemm(1.0, n, dgemm(1.0, nt, a)).transpose()[0]

        # Discard the point after too many bound resets
        if i > 10:
            accept, n_steps_projection = False, max(25, n_steps_projection - 100)
            # Verbose
            if verbose > 1:
                print '[WARNING] Point discarded! lb: %r, ub: %r' % (any(lb_violations), any(ub_violations))
            break

        i += 1

    if i == 0:
        n_steps_projection = min(25, n_steps_projection + 25)

    return accept, n_steps_projection, a.copy()
Example #28
    def project_vectors(self, vectors):
        """
        Projects each of the `vectors` onto the model, retrieving
        the optimal linear reconstruction weights for each instance.

        Parameters
        ----------
        vectors : (n_samples, n_features) ndarray

        Returns
        -------
        weights : (n_samples, n_components) ndarray
            The matrix of optimal linear weights

        """
        return dgemm(alpha=1.0, a=vectors.T, b=self.components.T, trans_a=True)
Example #29
    def project_vectors(self, vectors):
        """
        Projects each of the `vectors` onto the model, retrieving
        the optimal linear reconstruction weights for each instance.

        Parameters
        ----------
        vectors : (n_samples, n_features) ndarray

        Returns
        -------
        weights : (n_samples, n_components) ndarray
            The matrix of optimal linear weights

        """
        return dgemm(alpha=1.0, a=vectors.T, b=self.components.T, trans_a=True)
Example #30
def gen_time_results(mat_size, no_runs):
    mat_A = np.random.rand(mat_size, mat_size)
    mat_B = np.random.rand(mat_size, mat_size)
    time_list_d = []
    time_list_s = []
    for _ in range(no_runs):
        start = time.perf_counter()
        result = FB.dgemm(alpha=1, a=mat_A, b=mat_B)
        finish = time.perf_counter()
        time_taken_d = round(finish - start, 10)
        time_list_d.append(time_taken_d)

        start = time.perf_counter()
        result = FB.sgemm(alpha=1, a=mat_A, b=mat_B)
        finish = time.perf_counter()
        time_taken_s = round(finish - start, 10)
        time_list_s.append(time_taken_s)
    return time_list_d, time_list_s
Example #31
    def mytdot(a, b):
        if a.flags.c_contiguous:
            a_T = False
            a = a.T
        elif a.flags.f_contiguous:
            a_T = True
        else:
            a_T = True
            a = np.asfortranarray(a)

        if b.flags.c_contiguous:
            b_T = False
            b = b.T
        elif b.flags.f_contiguous:
            b_T = True
        else:
            b_T = True
            b = np.asfortranarray(b)
        return bl.dgemm(alpha=1.0, a=b, b=a, trans_a=b_T, trans_b=a_T).T
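A small check of the helper above (hedged sketch; assumes `numpy` as `np`, `scipy.linalg.blas` imported as `bl`, and that `mytdot` is reachable at module level):

import numpy as np
from scipy.linalg import blas as bl

a = np.random.rand(6, 4)                        # C-contiguous
b = np.asfortranarray(np.random.rand(4, 3))     # F-contiguous
print(np.allclose(mytdot(a, b), a @ b))         # True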
Example #32
    def single_propagation(self, query, src_net, corr_function=None):
        network = self.graphdata.networks[src_net].matrix
        names = self.graphdata.networks[src_net].node_names
        query_vector = self.generate_query_vector(query, src_net)
        initial_score = RWR(query_vector, network)

        vectors = initial_score.tolist()

        corr_method = self._get_correlation_method(corr_function)
        d_vectors = dgemm(alpha=1., a=vectors, b=vectors, trans_a=True)[0][0]
        n_paths = 1

        corr_score = self.compute_correlation_scores(network,
                                                     vectors,
                                                     d_vectors,
                                                     src_net,
                                                     n_paths,
                                                     corr_method)

        return corr_score
Example #33
def gaussian_kernel_matrix(P, Q, c):
    """
    Calculate kernel matrix using gaussian kernel.
    
    ||p-q||^2 = ||p||^2 + ||q||^2 - 2 * p^T * q

    k(p,q) = exp(-c*||p-q||^2)
           = exp(-c*[||p||^2 + ||q||^2 - 2 * p^T * q])
           
    The cross term C = p^T q can be computed in double precision with
      dgemm(alpha=1.0, a=P, b=Q, trans_b=True)
    or in single precision with
      sgemm(alpha=1.0, a=P, b=Q, trans_b=True)
    or with np.dot(P, Q.T) after casting P and Q to np.float32.

    Parameters:
    -----------
    P : `numpy.ndarray`
        (nDataP, nDim) matrix of data. Each row corresponds to a data point.
    Q : `numpy.ndarray`
        (nDataQ, nDim) matrix of data. Each row corresponds to a data point.            
    c : `float`
        Width of the gaussian kernel function.

    Returns:
    --------
    K : `numpy.ndarray`
        (nDataP,nDataQ) matrix, the gaussian kernel matrix of the P and Q data matrix.                 
    """
    # Calculate norm
    P_norm = np.einsum('ij,ij->i', P, P)
    Q_norm = np.einsum('ij,ij->i', Q, Q)
    return ne.evaluate(
        'exp(-gamma * (A + B - 2*C))', {
            'A': P_norm[:, None],
            'B': Q_norm[None, :],
            'C': dgemm(alpha=1.0, a=P, b=Q, trans_b=True),
            'gamma': c
        })
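And a matching sanity check for the Gaussian kernel (same hedged assumptions about `ne` and `dgemm` as above; the explicit pairwise distances are only a reference):

import numpy as np
import numexpr as ne
from scipy.linalg.blas import dgemm

P = np.random.rand(5, 3)
Q = np.random.rand(4, 3)
K = gaussian_kernel_matrix(P, Q, c=0.5)
D2 = ((P[:, None, :] - Q[None, :, :]) ** 2).sum(axis=-1)   # pairwise ||p - q||^2
print(np.allclose(K, np.exp(-0.5 * D2)))                    # True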
Example #34
            finish = time.perf_counter()
            time_taken = round(finish - start, 8)
            total_time_Numpy += time_taken
            #assert mn.all() == ans.all()
        print(total_time_Numpy / no_runs)
    print("\n")

    print("----LAPACK DGEMM----")
    for mat_size in mat_sizes:
        print(f"Mat size: {mat_size}")
        total_time_DGEMM = 0
        for _ in range(no_runs):
            m1 = np.random.rand(mat_size, mat_size)
            m2 = np.random.rand(mat_size, mat_size)
            start = time.perf_counter()
            md = FB.dgemm(alpha=1, a=m1, b=m2)
            finish = time.perf_counter()
            time_taken = round(finish - start, 8)
            total_time_DGEMM += time_taken
            #assert md.all() == ans.all()
        print(total_time_DGEMM / no_runs)
    print("\n")

    print("---- LAPACK SGEMM----")
    for mat_size in mat_sizes:
        print(f"Mat size: {mat_size}")
        total_time_SGEMM = 0
        for _ in range(no_runs):
            m1 = np.random.rand(mat_size, mat_size)
            m2 = np.random.rand(mat_size, mat_size)
            start = time.perf_counter()
Example #35
 def _instance_vectors_for_full_weights(self, full_weights):
     return dgemm(alpha=1.0, a=full_weights.T, b=self.components.T,
                  trans_a=True, trans_b=True)
Example #36
def principal_component_decomposition(X, whiten=False, center=True,
                                      bias=False, inplace=False):
    r"""
    Apply PCA on the data matrix X. In the case where the data matrix is very
    large, it is advisable to set `inplace=True`. However, note that this
    destructively edits the data matrix by subtracting the mean inplace.

    Parameters
    ----------
    x : (n_samples, n_features) ndarray
        Training data
    whiten : bool, optional
        Normalise the eigenvectors to have unit magnitude

        Default: `False`
    center : bool, optional
        Whether to center the data matrix. If `False`, zero will be subtracted.

        Default: `True`
    bias : bool, optional
        Whether to use a biased estimate of the number of samples. If `False`,
        subtracts `1` from the number of samples.

        Default: `False`
    inplace : bool, optional
        Whether to do the mean subtracting inplace or not. This is crucial if
        the data matrix is greater than half the available memory size.

        Default: `False`

    Returns
    -------
    eigenvectors : (n_components, n_features) ndarray
        The eigenvectors of the data matrix
    eigenvalues : (n_components,) ndarray
        The positive eigenvalues from the data matrix
    mean_vector : (n_components,) ndarray
        The mean that was subtracted from the dataset
    """
    n_samples, n_features = X.shape

    if bias:
        N = n_samples
    else:
        N = n_samples - 1.0

    if center:
        # center data
        mean_vector = np.mean(X, axis=0)
    else:
        mean_vector = np.zeros(n_features)

    # This is required if the data matrix is very large!
    if inplace:
        X -= mean_vector
    else:
        X = X - mean_vector

    if n_features < n_samples:
        # compute covariance matrix
        # S:  n_features  x  n_features
        S = dgemm(alpha=1.0, a=X.T, b=X.T, trans_b=True) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex
        # eigenvectors from eigendecomposition
        S = (S + S.T) / 2.0

        # perform eigenvalue decomposition
        # eigenvectors:  n_features x  n_features
        # eigenvalues:   n_features
        eigenvectors, eigenvalues = eigenvalue_decomposition(S)

        if whiten:
            # whiten eigenvectors
            eigenvectors *= np.sqrt(1.0 / eigenvalues)

    else:
        # n_features > n_samples
        # compute covariance matrix
        # S:  n_samples  x  n_samples
        S = dgemm(alpha=1.0, a=X.T, b=X.T, trans_a=True) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex
        # eigenvectors from eigendecomposition
        S = (S + S.T) / 2.0

        # perform eigenvalue decomposition
        # eigenvectors:  n_samples  x  n_samples
        # eigenvalues:   n_samples
        eigenvectors, eigenvalues = eigenvalue_decomposition(S)

        # compute final eigenvectors
        # eigenvectors:  n_samples  x  n_features
        if whiten:
            w = (N * eigenvalues) ** -1.0
        else:
            w = np.sqrt(1.0 / (N * eigenvalues))
        eigenvectors = w * dgemm(alpha=1.0, a=X.T, b=eigenvectors.T,
                                 trans_b=True)

    # transpose eigenvectors
    # eigenvectors:  n_samples  x  n_features
    eigenvectors = eigenvectors.T

    return eigenvectors, eigenvalues, mean_vector
Example #37
def mean_cov(X):
	n,p = X.shape
	m = X.mean(axis=0)
	cx = X - m
	S = dgemm(1./(n-1), cx.T, cx.T, trans_a=0, trans_b=1)
	return cx,m,S.T
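A quick check of the one-liner above against NumPy (hedged sketch; assumes the imports the snippet implies):

import numpy as np
from scipy.linalg.blas import dgemm

X = np.random.rand(100, 4)
cx, m, S = mean_cov(X)
print(np.allclose(S, np.cov(X, rowvar=False)))   # True
print(np.allclose(m, X.mean(axis=0)))            # True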
Example #38
File: npfs.py  Project: Laadr/FFFS
    def predict_gmm_update(self, direction, testSamples, logdet, newFeat, Qs,scores_t,featIdx):
        """
            Function that predicts the labels for testSamples using the learned model (with an update method for the inverse of the covariance matrix)
            Inputs:
                direction:   'backward' or 'forward'
                testSamples: the samples to be classified
                invCov:      inverse of the covariance matrix of already selected features
                newFeat:     couple (index,feature) to add or delete
                featIdx:     indices of features to use for classification
            Outputs:
                predLabels: the class
                scores:     the decision value for each class
        """
        # Get machine precision
        eps = sp.finfo(sp.float64).eps

        # Get information from the data
        nbTestSpl = testSamples.shape[0] # Number of testing samples
        scores = sp.empty((nbTestSpl,self.C))
        
        # New set of features
        if direction=='forward':
            id_t = list(featIdx)
            id_t.append(newFeat[1])
        elif direction=='backward':
            id_t = list(featIdx)

        # Start the prediction for each class
        for c in xrange(self.C):
            testSamples_c = testSamples[:,id_t] - self.mean[c,id_t]

            if len(id_t)==1:
                scores[:,c] = sp.sum(testSamples_c*testSamples_c,axis=1)/self.cov[c,id_t,id_t] + sp.log(self.cov[c,id_t,id_t]) - self.logprop[c]
            else:
                if direction=='forward':
                    temp = dgemm(1.0,Qs[c,:,:].T,self.cov[c,newFeat[1],:][featIdx].T)
                    alpha = self.cov[c,newFeat[1],newFeat[1]] - sp.sum(temp**2)
                    if alpha < eps:
                        alpha = eps
                    logdet_update = sp.log(alpha) + logdet[c]

                    row_feat                = sp.empty((len(id_t)))
                    row_feat[:len(featIdx)] = dgemm(-1/alpha,Qs[c,:,:],temp).flatten()
                    row_feat[-1]            = 1/alpha
                    cst_feat                = alpha * (sp.dot(row_feat,testSamples_c.T)**2)

                elif direction=='backward':
                    alpha        = 1/invCov[c,newFeat[0],newFeat[0]]
                    if alpha < eps:
                        alpha = eps
                    logdet_update = - sp.log(alpha) + logdet[c]

                    row_feat      = invCov[c,newFeat[0],:]
                    cst_feat      = - alpha * (sp.dot(row_feat,testSamples_c.T)**2)

                cst = logdet_update - self.logprop[c] # Pre compute the constant term

                # temp = sp.dot(testSamples_c,invCov[c,:,:][:,:]) # Here -> symmetric dot product
                scores[:,c] = scores_t[c,:] + cst_feat + cst

                del temp
            del testSamples_c

        # Assign the label to the minimum value of scores
        predLabels = sp.argmin(scores,1)+1
        return predLabels,scores
Example #39
def principal_component_decomposition(X, whiten=False, center=True,
                                      bias=False):
        r"""
        Apply PCA on the data matrix X.

        Parameters
        ----------
        x : (n_samples, n_features) ndarray
            Training data

        Returns
        -------
        eigenvectors : (n_components, n_features) ndarray
        eigenvalues : (n_components,) ndarray
        """
        n_samples, n_features = X.shape

        if bias:
            N = n_samples
        else:
            N = n_samples - 1

        if center:
            # center data
            mean_vector = np.mean(X, axis=0)
        else:
            mean_vector = np.zeros(n_features)

        X -= mean_vector

        if n_features < n_samples:
            # compute covariance matrix
            # S:  n_features  x  n_features
            S = dgemm(alpha=1.0, a=X.T, b=X.T, trans_b=True) / N
            # S should be perfectly symmetrical, but numerical error can creep
            # in. Enforce symmetry here to avoid creating complex
            # eigenvectors from eigendecomposition
            S = (S + S.T) / 2

            # perform eigenvalue decomposition
            # eigenvectors:  n_features x  n_features
            # eigenvalues:   n_features
            eigenvectors, eigenvalues = eigenvalue_decomposition(S)

            if whiten:
                # whiten eigenvectors
                eigenvectors *= eigenvalues ** -0.5

        else:
            # n_features > n_samples
            # compute covariance matrix
            # S:  n_samples  x  n_samples
            S = dgemm(alpha=1.0, a=X.T, b=X.T, trans_a=True) / N
            # S should be perfectly symmetrical, but numerical error can creep
            # in. Enforce symmetry here to avoid creating complex
            # eigenvectors from eigendecomposition
            S = (S + S.T) / 2

            # perform eigenvalue decomposition
            # eigenvectors:  n_samples  x  n_samples
            # eigenvalues:   n_samples
            eigenvectors, eigenvalues = eigenvalue_decomposition(S)

            aux = 2
            if whiten:
                # will cause eigenvectors to be whiten
                aux = 1

            # compute final eigenvectors
            # eigenvectors:  n_samples  x  n_features
            w = (N * eigenvalues) ** (-1 / aux)
            eigenvectors = w * dgemm(alpha=1.0, a=X.T, b=eigenvectors.T,
                                     trans_b=True)

        # transpose eigenvectors
        # eigenvectors:  n_samples  x  n_features
        eigenvectors = eigenvectors.T

        return eigenvectors, eigenvalues, mean_vector
Example #40
    def calculate(self, theta, data, sparse):
        '''
        Calculates the cost function of a Sparse network for given data using
        the feedforward and backpropagation algorithms.

        Parameters
        ----------
        theta : Theta
            Theta values of the Sparse network.

        data : numpy.ndarray
            Training data.

        sparse : Sparse
            Sparse network.

        Returns
        -------
        cost_function : float
            Value of the cost function.

        total_partials : numpy.ndarray
            Matrix containing theta and bias values.
        '''
        th1, th2, b1, b2 = wrap_matrices(sparse, theta)
        num_samples = data.shape[0]

        # layer 1
        a1 = data

        # layer 2
        bias2 = np.dot(np.ones((num_samples, 1)), b1.T)
        z2 = np.dot(th1, data.T).T + bias2
        a2 = sigmoid(z2)

        # layer 3
        bias3 = np.dot(np.ones((num_samples, 1)), b2.T)
        z3 = np.dot(th2, a2.T).T + bias3
        a3 = sigmoid(z3)

        ### feedforward cost ###
        reg = np.sum(np.power(th1, 2)) + np.sum(np.power(th2, 2))
        norm = np.power(a3 - data, 2)
        Jnn = 0.5 * np.sum(norm) / num_samples + 0.5 * sparse.lamb * reg

        rhoest = np.sum(a2, axis=0) / num_samples
        kl = self.__compute_kl(sparse.rho, rhoest)
        cost_function = Jnn + sparse.beta * np.sum(kl)

        ### Backpropagation ##
        sum2 = sparse.beta * (-sparse.rho / rhoest + (1.0 - sparse.rho) /
                              (1.0 - rhoest))
        delta3 = (-(data - a3) * a3 * (1 - a3)).T
        sum1 = dgemm(alpha=1.0, a=th2.T, b=delta3, trans_b=False)
        sum2 = sum2[:, np.newaxis]
        delta2 = np.add(sum1, sum2) * a2.T * (1 - a2.T)

        partial_j1 = np.dot(delta2, a1)
        partial_j2 = np.dot(delta3, a2)
        partial_b1 = delta2.sum(axis=1)
        partial_b2 = delta3.sum(axis=1)
        total_partials = [
            1.0 / num_samples * partial_j1 + sparse.lamb * th1,
            1.0 / num_samples * partial_j2 + sparse.lamb * th2,
            1.0 / num_samples * partial_b1, 1.0 / num_samples * partial_b2
        ]
        return cost_function, unwrap_matrices(total_partials)
Example #41
#*******************************************************************************
#  Content:
#      dgemm example
#*******************************************************************************

import numpy as np
import scipy.linalg.blas as slb
import time

M = 10000
N = 10000
matrix_list = [1024, 2048, 4096, 8192, 10000]

for K in matrix_list:

    a = np.array(np.random.random((M, K)), dtype=np.double, order='F')
    b = np.array(np.random.random((K, N)), dtype=np.double, order='F')

    A = np.matrix(a, dtype=np.double, copy=False)
    B = np.matrix(b, dtype=np.double, copy=False)

    start = time.time()
    C = slb.dgemm(1.0, a=A, b=B)
    end = time.time()

    texec = end - start
    print('M x N x K == ', M, N, K, '.. Scipy Execution Time == ', texec,
          'sec')
Example #42
NITER = 30

A_array = random((N,N))
B_array = random((N,N))
C_array = zeros((N,N))

start = time.time()
for it in range(NITER):
	C_array = dot(A_array,B_array)
stop = time.time()

print"Time for 2d array dot product is:", (stop - start)/NITER

A_mat = matrix(A_array)
B_mat = matrix(B_array)
C_mat = matrix(C_array)

start = time.time()
for it in range(NITER):
	C_mat = A_mat*B_mat
stop = time.time()
print "Time for Matrix multiplication is:", (stop-start)/NITER

alpha = 1
start = time.time()
for it in range(NITER):
	C_array = dgemm(alpha, A_array, B_array)
stop = time.time()
print "Time for dgemm is:", (stop-start)/NITER

Example #43
#env = os.environ

if len(sys.argv) >= 2:
    K = int(sys.argv[1])
    M = K
    N = K
    ITER = 1
else:
    print "usage: python numpy_dgemm.py MATRIX_DIMENSION"
    sys.exit()

a1 = np.array(np.random.random((M, K)), dtype=np.double, order='C', copy=False)
a2 = np.array(np.random.random((K, N)), dtype=np.double, order='C', copy=False)

m1 = np.matrix(a1, dtype=np.double, copy=False)
m2 = np.matrix(a2, dtype=np.double, copy=False)

t_start = time.time()

for i in range(ITER):
    #mf2 = np.dot(m1, m2)
    #print "%d: dot(m1, m2)" % i
    mf2 = sp.dgemm(alpha=1.0, a=m1, b=m2)

t_end = time.time()

#print "= = = PYTHON NUMPY MKL = = ="
#print
print "dgemm(%dx%d, %dx%d), %d iterations" % (M, K, K, N, ITER)
print "dot(m1, m2): %f sec" % (t_end - t_start)
Example #44
File: npfs.py  Project: Laadr/FFFS
def compute_cov(xj,mean,ni):
    """
    """
    Xc = xj-mean
    cov = dgemm(1.0/(ni-1),Xc,Xc,trans_a=True,trans_b=False) # dsyrk could have been used -> but then triangular matrix must be handled everywhere
    return cov
Example #45
    def calculate(self, theta, data, sparse):
        '''
        Calculates the cost function of a Sparse network for given data using
        the feedforward and backpropagation algorithms.

        Parameters
        ----------
        theta : Theta
            Theta values of the Sparse network.

        data : numpy.ndarray
            Training data.

        sparse : Sparse
            Sparse network.

        Returns
        -------
        cost_function : float
            Value of the cost function.

        total_partials : numpy.ndarray
            Matrix containing theta and bias values.
        '''
        th1, th2, b1, b2 = wrap_matrices(sparse, theta)
        num_samples = data.shape[0]

        # layer 1
        a1 = data

        # layer 2
        bias2 = np.dot(np.ones((num_samples,1)),b1.T)
        z2 = np.dot(th1, data.T).T + bias2
        a2 = sigmoid(z2)

        # layer 3
        bias3 = np.dot(np.ones((num_samples,1)),b2.T)
        z3 = np.dot(th2, a2.T).T + bias3
        a3 = sigmoid(z3)

        ### feedforward cost ###
        reg = np.sum(np.power(th1, 2)) + np.sum(np.power(th2, 2))
        norm = np.power(a3 - data, 2)
        Jnn = 0.5 * np.sum(norm) / num_samples + 0.5 * sparse.lamb * reg

        rhoest = np.sum(a2, axis=0) / num_samples
        kl = self.__compute_kl(sparse.rho, rhoest)
        cost_function = Jnn + sparse.beta * np.sum(kl)
           
        ### Backpropagation ##
        sum2 = sparse.beta * (-sparse.rho / rhoest + (1.0 - sparse.rho) / (1.0 - rhoest))
        delta3 = (-(data - a3) * a3 * (1 - a3)).T
        sum1 = dgemm(alpha=1.0, a=th2.T, b=delta3, trans_b=False)
        sum2 = sum2[:,np.newaxis]
        delta2 = np.add(sum1,sum2)* a2.T * (1 - a2.T)

        partial_j1 = np.dot(delta2, a1)
        partial_j2 = np.dot(delta3, a2)
        partial_b1 = delta2.sum(axis=1)
        partial_b2 = delta3.sum(axis=1)
        total_partials = [1.0 / num_samples * partial_j1 + sparse.lamb * th1, 1.0 / num_samples * partial_j2 + sparse.lamb * th2, 1.0 / num_samples * partial_b1, 1.0 / num_samples * partial_b2]    
        return cost_function, unwrap_matrices(total_partials)