示例#1
0
def calculateError(partition):
    """
    Accumulate squared norms of the tensor slices and of their residuals.

    Every row of ``partition`` is read as a sequence whose element 1 is the
    data slice ``Xi`` and whose element 2 is the matching factor matrix
    ``Ci``.  The residual is taken against ``kruskal_to_tensor([Ci, A, B])``,
    where ``A`` and ``B`` are module-level factor matrices.

    Args:
        partition: iterable of rows (consumed once, row by row, so the whole
            partition is never held in memory).

    Returns:
        ``[['error', error], ['normX', normX]]`` where ``error`` is the sum
        of ``norm(Xi - reconstruction, 2) ** 2`` and ``normX`` the sum of
        ``norm(Xi, 2) ** 2`` over all rows.  Both are ``0.0`` for an empty
        partition.
    """
    normX = 0.0
    error = 0.0
    for row in partition:
        Xi = row[1]
        Ci = row[2]
        normX = normX + np.square(norm(Xi, 2))
        error = error + np.square(norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
    return [['error', error], ['normX', normX]]
示例#2
0
def createTensorSlice(partition):
    """
    Generate synthetic tensor slices, add noise and ship them to HDFS.

    For every row a factor matrix ``Ci`` is drawn (collinear when the
    module-level ``c`` is positive, uniform random otherwise) and the clean
    slice ``Xi = kruskal_to_tensor([Ci, A, B])`` is built.  For each
    combination of ``l1`` in ``l1Range`` and ``l2`` in ``l2Range`` a noisy
    copy is written with ``np.save`` below ``outputDir`` and then moved to the
    matching directory below ``hdfsDir`` with ``hadoop fs -moveFromLocal``.

    Relies on module-level state: ``c``, ``Ki``, ``R``, ``I``, ``J``, ``A``,
    ``B``, ``l1Range``, ``l2Range``, ``globalN``, ``outputDir``, ``hdfsDir``.

    Args:
        partition: iterable of row values; each value is multiplied by ``Ki``
            to form the file name ``X-<row * Ki>``.

    Returns:
        A list holding the rows that were processed, in order.
    """
    ret = []

    for row in partition:
        if c > 0:
            Ci = createCollinearMatrix(Ki, R, c)
        else:
            Ci = np.random.rand(Ki, R)
        Xi = kruskal_to_tensor([Ci, A, B])
        N1 = np.random.randn(Ki, I, J)
        N2 = np.random.randn(Ki, I, J)
        normXi = norm(Xi, 2)
        normN1 = norm(N1, 2)

        filename = 'X-' + str(row * Ki)

        for l1 in l1Range:
            # The l1 noise does not depend on l2, so it is applied once per l1.
            # 100.0 forces true division: with integer levels, Python 2 would
            # otherwise truncate 100 / l1 and distort the noise scale.
            if l1 > 0:
                Xi1 = Xi + math.pow(
                    (100.0 / l1) - 1, -0.5) * (normXi / normN1) * N1
            else:
                Xi1 = Xi

            for l2 in l2Range:
                add = '-C' + str(c) + '-L1_' + str(l1) + '-L2_' + str(
                    l2) + '-' + str(globalN) + '/'
                newOutputDir = outputDir + add
                newHDFSDir = hdfsDir + add

                if l2 > 0:
                    # Second noise term is scaled element-wise by the data.
                    N2Xi1 = N2 * Xi1
                    Xi2 = Xi1 + math.pow(
                        (100.0 / l2) - 1,
                        -0.5) * (norm(Xi1, 2) / norm(N2Xi1, 2)) * N2Xi1
                else:
                    Xi2 = Xi1

                np.save(newOutputDir + filename, Xi2)
                # Argument list instead of a shell string: no shell parsing,
                # so paths containing spaces or metacharacters stay intact.
                subprocess.call([
                    'hadoop', 'fs', '-moveFromLocal',
                    newOutputDir + filename + '.npy', newHDFSDir
                ])

        ret.append(row)
    return ret
示例#3
0
def getTensorDimensions(partition):
    """
    Spark job to process each slice and return its local tensor dimensions.

    For every row the slice ``Xi = row[1]`` contributes one record made of
    its shape entries followed by ``norm(Xi, 2) ** 2``.  The records are
    reduced with ``tensorOps.getDim``.

    Args:
        partition: iterable of rows whose element 1 is an array-like slice.

    Returns:
        A one-element list containing the result of ``tensorOps.getDim``.
    """
    ret = []
    for row in partition:
        Xi = row[1]
        record = list(Xi.shape)
        record.append(np.square(norm(Xi, 2)))
        ret.append(record)
    return [tensorOps.getDim(ret)]
示例#4
0
def calculateFNorm(partition):
    """
    Sum the squared norms of the tensor slices in a partition.

    Args:
        partition: iterable of rows whose element 1 is the slice ``Xi``.
            Rows are consumed one at a time.

    Returns:
        A one-element list holding the sum of ``norm(Xi, 2) ** 2`` over all
        rows (``[0.0]`` for an empty partition).
    """
    normX = 0.0
    for row in partition:
        Xi = row[1]
        normX = normX + np.square(norm(Xi, 2))
    return [normX]
示例#5
0
def saveFactorMatrices(partition):
    """
    Spark job that solves for the Ci factor matrix of every slice, optionally
    saves the factors, and reports the accumulated squared residual norm.

    Rows are read as ``(label, Xi, ...)``; the integer between the last '-'
    and the last '.' of ``label`` identifies the slice.  When the module-level
    ``outputDir`` is non-empty, each ``Ci`` (and ``A``/``B`` for slice id 0)
    is saved as ``.npy`` in the working directory and all ``.npy`` files there
    are moved to ``outputDir`` with ``hadoop fs -moveFromLocal``.

    Returns:
        ``[['error', error]]``.
    """
    slices = list(partition)
    totalError = 0.0

    for entry in slices:
        name, Xi = entry[0], entry[1]
        Ki = Xi.shape[0]

        # Slice id: the text between the last '-' and the last '.'.
        idStart = name.rindex('-') + 1
        idStop = name.rindex('.')
        labelId = int(name[idStart:idStop])

        # Least-squares solve for Ci.  The zero matrix only fills position 0
        # of the list; that position is passed as skip_matrix.
        placeholder = np.zeros((Ki, R))
        gram = tensorOps.ZTZ(A, B)
        rhs = np.dot(unfold(Xi, 0),
                     khatri_rao([placeholder, A, B], skip_matrix=0))
        if regularization > 0:
            gram = gram + regulParam * eye
        Ci = solve(gram.T, rhs.T).T

        if outputDir != '':
            np.save('./Ci-' + str(labelId), Ci)
            # A and B are shared by all slices; write them once.
            if labelId == 0:
                np.save('./A', A)
                np.save('./B', B)

        residual = Xi - kruskal_to_tensor([Ci, A, B])
        totalError = totalError + np.square(norm(residual, 2))

    if outputDir != '':
        # shell=True is required here: the './*.npy' glob is expanded by the
        # shell.
        moveCommand = 'hadoop fs -moveFromLocal ' + './*.npy ' + outputDir
        subprocess.call([moveCommand], shell=True)

    return [['error', totalError]]
示例#6
0
import numpy as np
from tensorly.kruskal import kruskal_to_tensor
from tensorly.tenalg import norm
from tensortools.cpfit import _compute_squared_recon_error_naive, _compute_squared_recon_error

# Consistency check: the naive and the non-naive reconstruction-error helpers
# from tensortools.cpfit are called with identical arguments and must agree
# to within 1e-6.  The script draws from NumPy's global RNG without seeding,
# so the inputs differ from run to run.

# make factors: one random (n, rank) matrix per tensor mode
dims = [20, 30, 40]
ndim = len(dims)
rank = 5
factors = [np.random.randn(n, rank) for n in dims]

# make data: the tensor is built from `factors` itself
tensor = kruskal_to_tensor(factors)
norm_tensor = norm(tensor, 2)

# case 1: evaluate both helpers at the factors that generated the tensor
err1 = _compute_squared_recon_error_naive(tensor, factors, norm_tensor)
err2 = _compute_squared_recon_error(tensor, factors, norm_tensor)

# case 2: evaluate both helpers at a second, independent random factor set
f2 = [np.random.randn(n, rank) for n in dims]

err3 = _compute_squared_recon_error_naive(tensor, f2, norm_tensor)
err4 = _compute_squared_recon_error(tensor, f2, norm_tensor)

# NOTE(review): absolute tolerance; presumably the returned errors are
# normalised by norm_tensor -- confirm, otherwise a relative check is safer.
assert (np.abs(err1 - err2) < 1e-6)
assert (np.abs(err3 - err4) < 1e-6)
示例#7
0
def singleModeALSstep(partition):
    """
    Runs a single step of Alternating Least Squares to solve for one of A
    (mode = 1), B (mode = 2), or C (mode = 3) matrix.

    Rows are read as ``(label, Xi, ...)`` where the integer between the last
    '-' and the last '.' of ``label`` is the global index of the slice's
    first row and ``Xi.shape[0]`` is the number of rows ``Ki`` it holds.  For
    every slice the local factor ``Ci`` is solved first; afterwards, depending
    on the module-level ``decompMode``:

    * 1 -- the ``ZTZ`` / ``XZ`` terms for A are accumulated,
    * 2 -- the ``ZTZ`` / ``XZ`` terms for B are accumulated,
    * 3 -- the squared residual norm is accumulated.

    Relies on module-level state: ``A``, ``B``, ``R``, ``eye``,
    ``decompMode``, ``sketching``, ``sketchingRate``, ``sketchingRowsA``,
    ``sketchingRowsB``, ``sketchingRowsC``, ``errorCalcSketchingRate``,
    ``onUpdateWeightLoop``, ``regularization`` and ``regulParam``.

    Args:
        partition: iterable of rows as described above.

    Returns:
        ``[['ZTZ', ZiTZi], ['XZ', XiZi]]`` when ``decompMode < 3`` and the
        partition is non-empty, ``[['error', error]]`` when
        ``decompMode == 3``, otherwise an empty list.
    """
    ret = []
    rows = list(partition)
    ZiTZi = 0
    XiZi = 0

    error = 0.0

    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]

        # Default to every row of the slice.  The ``sketching == 2`` branches
        # below index with selectRowsC even when the sub-sampling block is
        # skipped (e.g. sketchingRate >= 1.0); without this default they
        # would hit an unbound local.
        selectRowsC = np.arange(Ki)

        # Sub-sample rows of C only when sketching is really active.  Make
        # sure not to skip over a slice if the error is calculated on the
        # full tensor or while in the weight-update loop.
        if (((sketching > 0 and sketchingRate < 1.0)
             or (decompMode == 3 and errorCalcSketchingRate < 1))
                and not (decompMode == 3 and errorCalcSketchingRate == 1)
                and not (decompMode == 3 and onUpdateWeightLoop)):
            dashIdx = label.rindex('-')
            dotIdx = label.rindex('.')
            labelId = int(label[dashIdx + 1:dotIdx])
            minIndex = labelId
            maxIndex = labelId + Ki - 1
            # Keep the sketched global row indices that fall inside this
            # slice and rebase them to slice-local positions.
            selectRowsC = sketchingRowsC[(sketchingRowsC >= minIndex)
                                         & (sketchingRowsC <= maxIndex)]
            selectRowsC = selectRowsC - minIndex
            if len(selectRowsC) == 0:
                continue

        # Always solve for Ci first.  The zero matrix only fills position 0
        # of the list; that position is passed as skip_matrix.
        Ci = np.zeros((Ki, R))
        # ``and`` binds tighter than ``or``: ``not onUpdateWeightLoop`` only
        # qualifies the decompMode == 3 alternative.
        if ((decompMode < 3 and (sketching == 1 or sketching >= 3)
             and sketchingRate < 1.0)
                or (decompMode == 3 and 0 < errorCalcSketchingRate < 1
                    and not onUpdateWeightLoop)):
            ZiTZic = tensorOps.ZTZ(A[sketchingRowsA, :], B[sketchingRowsB, :])
            XiZic = np.dot(
                unfold(Xi[:, sketchingRowsA, :][:, :, sketchingRowsB], 0),
                khatri_rao([Ci, A[sketchingRowsA, :], B[sketchingRowsB, :]],
                           skip_matrix=0))
        else:
            # No separate sketching == 2 case: it uses the full A and B too.
            ZiTZic = tensorOps.ZTZ(A, B)
            XiZic = np.dot(unfold(Xi, 0),
                           khatri_rao([Ci, A, B], skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye
        Ci = solve(ZiTZic.T, XiZic.T).T

        if decompMode == 1:
            if (sketching == 1 or sketching >= 3) and sketchingRate < 1.0:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B[sketchingRowsB, :],
                                              Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, :, sketchingRowsB], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B[sketchingRowsB, :]],
                               skip_matrix=1))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=1))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 1),
                                     khatri_rao([Ci, A, B], skip_matrix=1))
        elif decompMode == 2:
            if (sketching == 1 or sketching >= 3) and sketchingRate < 1.0:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A[sketchingRowsA, :],
                                              Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, sketchingRowsA, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A[sketchingRowsA, :], B],
                               skip_matrix=2))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=2))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 2),
                                     khatri_rao([Ci, A, B], skip_matrix=2))
        elif decompMode == 3:
            if 0 < errorCalcSketchingRate < 1 and not onUpdateWeightLoop:
                # Error on the sketched sub-tensor only.
                error = error + np.square(
                    norm(
                        Xi[selectRowsC, :, :][:, sketchingRowsA, :]
                        [:, :, sketchingRowsB] - kruskal_to_tensor([
                            Ci[selectRowsC, :], A[sketchingRowsA, :],
                            B[sketchingRowsB, :]
                        ]), 2))
            elif sketching == 2:
                error = error + np.square(
                    norm(
                        Xi[selectRowsC, :, :] -
                        kruskal_to_tensor([Ci[selectRowsC, :], A, B]), 2))
            else:
                # Error on the full slice.
                error = error + np.square(
                    norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
        else:
            # NOTE(review): despite the message, execution continues; kept
            # as-is so callers see the same behaviour.
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if (len(rows) > 0) and (decompMode < 3):
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif (decompMode == 3):
        ret.append(['error', error])
    return ret
示例#8
0
def HOOI(tensor, r1, r2, num_iter=500, error_print=True, tol=10e-5):
    """
    Tucker-2 decomposition of a 4-way tensor by HOSVD initialisation followed
    by higher-order orthogonal iteration over the first two modes.

    ``tensor`` is unpacked as ``(w_out_channel, w_in_channel, kernel_w,
    kernel_h)``.  Modes 0 and 1 are compressed to ranks ``r2`` and ``r1``
    respectively; modes 2 and 3 keep identity factors.

    Args:
        tensor: 4-way array.
        r1: rank for mode 1 (``w_in_channel`` axis).
        r2: rank for mode 0 (``w_out_channel`` axis).
        num_iter: maximum number of HOOI sweeps.
        error_print: when true, print the per-iteration progress and the
            convergence message (the sparsity diagnostics are always
            printed).
        tol: stop once the change in relative reconstruction error between
            two sweeps drops below this value (default ``10e-5 == 1e-4``); a
            falsy value disables the check.

    Returns:
        ``(Core, V, U)`` with shapes

        * Core: ``[r2, r1, kernel_w, kernel_h]``
        * V:    ``[w_out_channel, r2, 1, 1]``
        * U:    ``[r1, w_in_channel, 1, 1]``
    """
    w_out_channel, w_in_channel, kernel_w, kernel_h = tensor.shape

    # Diagnostics: fraction of entries below 0.005 and a coarse histogram.
    # NOTE(review): ``tensor < 0.005`` also counts every negative entry;
    # presumably ``abs(tensor)`` was intended -- confirm before relying on it.
    sparse_ratio = (tensor < 0.005).astype(np.float32).mean()
    print('sparse ratio is  {}'.format(sparse_ratio))
    print('{} {} {}'.format(tensor.shape, tensor.min(), tensor.max()))
    for i in np.arange(-0.1, 0.282, 0.03):
        boolvalue = (tensor > i) & (tensor < (i + 0.03))
        ratio = boolvalue.astype(np.float32).mean()
        print(ratio)

    # tucker-2 decomposition: mode 0 -> r2, mode 1 -> r1
    ranks = [r2, r1]

    # tucker step 1: HOSVD init
    factors = []
    for mode in range(2):
        eigenvecs, _, _ = partial_svd(unfold(tensor, mode),
                                      n_eigenvecs=ranks[mode])
        factors.append(eigenvecs)
    # The two kernel modes are left uncompressed.
    factors.append(np.eye(kernel_w))
    factors.append(np.eye(kernel_h))

    # HOOI decomposition
    rec_errors = []
    norm_tensor = norm(tensor, 2)
    core = None

    for iteration in range(num_iter):
        for mode in range(2):
            core_approximation = tucker_to_tensor(tensor,
                                                  factors,
                                                  skip_factor=mode,
                                                  transpose_factors=True)
            eigenvecs, _, _ = partial_svd(unfold(core_approximation, mode),
                                          n_eigenvecs=ranks[mode])
            factors[mode] = eigenvecs

        core = tucker_to_tensor(tensor, factors, transpose_factors=True)
        reconstruct_tensor = tucker_to_tensor(
            core, factors, transpose_factors=False)  # reconstruct tensor
        rec_error1 = norm(tensor - reconstruct_tensor, 2) / norm_tensor
        rec_errors.append(rec_error1)

        if iteration > 1:
            if error_print:
                # The second slot reports norm_tensor (it previously repeated
                # the reconstruction error).
                print('reconsturction error={}, norm_tensor={}, variation={}.'.
                      format(rec_errors[-1], norm_tensor,
                             rec_errors[-2] - rec_errors[-1]))

            if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                if error_print:
                    print('converged in {} iterations.'.format(iteration))
                break

    if core is None:
        # num_iter == 0: fall back to the core implied by the HOSVD factors.
        core = tucker_to_tensor(tensor, factors, transpose_factors=True)

    Core = core
    U = factors[1].transpose((1, 0)).reshape((r1, w_in_channel, 1, 1))
    V = factors[0].reshape((w_out_channel, r2, 1, 1))
    return Core, V, U
示例#9
0
def singleModeALSstep(partition):
    """
    Runs a single step of Alternating Least Squares to solve for one of A
    (mode = 1), B (mode = 2), or C (mode = 3) matrix.

    Rows are read as ``(label, Xi, ...)`` where the integer between the last
    '-' and the last '.' of ``label`` is the global index of the slice's
    first row and ``Xi.shape[0]`` is the number of rows ``Ki`` it holds.  For
    every slice the local factor ``Ci`` is solved first; afterwards, depending
    on the module-level ``decompMode``:

    * 1 -- the ``ZTZ`` / ``XZ`` terms for A are accumulated,
    * 2 -- the ``ZTZ`` / ``XZ`` terms for B are accumulated,
    * 3 -- the squared residual norm is accumulated.

    Relies on module-level state: ``A``, ``B``, ``R``, ``eye``,
    ``decompMode``, ``sketching``, ``sketchingRowsA``, ``sketchingRowsB``,
    ``sketchingRowsC``, ``regularization`` and ``regulParam``.

    Args:
        partition: iterable of rows as described above.

    Returns:
        ``[['ZTZ', ZiTZi], ['XZ', XiZi]]`` when ``decompMode < 3`` and the
        partition is non-empty, ``[['error', error]]`` when
        ``decompMode == 3``, otherwise an empty list.
    """
    ret = []
    rows = list(partition)
    ZiTZi = 0
    XiZi = 0

    error = 0.0

    for row in rows:
        label = row[0]
        Xi = row[1]
        Ki = Xi.shape[0]
        if sketching > 0:
            dashIdx = label.rindex('-')
            dotIdx = label.rindex('.')
            labelId = int(label[dashIdx + 1:dotIdx])
            minIndex = labelId
            maxIndex = labelId + Ki - 1
            # Keep the sketched global row indices that fall inside this
            # slice and rebase them to slice-local positions.
            selectRowsC = sketchingRowsC[(sketchingRowsC >= minIndex)
                                         & (sketchingRowsC <= maxIndex)]
            selectRowsC = selectRowsC - minIndex
            if len(selectRowsC) == 0:
                # Nothing sampled from this slice: it contributes nothing.
                continue

        # Always solve for Ci first.  The zero matrix only fills position 0
        # of the list; that position is passed as skip_matrix.
        Ci = np.zeros((Ki, R))
        if sketching == 1 or sketching == 3:
            ZiTZic = tensorOps.ZTZ(A[sketchingRowsA, :], B[sketchingRowsB, :])
            XiZic = np.dot(
                unfold(Xi[:, sketchingRowsA, :][:, :, sketchingRowsB], 0),
                khatri_rao([Ci, A[sketchingRowsA, :], B[sketchingRowsB, :]],
                           skip_matrix=0))
        else:
            # No separate sketching == 2 case: it uses the full A and B too.
            ZiTZic = tensorOps.ZTZ(A, B)
            XiZic = np.dot(unfold(Xi, 0), khatri_rao([Ci, A, B],
                                                     skip_matrix=0))
        if regularization > 0:
            ZiTZic = ZiTZic + regulParam * eye

        Ci = solve(ZiTZic.T, XiZic.T).T

        if decompMode == 1:
            if sketching == 1 or sketching == 3:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B[sketchingRowsB, :],
                                              Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, :, sketchingRowsB], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B[sketchingRowsB, :]],
                               skip_matrix=1))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 1),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=1))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(B, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 1),
                                     khatri_rao([Ci, A, B], skip_matrix=1))
        elif decompMode == 2:
            if sketching == 1 or sketching == 3:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A[sketchingRowsA, :],
                                              Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :][:, sketchingRowsA, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A[sketchingRowsA, :], B],
                               skip_matrix=2))
            elif sketching == 2:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci[selectRowsC, :])
                XiZi = XiZi + np.dot(
                    unfold(Xi[selectRowsC, :, :], 2),
                    khatri_rao([Ci[selectRowsC, :], A, B], skip_matrix=2))
            else:
                ZiTZi = ZiTZi + tensorOps.ZTZ(A, Ci)
                XiZi = XiZi + np.dot(unfold(Xi, 2),
                                     khatri_rao([Ci, A, B], skip_matrix=2))
        elif decompMode == 3:
            if sketching == 1 or sketching == 3:
                # Error on the sketched sub-tensor only.
                error = error + np.square(
                    norm(
                        Xi[selectRowsC, :, :][:, sketchingRowsA, :]
                        [:, :, sketchingRowsB] - kruskal_to_tensor([
                            Ci[selectRowsC, :], A[sketchingRowsA, :],
                            B[sketchingRowsB, :]
                        ]), 2))
            elif sketching == 2:
                error = error + np.square(
                    norm(
                        Xi[selectRowsC, :, :] -
                        kruskal_to_tensor([Ci[selectRowsC, :], A, B]), 2))
            else:
                error = error + np.square(
                    norm(Xi - kruskal_to_tensor([Ci, A, B]), 2))
        else:
            # NOTE(review): despite the message, execution continues; kept
            # as-is so callers see the same behaviour.
            print('Unknown decomposition mode. Catastrophic error. Failing now...')

    if (len(rows) > 0) and (decompMode < 3):
        ret.append(['ZTZ', ZiTZi])
        ret.append(['XZ', XiZi])
    elif (decompMode == 3):
        ret.append(['error', error])
    return ret
示例#10
0
def calculateErrorTensorly(tensor, A, B, C):
    """
    Return the norm of ``tensor - kruskal_to_tensor([C, A, B])`` divided by
    the norm of ``tensor`` (as given by ``calculateFNormXTensorly``).
    """
    reconstruction = kruskal_to_tensor([C, A, B])
    residualNorm = norm(tensor - reconstruction, 2)
    return residualNorm / calculateFNormXTensorly(tensor)
示例#11
0
def calculateFNormXTensorly(tensor):
    """Return ``norm(tensor, 2)`` for the given tensor."""
    order = 2
    tensorNorm = norm(tensor, order)
    return tensorNorm