Example #1
def subtractSparseBlockMatFast(left, right, p):
    n = right.numRows()
    negBlocks = right.blocks.map(lambda x: elementWiseMultiply(x, -1))
    negative_right = BlockMatrix(negBlocks, p, p, n, n)

    result = negative_right.add(left)
    return result
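The elementWiseMultiply helper used above is not shown in this example. A minimal sketch of what it might look like, assuming each block is an ((i, j), Matrix) tuple and that sparse blocks should stay sparse (the name and behavior here are an assumption, not the original helper):

from pyspark.mllib.linalg import Matrices, SparseMatrix


def elementWiseMultiply(block, scalar):
    # Hypothetical reconstruction; the original helper is defined elsewhere.
    (i, j), mat = block
    if isinstance(mat, SparseMatrix):
        # Scale only the stored values so the block stays sparse.
        scaled = SparseMatrix(mat.numRows, mat.numCols, mat.colPtrs,
                              mat.rowIndices, mat.values * scalar)
    else:
        arr = mat.toArray() * scalar
        # Matrices.dense expects column-major values.
        scaled = Matrices.dense(mat.numRows, mat.numCols, arr.ravel(order='F'))
    return ((i, j), scaled)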
Example #2
def CreateInputsDistributed(input_case, block_size=3):
    logging.warn('CreateInputsDistributed started')
    #data_file = '/u/vparames/TESTS/3/test-commute-dist-' + str(input_case) + '.mat'
    if ("GJan" in str(input_case)):
        data_dir = '/u/vparames/TESTS/climate_precip/'
        logging.warn('elist loading: started')
        e_file = data_dir + str(input_case) + '-elist.mat'
        inp2 = loadmat(e_file)
        edge_list = inp2['elist']
        logging.warn('elist loading: done')
        data_file = data_dir + str(input_case) + '.mat'
        inp = loadmat(data_file)
        adj_mat = inp['G']
    else:
        data_file = INPUT_DIR + str(input_case) + '.mat'
        inp = loadmat(data_file)
        adj_mat = inp['G']
        edge_list = inp['elist']
    logging.warn('Mat file loading done')
    is_sparse = sparse.issparse(adj_mat)
    print "adj shape", adj_mat.shape
    '''
    -----------------------------------------------
    Lines added for v4:
    get the closest square size that will fit with the given block size.
    -----------------------------------------------
    '''
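    # Example with hypothetical numbers: a 10x10 adjacency matrix with
    # block_size = 3 gives num_blocks = ceil(10 / 3) = 4, so the matrix is
    # padded out to a 12x12 square below.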
    num_blocks = int(math.ceil(adj_mat.shape[0] / (block_size * 1.0)))
    squared_shape = (block_size * num_blocks)
    original_shape = adj_mat.shape[0]
    if (not (isinstance(adj_mat, sparse.csc_matrix))):
        adj_mat = np.pad(adj_mat, (0, squared_shape - original_shape),
                         'constant',
                         constant_values=(0))
    else:
        #BUG FIX:Earlier version
        # adj_mat = sparse.csc_matrix((adj_mat.data,adj_mat.nonzero()), shape =(squared_shape,squared_shape))
        ptr = sparse.find(
            adj_mat)  #ptr[0] - indices, ptr[1] -indptr , ptr[2]-data
        adj_mat = sparse.csc_matrix((ptr[2], (ptr[0], ptr[1])),
                                    shape=(squared_shape, squared_shape))
    print "adj matrix reshaped to ", adj_mat.shape
    SplitMatrixInBlocks(adj_mat, block_size)
    filelist = sc.textFile(BLOCKS_DIR + 'filelist.txt', minPartitions=minP)
    # filelist = sc.textFile(BLOCKS_DIR + 'filelist.txt')
    blocks_rdd = filelist.map(MapperLoadBlocksFromMatFile)
    # adjacency_mat = BlockMatrix(blocks_rdd, block_size, block_size)
    adjacency_mat = BlockMatrix(blocks_rdd, block_size, block_size,
                                adj_mat.shape[0], adj_mat.shape[1])
    logging.warn(
        'adjacency_mat is created with :\n rows:\t %d\ncols:\t %d \n NumOfRowsPerBlock : \t %d \n NumColsPerBlock:\t %d \n',
        adjacency_mat.numRows(), adjacency_mat.numCols(),
        adjacency_mat.rowsPerBlock, adjacency_mat.colsPerBlock)
    logging.warn('CreateInputsDistributed ended')
    return adjacency_mat, edge_list
Example #3
def main():
    if len(sys.argv) < 2:
        print('USAGE: matrix_mult.py <dim of matrix>')
        return

    n = int(sys.argv[1])
    dm2 = Matrices.dense(n, n, np.random.randint(1, n * n, n * n).tolist())
    blocks1 = sc.parallelize([((0, 0), dm2)])
    m2 = BlockMatrix(blocks1, n, n)
    m3 = BlockMatrix(blocks1, n, n)
    ret = m3.multiply(m2).toIndexedRowMatrix().toRowMatrix().rows.collect()
    print('****************n:', n)
Example #4
def mult(A, B):
    #-------LOG
    logging.warn("Multiplication started")
    blockcount = A.blocks.getNumPartitions()
    logging.warn("A part count")
    logging.warn(blockcount)
    blockcount = B.blocks.getNumPartitions()
    logging.warn("B part count")
    logging.warn(blockcount)
    #-----LOG

    # If dense, just call the inbuilt function.
    if (isinstance(A.blocks.first()[1], DenseMatrix)
            or isinstance(B.blocks.first()[1], DenseMatrix)):
        return A.multiply(B)
    #sparse ? Then continue the madness

    N = A.numRows()
    p = SQUARE_BLOCK_SIZE
    num_blocks = N / p

    aleft = A.blocks.flatMap(lambda x: affectLeft(x, num_blocks))
    bright = B.blocks.flatMap(lambda x: affectRight(x, num_blocks))
    both = aleft.union(bright)
    indi = both.reduceByKey(lambda a, b: prod(a, b))
    mapped = indi.map(lambda x: ((x[0][0], x[0][2]), x[1]))
    pr = mapped.reduceByKey(add)
    brd = pr.map(lambda x: ((x[0][0], x[0][1]),
                            Matrices.sparse(p, p, x[1].indptr, x[1].indices,
                                            x[1].data)))
    C = BlockMatrix(brd, p, p, N, N)
    return C
Example #5
def _getColumns(blockMat, j, norm=1):
    """
    Returns column(s) j of the input BlockMatrix as a BlockMatrix with
    the same number of rowsPerBlock.
    """
    sc = SparkContext.getOrCreate()
    if np.isscalar(j):
        colsPerBlock = blockMat.colsPerBlock
        jBlockCol = j // colsPerBlock
        jInBlock = j % colsPerBlock
        jBlocks = blockMat.blocks.filter(lambda x: x[0][1] == jBlockCol)

        def g(block):
            colJ = block[1].toArray()[:, jInBlock] / norm
            return ((block[0][0], 0), OldMatrices.dense(len(colJ), 1, colJ))

        colJBlocks = jBlocks.map(g)
        return BlockMatrix(colJBlocks,
                           rowsPerBlock=blockMat.rowsPerBlock,
                           colsPerBlock=1,
                           numCols=1)
    else:
        j_b = sc.broadcast(j)
        blockMat_red = blockMat.toIndexedRowMatrix()
        rows_red = blockMat_red.rows.map(lambda row: (
            row.index, OldVectors.dense(row.vector.toArray()[j_b.value] / norm
                                        )))
        j_b.unpersist()
        return IndexedRowMatrix(rows_red).toBlockMatrix(
            rowsPerBlock=blockMat.rowsPerBlock,
            colsPerBlock=min(len(j), blockMat.colsPerBlock))
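A minimal usage sketch for _getColumns, assuming OldMatrices is pyspark.mllib.linalg.Matrices (as in the surrounding examples) and a SparkContext sc is already running; the data values are illustrative only:

blocks = sc.parallelize([((0, 0), OldMatrices.dense(2, 2, [1, 3, 2, 4])),
                         ((1, 0), OldMatrices.dense(2, 2, [5, 7, 6, 8]))])
mat = BlockMatrix(blocks, 2, 2)  # the 4x2 matrix [[1, 2], [3, 4], [5, 6], [7, 8]]
colJ = _getColumns(mat, 1)       # second column as a 4x1 BlockMatrix
print(colJ.toLocalMatrix())      # DenseMatrix holding [2, 4, 6, 8]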
Example #6
def LoadGraph(filename):
    filelist = sc.textFile(filename + 'filelist.txt', minPartitions=18)
    blocks_rdd = filelist.map(MapperLoadBlocksFromMatFile)
    elist_file = filename + 'elist.mat'
    inp = loadmat(elist_file)
    edge_list = inp['elist']
    # number of rows per block, number of columns per block, number of rows in giant matrix, number of columns in giant matrix
    adjacency_mat = BlockMatrix(blocks_rdd, SQUARE_BLOCK_SIZE,
                                SQUARE_BLOCK_SIZE, SQUARE_TOTAL_SIZE,
                                SQUARE_TOTAL_SIZE)
    logging.warn(
        'adjacency_mat is created with :\n rows:\t %d\ncols:\t %d \n NumOfRowsPerBlock : \t %d \n NumColsPerBlock:\t %d \n',
        adjacency_mat.numRows(), adjacency_mat.numCols(),
        adjacency_mat.rowsPerBlock, adjacency_mat.colsPerBlock)
    logging.warn('LoadGraph ended')
    return adjacency_mat, edge_list
Example #7
def LoadGraph(rowsPerBlock, colsPerBlock, totalRows, totalColumns):
    filelist = sc.textFile(INPUT_DIR + 'filelist.txt', minPartitions=18)
    blocks_rdd = filelist.map(MapperLoadBlocksFromMatFile)
    elist_file = INPUT_DIR + 'elist.mat'
    inp = loadmat(elist_file)
    edge_list = inp['elist']
    # number of rows per block, number of columns per block, number of rows in giant matrix, number of columns in giant matrix
    adjacency_mat = BlockMatrix(blocks_rdd, rowsPerBlock, colsPerBlock,
                                totalRows, totalColumns)
    logging.warn(
        'adjacency_mat is created with :\n rows:\t %d\ncols:\t %d \n NumOfRowsPerBlock : \t %d \n NumColsPerBlock:\t %d \n',
        adjacency_mat.numRows(), adjacency_mat.numCols(),
        adjacency_mat.rowsPerBlock, adjacency_mat.colsPerBlock)
    logging.warn('LoadGraph ended')
    print adjacency_mat, edge_list
    return adjacency_mat, edge_list
Example #8
def DiagonalBlockMatrix(diag, dense=False):
    n = len(diag)
    p = SQUARE_BLOCK_SIZE
    num_blocks = n / p
    blockids = sc.parallelize(it.product(xrange(num_blocks), repeat=2))
    block_rdd = blockids.map(lambda x: difun(x, diag))
    return BlockMatrix(block_rdd, p, p, n, n)
Example #9
 def diagonalBlockMatrix(self, diag, dense=False):
     n = len(diag)
     p = self.squareBlockSize
     num_blocks = n / p
     blockids = self.sc.parallelize(it.product(xrange(num_blocks),
                                               repeat=2))
     block_rdd = blockids.map(lambda x: self.difun(x, diag))
     return BlockMatrix(block_rdd, p, p, n, n)
Example #10
def createAdjMatElection(graphNodes, year, sparseG, blockSize, sc):
    if sparseG:
        GENERATE_SPARSE = True
    normalizeDonations = False
    path = election_data_path
    if (year == 12):
        donations = loadContributionsCSV(path, elecion_files[1])
    elif (year == 16):
        donations = loadContributionsCSV(path, elecion_files[2])
        # Fix for overflow errors due to large Aij values
        donations[4,:] = 0.1 * donations[4,:]
        donations[6,:] = 0.1 * donations[6,:]
    else:
        print 'ERROR: Wrong value of year (', year, '), only 12 and 16 allowed'        
        return

    if graphNodes < len(donations):
        if False:
            d1 = loadContributionsCSV(path, elecion_files[1])
            d2 = loadContributionsCSV(path, elecion_files[2])
            totalD = np.sum(d1 + d2, 1)
            topDonors = totalD.argsort()[-graphNodes:]
            donations = donations[topDonors,:]
        else:
            donations = donations[0:graphNodes,:]

    if graphNodes > len(donations):
        extraNodes = graphNodes - len(donations)
        donations = np.append(donations, np.zeros((extraNodes, donations.shape[1])), axis = 0)

    blocks, n = splitInBlocks(donations, blockSize)
    donationsRdd = sc.parallelize(blocks, n)

    #--------------------------------------    
    n = donationsRdd.count()
    logging.warn('donationsRdd count = ' + str(n) + ', parts = ' + \
        str(donationsRdd.getNumPartitions()))
    donationsRdd.repartition(n).cache()
    a = donationsRdd.take(1)
    sqlContext = SQLContext(sc)
    #--------------------------------------

    logging.warn('donationsRdd parts = ' + str(donationsRdd.getNumPartitions()))

    allPairDonations = donationsRdd.cartesian(donationsRdd)
    logging.warn('allPairDonations count = ' + str(allPairDonations.count()))
    adjMatBlocks = allPairDonations.map(constructElectionBlock)
    if normalizeDonations:
        logging.warn('Before normalizeDonations')
        adjMatBlocks = nomalizeBlocks(adjMatBlocks)
        logging.warn('Done normalizeDonations')
    return adjMatBlocks

    # NOTE: unreachable because of the early return above; kept from an earlier
    # version that built the full BlockMatrix here (N is not defined in this scope).
    # logging.warn('Calling BlockMatrix(), size = ' + str(N))
    # adjMat = BlockMatrix(adjMatBlocks, blockSize, blockSize, N, N)
    # return adjMat
Example #11
 def test_computeRowSums(self):
     dm1 = OldMatrices.dense(3, 2, [1, 2, 3, 4, 5, 6])
     dm2 = OldMatrices.dense(3, 2, [7, 8, 9, 10, 11, 12])
     dm3 = OldMatrices.dense(3, 2, [13, 14, 15, 16, 17, 18])
     dm4 = OldMatrices.dense(3, 2, [19, 20, 21, 22, 23, 24])
     blocks = self.sc.parallelize([((0, 0), dm1), ((0, 1), dm2),
                                   ((1, 0), dm3), ((1, 1), dm4)])
     mat = BlockMatrix(blocks, 3, 2)
     rowSums = sparkle.util._computeRowSums(mat)
     self.assertTrue(np.all(rowSums == [48, 66, 84, 102]))
Example #12
def CreateInputs(input_case):
    data_file = '/u/vparames/TESTS/3/test-commute-dist-' + str(
        input_case) + '.mat'
    inp = loadmat(data_file)
    adj_mat = inp['G']
    edge_list = inp['elist']
    n = adj_mat.shape[0]
    sm = Matrices.dense(n, n, adj_mat.transpose().flatten())
    adjacency_mat = BlockMatrix(sc.parallelize([((0, 0), sm)]),
                                SQUARE_BLOCK_SIZE, SQUARE_BLOCK_SIZE)

    return adjacency_mat, edge_list
Example #13
 def loadGraph(self, graphFolder):
     self.inputDir = graphFolder
     if not os.path.exists(graphFolder + "SparkBlocks/"):
         os.makedirs(graphFolder + "SparkBlocks/")
     self.blocksDir = graphFolder + "SparkBlocks/"
     filelist = self.sc.textFile(graphFolder + 'filelist.txt',
                                 minPartitions=self.minPartitions)
     blocks_rdd = filelist.map(MapperLoadBlocksFromMatFile)
     elist_file = graphFolder + 'elist.mat'
     inp = loadmat(elist_file)
     edge_list = inp['elist']
     blocksize = copy.deepcopy(self.squareBlockSize)
     matrixSize = copy.deepcopy(self.mainMatrixSize)
     # number of rows per block, number of columns per block, number of rows in giant matrix, number of columns in giant matrix
     adjacency_mat = BlockMatrix(blocks_rdd, blocksize, blocksize,
                                 matrixSize, matrixSize)
     logging.warn(
         'adjacency_mat is created with :\n rows:\t %d\ncols:\t %d \n NumOfRowsPerBlock : \t %d \n NumColsPerBlock:\t %d \n',
         adjacency_mat.numRows(), adjacency_mat.numCols(),
         adjacency_mat.rowsPerBlock, adjacency_mat.colsPerBlock)
     logging.warn('loadGraph ended')
     return adjacency_mat, edge_list
Example #14
def _normalize(blockMat, norm):
    """
    Normalize blockMat by dividing all entries by norm.
    """
    def g(block):
        newmat = OldMatrices.dense(block[1].numRows, block[1].numCols,
                                   block[1].toArray() / norm)
        return (block[0], newmat)

    newBlocks = blockMat.blocks.map(g)
    return BlockMatrix(newBlocks,
                       rowsPerBlock=blockMat.rowsPerBlock,
                       colsPerBlock=blockMat.colsPerBlock)
Example #15
def _colVectorToBlockMatrix(vec, rowsPerBlock, numSlices=None):
    sc = SparkContext.getOrCreate()
    remainder = len(vec) % rowsPerBlock
    if rowsPerBlock >= len(vec):
        splits = [vec]
    elif remainder == 0:
        splits = np.split(vec, len(vec) // rowsPerBlock)
    else:
        head = vec[:-remainder]
        splits = np.split(head, len(head) // rowsPerBlock)
        splits.append(vec[-remainder:])
    blocks = sc.parallelize([((i, 0), OldMatrices.dense(len(split), 1, split))
                             for i, split in zip(range(len(splits)), splits)],
                            numSlices=numSlices)
    return BlockMatrix(blocks, rowsPerBlock, 1, len(vec), 1)
Example #16
def _thresholdColVector(blockMat, rho):
    """
    Apply soft-thresholding to a column vector BlockMatrix.
    """
    def g(block):
        blockArr = block[1].toArray().ravel()
        newmat = OldMatrices.dense(
            block[1].numRows, block[1].numCols,
            np.sign(blockArr) * np.maximum(0,
                                           np.abs(blockArr) - rho))
        return (block[0], newmat)

    newBlocks = blockMat.blocks.map(g)
    return BlockMatrix(newBlocks,
                       rowsPerBlock=blockMat.rowsPerBlock,
                       colsPerBlock=blockMat.colsPerBlock)
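A quick usage sketch of the soft-thresholding operator above, reusing _colVectorToBlockMatrix from the previous example (numpy as np and a running SparkContext are assumed, and the values are illustrative):

v = np.array([-3.0, -0.5, 0.0, 0.5, 3.0])
vMat = _colVectorToBlockMatrix(v, rowsPerBlock=2)
shrunk = _thresholdColVector(vMat, rho=1.0)
# Soft-thresholding maps each entry x to sign(x) * max(0, |x| - rho),
# so the result here is [-2.0, 0.0, 0.0, 0.0, 2.0].
print(shrunk.toLocalMatrix().toArray().ravel())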
Example #17
def _standardize(blockMat, center=0, scale=1):
    """
    Standardize blockMat columns by subtracting center and dividing
    by scale.

    :param blockMat: A pyspark.mllib.linalg.distributed.BlockMatrix.
    :param   center: Either a scalar value which will be subtracted from
                     all entries of blockMat, or a 1D array of length
                     blockMat.numCols(), in which case center[j] will be
                     subtracted from the entries in blockMat column j.
    :param    scale: Either a scalar value which will divide all entries in
                     blockMat, or a 1D array of length blockMat.numCols(),
                     in which case scale[j] will divide the entries in
                     blockMat column j.
    """
    sc = SparkContext.getOrCreate()
    colsPerBlock = sc.broadcast(blockMat.colsPerBlock)
    cb = sc.broadcast(center)
    sb = sc.broadcast(scale)

    def g(block):
        i, j = block[0]
        mat = block[1].toArray()
        n, m = mat.shape
        col0 = colsPerBlock.value * j
        blockCenter = cb.value if np.isscalar(
            cb.value) else cb.value[col0:(col0 + m)]
        blockScale = sb.value if np.isscalar(
            sb.value) else sb.value[col0:(col0 + m)]
        newmat = (mat - blockCenter) / blockScale
        newmat = OldMatrices.dense(n, m, newmat.ravel(order='F'))
        return ((i, j), newmat)

    newBlocks = blockMat.blocks.map(g)
    colsPerBlock.unpersist()
    cb.unpersist()
    sb.unpersist()
    return BlockMatrix(newBlocks,
                       rowsPerBlock=blockMat.rowsPerBlock,
                       colsPerBlock=blockMat.colsPerBlock)
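A minimal sketch of how _standardize might be called with per-column centers and scales, again assuming sc, np, OldMatrices, and BlockMatrix are in scope as in the surrounding examples (the data is illustrative):

blocks = sc.parallelize([((0, 0), OldMatrices.dense(2, 2, [1., 3., 2., 4.])),
                         ((1, 0), OldMatrices.dense(2, 2, [5., 7., 6., 8.]))])
mat = BlockMatrix(blocks, 2, 2)           # columns are [1, 3, 5, 7] and [2, 4, 6, 8]
center = np.array([4.0, 5.0])             # per-column means
scale = np.array([np.std([1, 3, 5, 7]), np.std([2, 4, 6, 8])])
standardized = _standardize(mat, center=center, scale=scale)
print(standardized.toLocalMatrix())       # each column now has mean 0 and unit variance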
Example #18
sparse1 = Matrices.sparse(sp_rows, sp_cols, row_pointers1, col_index1, value1)
sparse2 = Matrices.sparse(sp_cols, sp_cols, row_pointers2, col_index2, value2)
"""sparse1 = newmat.toarray()
sparse2 = rand_mat.toarray()
print sparse2"""

r1 = sp_rows / 2
c1 = sp_cols / 2
r2 = sp_cols / 2
c2 = sp_cols / 2
"""a, b, c, d = sparse1[:r1, :c1], sparse1[r1:, :c1], sparse1[:r1, c1:], sparse1[r1:, c1:]
e, f, g, h = sparse2[:r2, :c2], sparse2[r2:, :c2], sparse2[:r2, c2:], sparse2[r2:, c2:]

blocks1 = sc.parallelize([((0, 0), a),((1,0), b), ((0,1),c), ((1,1),d)])
blocks2 = sc.parallelize([((0, 0), e),((0,1), f), ((0,1),g), ((1,1),h)])"""

blocks1 = sc.parallelize([((0, 0), sparse1), ((0, 1), sparse1)])
blocks2 = sc.parallelize([((0, 0), sparse2), ((1, 0), sparse2)])

num = blocks1.getNumPartitions()
print num

mat1 = BlockMatrix(blocks1, 2, 2, 2, 2)
mat2 = BlockMatrix(blocks2, 2, 2, 2, 2)

start1 = timeit.default_timer()
res1 = mat1.multiply(mat2)
stop1 = timeit.default_timer()
t1 = (stop1 - start1)
print t1
Example #19
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.mllib.linalg import Matrices
from pyspark.mllib.linalg.distributed import BlockMatrix
from pyspark.mllib.util import MLUtils

sc = SparkContext()
spark = SparkSession(sc)

# Create an RDD of sub-matrix blocks.
blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
                         ((1, 0), Matrices.dense(3, 2,
                                                 [7, 8, 9, 10, 11, 12]))])

# Create a BlockMatrix from an RDD of sub-matrix blocks.
mat = BlockMatrix(blocks, 3, 2)
# Get its size.
m = mat.numRows()  # 6
n = mat.numCols()  # 2
print("m: " + str(m))
print("n: " + str(n))

print(mat)

# Get the blocks as an RDD of sub-matrix blocks.
blocksRDD = mat.blocks

# Convert to a LocalMatrix.
localMat = mat.toLocalMatrix()

# Convert to an IndexedRowMatrix.
indexedRowMat = mat.toIndexedRowMatrix()
Example #20
    def exercise_3(self):
        """
		This code is only a rough idea about non-nagetive matrix factorization for spark.
		It has not been tested, therefore it may contains lots of bugs or wrong presentation.
		For the same reason, I can not report its error rate.
		Code is modified from the following scratch,which also follows the formula of NMF.
		
		def getNMF(X,K):
			D,N = X.shape #X-WH
			W = np.mat(np.random.rand(D,K))
			H = np.mat(np.random.rand(K,N))
			cost = [np.square(X-W.dot(H)).sum()]
			while True:
				H = np.multiply((W.T.dot(X))/(W.T.dot(W).dot(H)),H)
				W = np.multiply((X.dot(H.T))/(W.dot(H).dot(H.T)),W)
				cost.append(np.square(X-W.dot(H)).sum())
				if (cost[-2]-cost[-1]<1):
					return W,H,cost
		"""

        # Assume R is the sparse matrix to be factorized, given as a BlockMatrix.
        # R = W * H
        # W is a D*K matrix, while H is a K*N matrix.
        def getCost(R, W, H):
            y = R.toCoordinateMatrix().entries.map(
                lambda entry: ((entry.i, entry.j), entry.value))
            x = W.multiply(H).toCoordinateMatrix().entries.map(
                lambda entry: ((entry.i, entry.j), entry.value))
            return x.union(y).reduceByKey(lambda a, b: a - b).map(
                lambda kv: kv[1]**2).sum()

        def newH(R, W, H):
            #H = np.multiply((W.T.dot(X))/(W.T.dot(W).dot(H)),H)
            a = W.transpose().multiply(R).toCoordinateMatrix()\
             .map(lambda entries:((entries.i,entries.j),(0,entries.value)))
            b = W.transpose().multiply(W).multiply(H).toCoordinateMatrix()\
             .map(lambda entries:((entries.i,entries.j),(1,entries.value)))
            c = a.union(b).reduceByKey(lambda a, b: (a[0] == 0 and (2, a[
                2] / b[2])) or (b[0] == 0 and 2, b[2] / a[2]) or b)
            #identify the right order of dividing
            c = c.map(lambda x: ((x[0][0], x[0][1]), x[1][1]))
            d = c.join(H.toCoordinateMatrix())\
             .map(lambda entries:((entries.i,entries.j),entries.value))\
             .reduceByKey(lambda a,b:a*b)
            return CoordinateMatrix(
                d.map(lambda x: MatrixEntry(
                    (x[0][0], x[0][1]), x[1][1]))).toBlockMatrix()

        def newW(R, W, H):
            #W = np.multiply((X.dot(H.T))/(W.dot(H).dot(H.T)),W)
            a = R.multiply(H.transpose()).toCoordinateMatrix()\
             .map(lambda entries:((entries.i,entries.j),(0,entries.value)))
            b = W.multiply(H).multiply(H.transpose()).toCoordinateMatrix()\
             .map(lambda entries:((entries.i,entries.j),(1,entries.value)))
            c = a.union(b).reduceByKey(lambda a, b: (a[0] == 0 and (2, a[
                2] / b[2])) or (b[0] == 0 and 2, b[2] / a[2]) or b)
            #identify the right order of dividing
            c = c.map(lambda x: ((x[0][0], x[0][1]), x[1][1]))
            d = c.join(W.toCoordinateMatrix().map(lambda entries:((entries.i,entries.j),entries.value)))\
             .reduceByKey(lambda a,b:a*b)
            return CoordinateMatrix(
                d.map(lambda x: MatrixEntry(
                    (x[0][0], x[0][1]), x[1][1]))).toBlockMatrix()

        # Assumes R, K, and sc are provided by the enclosing scope.
        D = R.numRows()
        N = R.numCols()
        W = BlockMatrix(
            sc.parallelize([((0, 0),
                             Matrices.dense(D, K, np.random.uniform(0, 1, D * K)))]),
            D, K)
        H = BlockMatrix(
            sc.parallelize([((0, 0),
                             Matrices.dense(K, N, np.random.uniform(0, 1, K * N)))]),
            K, N)
        cost = [getCost(R, W, H)]
        threshold = 1  # iteration stopping threshold
        while True:
            H = newH(R, W, H)
            W = newW(R, W, H)
            cost.append(getCost(R, W, H))
            if (cost[-2] - cost[-1] < threshold):
                break

        return None
Example #21
def addToResults(**nameBlockMatrices):
    global RESULTS_dict
    n, p = GRAPH_NODES, SQUARE_BLOCK_SIZE
    for name, blockMat in nameBlockMatrices.iteritems():
        RESULTS_dict[name] = BlockMatrix(blockMat, p, p, n,
                                         n).toLocalMatrix().toArray()
Example #22
# Convert to a RowMatrix.
rowMat = mat.toRowMatrix()
# Convert to an IndexedRowMatrix.
indexedRowMat = mat.toIndexedRowMatrix()
# Convert to a BlockMatrix.
blockMat = mat.toBlockMatrix()
print('Block matrix converted from the coordinate (triplet) matrix:')
print(blockMat)

# Block matrix
# Create an RDD of sub-matrix blocks.
blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
                         ((1, 0), Matrices.dense(3, 2,
                                                 [7, 8, 9, 10, 11, 12]))])

# Create a BlockMatrix from an RDD of sub-matrix blocks.
mat = BlockMatrix(blocks, 3, 2)
# Get its size.
m = mat.numRows()  # 6
n = mat.numCols()  # 2
# Get the blocks as an RDD of sub-matrix blocks.
blocksRDD = mat.blocks
# Convert to a LocalMatrix.
localMat = mat.toLocalMatrix()
# Convert to an IndexedRowMatrix.
indexedRowMat = mat.toIndexedRowMatrix()
# Convert to a CoordinateMatrix.
coordinateMat = mat.toCoordinateMatrix()
print('Coordinate matrix:')
print(coordinateMat)
sc.stop()