def rbmHtoV(m, X):
    """convey data from hidden layer to visible layer"""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasV = cm.CUDAMatrix(cm.reformat(m.biasV))

    nCase = X.shape[0]
    nVis = biasV.asarray().size
    VisActP = cm.CUDAMatrix(np.zeros((nCase, nVis)))

    if m.type == "BB":
        cm.dot(data, weight.T, target=VisActP)
        VisActP.add_row_vec(biasV)
        VisActP.apply_sigmoid()
    elif m.type == "BG":
        cm.dot(data, weight.T, target=VisActP)
        VisActP.add_row_vec(biasV)
    elif m.type == "GB":
        pass

    result = VisActP.asarray()

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasV.free_device_memory()
    VisActP.free_device_memory()

    cm.shutdown()

    return result
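# Minimal usage sketch for rbmHtoV (illustrative, not from the original
# source): `model` is assumed to be a trained rbmModel exposing `weight`,
# `biasV`, and `type`, matching what rbmHtoV reads above; the data values
# are made up.
def _example_rbmHtoV(model):
    import numpy as np
    hidden = np.random.rand(100, model.weight.shape[1])  # 100 cases of hidden activations
    visible = rbmHtoV(model, hidden)                     # (100, numVis) visible probabilities
    return visible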
def __init__(self, inputDim, outputDim, layerSize, numLayers, maxBatch,
             train=True, temporalLayer=-1):
    # Initialize cublas
    cm.cublas_init()

    self.outputDim = outputDim
    self.inputDim = inputDim
    self.layerSize = layerSize
    self.numLayers = numLayers
    self.layerSizes = [layerSize] * numLayers
    self.maxBatch = maxBatch
    self.train = train

    if not self.train:
        np.seterr(all='ignore')

    if temporalLayer <= 0 or temporalLayer >= numLayers:
        self.temporalLayer = -1
    else:
        self.temporalLayer = temporalLayer

    self.maxAct = 20.0
def train_init(self):
    # init cudamat
    cm.cublas_init()
    cm.CUDAMatrix.init_random(1)

    self.Wgpu = cm.CUDAMatrix(self.W)
    self.speed = cm.empty(self.W.shape)
    self.speed.assign(0)
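# train_init above allocates `self.speed` as a zeroed momentum buffer for
# self.Wgpu. A sketch of the update step such a buffer typically feeds
# (an assumption; the actual train step is not part of this snippet):
def _example_momentum_step(net, gradW, eta=0.01, momentum=0.9):
    net.speed.mult(momentum)          # decay the old velocity
    net.speed.add_mult(gradW, -eta)   # accumulate the new (negative) gradient
    net.Wgpu.add(net.speed)           # take the step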
def __init__(self, inputDim, outputDim, layerSize, numLayers, maxBatch, train=True):
    # Initialize cublas
    cm.cublas_init()

    self.outputDim = outputDim
    self.inputDim = inputDim
    self.layerSizes = [layerSize] * numLayers
    self.maxBatch = maxBatch
    self.train = train
def LockGPU():
    board = gpu_lock.obtain_lock_id()
    if board == -1:
        print 'No GPU board available.'
        sys.exit(1)
    else:
        cm.cuda_set_device(board)
        cm.cublas_init()
    return board
def LockGPU(max_retries=10):
    for retry_count in range(max_retries):
        board = gpu_lock.obtain_lock_id()
        if board != -1:
            break
    if board == -1:
        print 'No GPU board available.'
        sys.exit(1)
    else:
        cm.cuda_set_device(board)
        cm.cublas_init()
    return board  # return the locked board id so callers can free it later
def LockGPU(max_retries=10):
    """ Locks a free GPU board and returns its id. """
    for retry_count in range(max_retries):
        board = gpu_lock.obtain_lock_id()
        if board != -1:
            break
        sleep(1)
    if board == -1:
        print 'No GPU board available.'
        sys.exit(1)
    else:
        cm.cuda_set_device(board)
        cm.cublas_init()
    return board
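# Minimal usage sketch (illustrative): acquire a board before building any
# CUDAMatrix objects and release it when done. `gpu_lock` is the external
# locking module these snippets assume; the free_lock call shown here is an
# assumption about that module's API, not code from the source above.
def _example_lock_unlock():
    board = LockGPU(max_retries=5)
    try:
        pass  # ... build cm.CUDAMatrix objects and train ...
    finally:
        cm.cublas_shutdown()       # pair the cublas_init() done inside LockGPU
        gpu_lock.free_lock(board)  # assumed gpu_lock API; verify in your setup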
def LockGPU(max_retries=10, board=-1):
    retry_count = 0
    while board == -1 and retry_count < max_retries:
        board = gpu_lock.obtain_lock_id()
        if board == -1:
            sleep(1)
            retry_count += 1
    if board == -1:
        print 'No GPU board available.'
        sys.exit(1)
    else:
        cm.cuda_set_device(board)
        cm.cublas_init()
    return board
def LockGPU(max_retries=10, board=-1):
    # The usual gpu_lock retry loop (as in the variants above) is bypassed
    # here; the board id is hardcoded instead.
    board = 3
    cm.cuda_set_device(board)
    cm.cublas_init()
    return board
def main():
    # initialize CUDA
    cm.cublas_init()

    # training parameters
    epsilon = 0.01
    momentum = 0.9
    num_epochs = 30
    batch_size = 128
    num_batches = 92

    # model parameters
    dim_in = 784
    dim_out = 1
    num_hid = 1024

    # load data
    util.load('data/mnist49.dat', globals())

    global dat_train
    global dat_test
    global lbl_train
    global lbl_test

    # Put training data onto the GPU.
    dat_train = dat_train / 255.
    dat_train = dat_train - (np.mean(dat_train, 1) + 10**-8)[:, np.newaxis]
    dev_train = cm.CUDAMatrix(dat_train)
    dev_lbl = cm.CUDAMatrix(lbl_train)

    net = ffnet.FFNet(epsilon, momentum, num_epochs, batch_size, num_batches,
                      dim_in, dim_out, num_hid)
    net.train(dev_train, dev_lbl)

    # Load test data onto the GPU.
    dat_test = dat_test / 255.
    dat_test = dat_test - np.mean(dat_test, 1)[:, np.newaxis]
    dev_test = cm.CUDAMatrix(dat_test)
    dev_lbl = cm.CUDAMatrix(lbl_test)

    net.reinitTestStorage(dat_test.shape[1])
    net.test(dev_test, dev_lbl)

    cm.cublas_shutdown()
def __init__(self, N=1000, pz=1, pg=0.1, g=1.5, alpha=1, dt=0.1,
             num_fits=1, num_inputs=0, state=None):
    cm.cublas_init()

    if state is not None:
        self.from_dict(state)
    else:
        self.N = N
        self.pg = pg
        self.pz = pz
        self.g = g
        self.alpha = alpha
        self.DT = dt
        self.num_fits = num_fits

        scale = 1.0 / np.sqrt(self.pg * self.N)
        M_rvs = stats.norm(loc=0, scale=scale).rvs
        self.M = sp.sparse.random(N, N, pg, data_rvs=M_rvs) * g
        self.M = cm.CUDAMatrix(self.M.toarray())

        self.P = (1.0 / self.alpha) * np.identity(N)
        self.wf = cm.CUDAMatrix(np.random.uniform(-1, 1, (N, num_fits)))
        # self.wo = np.expand_dims(stats.norm(loc=0, scale=(1.0/np.sqrt(N))).rvs(N), num_fits)
        self.wo = cm.CUDAMatrix(np.zeros((N, num_fits)))
        self.dw = np.zeros((N, num_fits))
        self.woc = np.zeros((N, 1))
        self.wfc = np.random.uniform(-1, 1, (N, 1))

        self.x = cm.CUDAMatrix(np.expand_dims(0.5 * np.random.randn(N), 1))
        self.xdt = cm.empty(self.x.shape).assign(0)
        self.r = cm.tanh(self.x)
        self.rdt = cm.empty(self.r.shape).assign(0)
        self.z = cm.CUDAMatrix(np.expand_dims(0.5 * np.random.randn(num_fits), 1))
        self.zdt = cm.empty(self.z.shape).assign(0)
        self.z_ctl = np.expand_dims(0.5 * np.random.randn(1), 1)
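# The buffers above (P = (1/alpha)*I, wo, wf, x, r = tanh(x), z) are the usual
# state of FORCE learning (Sussillo & Abbott, 2009). A NumPy sketch of the
# recursive-least-squares update they typically feed, shown as an assumption
# about how the class is used, not as code from the source. Here P is (N, N),
# r and wo are length-N vectors, and z and f_t are scalars:
def _example_force_update(P, wo, r, z, f_t):
    import numpy as np
    k = np.dot(P, r)                 # P r
    c = 1.0 / (1.0 + np.dot(r, k))   # 1 / (1 + r' P r)
    P -= c * np.outer(k, k)          # rank-1 update of the inverse correlation
    e = z - f_t                      # readout error against the target f(t)
    wo -= c * e * k                  # least-squares output-weight update
    return P, wo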
def __init__(self, n_hidden=10, max_iter=10000, tol=1e-5, anneal=True,
             missing_values=None, discourage_overlap=True, gaussianize='standard',
             gpu=False, verbose=False, precision=1e-8, seed=None):
    self.m = n_hidden  # Number of latent factors to learn
    self.max_iter = max_iter  # Number of iterations to try
    self.tol = tol  # Threshold for convergence
    self.anneal = anneal
    self.eps = 0  # If anneal is True, it's adjusted during optimization to avoid local minima
    self.missing_values = missing_values
    self.discourage_overlap = discourage_overlap  # Whether or not to discourage overlapping latent factors
    self.gaussianize = gaussianize  # Preprocess data: 'standard' scales to zero mean and unit variance
    self.gpu = gpu  # Enable GPU support for some large matrix multiplications.
    if self.gpu:
        cm.cublas_init()

    self.yscale = 1.  # Can be arbitrary, but sets the scale of Y
    np.random.seed(seed)  # Set seed for deterministic results
    self.verbose = verbose
    if verbose:
        np.set_printoptions(precision=3, suppress=True, linewidth=160)
        print('Linear CorEx with {:d} latent factors'.format(n_hidden))
    self.precision = precision

    # Initialize these when we fit on data
    self.n_samples, self.nv = 0, 0  # Number of samples/variables in input data
    self.ws = np.zeros((0, 0))  # m by nv array of weights
    self.moments = {}  # Dictionary of moments
    self.theta = None  # Parameters for preprocessing each variable
    self.history = {}  # Keep track of values for each iteration
    self.last_update = 0  # Used for momentum methods
def rbmVtoH(m, X):
    """convey data from visible layer to hidden layer"""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))

    nCase = X.shape[0]
    nHid = biasH.asarray().size
    hidActP = cm.CUDAMatrix(np.zeros((nCase, nHid)))

    if m.type == "BB":
        cm.dot(data, weight, target=hidActP)
        hidActP.add_row_vec(biasH)
        hidActP.apply_sigmoid()
    elif m.type == "BG":
        cm.dot(data, weight, target=hidActP)
        hidActP.add_row_vec(biasH)
    elif m.type == "GB":
        pass

    result = hidActP.asarray()

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasH.free_device_memory()
    hidActP.free_device_memory()

    cm.shutdown()

    return result
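# Minimal round-trip sketch (illustrative): push visible data up with
# rbmVtoH and project back down with rbmHtoV to inspect the reconstruction.
# `model` is assumed to be a trained rbmModel as in the functions above.
def _example_reconstruction(model, X):
    import numpy as np
    H = rbmVtoH(model, X)            # visible -> hidden probabilities
    Xrec = rbmHtoV(model, H)         # hidden -> visible reconstruction
    return np.mean((X - Xrec) ** 2)  # mean squared reconstruction error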
def train(self):
    '''
    Main train function : modified version of the original train function.

    Additions :
        GPU selection (useful for multi-GPU machines)
        Saving the sum of the square of the data for post-processing
        Visible data are saved
        Data samples are permuted for training
        Weights are saved every 100 training epochs
        Training energy is visualized every 100 training epochs

    NOTE : the anneal learning rate used in the initial code is NOT used here!
    '''
    # plt.ion()
    f1 = plt.figure()
    ax1 = f1.add_subplot(111)
    # ax2 = f1.add_subplot(122)
    # plt.show()

    cmt.cuda_set_device(self.gpuId)
    cmt.cublas_init()
    cmt.CUDAMatrix.init_random(1)

    np.random.seed(self.npRandSeed)
    prng = RandomState(self.npRandState)

    ################################################################
    ##################### CHANGE PATH ##############################
    # Move to current experiment path:
    os.chdir(self.saveDir)
    # Get current path:
    os.getcwd()

    self.plotsDir = 'plots'
    # self.probabilitiesDir = 'p_all'
    if not os.path.isdir(self.plotsDir):
        os.makedirs(self.plotsDir)
    if not os.path.isdir(self.plotsDir + '/energy'):
        os.makedirs(self.plotsDir + '/energy')
    # if not os.path.isdir(self.probabilitiesDir):
    #     os.makedirs(self.probabilitiesDir)
    if not os.path.isdir('weights'):
        os.makedirs('weights')

    d = self.d.astype(np.float32)
    print("visible size: ", d.shape)

    dsq = np.square(d)
    lsq = np.sum(dsq, axis=0)
    with open('lsqComplete.pkl', 'wb') as pklFile:
        cPickle.dump(lsq, pklFile)
    del dsq, lsq

    # Save visible data :
    visData = d
    np.savez('visData.npz', data=d, obsKeys=self.obsKeys, epochTime=self.epochTime)

    with open('visData.txt', 'w') as f:
        f.write("\n Dataset : %s" % (self.dataFilename))
        f.write("\n visData size: %s " % str(visData.shape))
        f.write("\n visData type: %s " % str(visData.dtype))
        f.write("\n \n visData Range: %s " % str(np.max(visData, axis=0) - np.min(visData, axis=0)))
        f.write("\n \n visData min: %s " % str(np.min(visData, axis=0)))
        f.write("\n \n visData max: %s " % str(np.max(visData, axis=0)))
        f.write("\n \n visData mean: %s " % str(np.mean(visData, axis=0)))
        f.write("\n \n visData std: %s " % str(np.std(visData, axis=0)))

    del visData  # if not needed for computing the latent states

    permIdx = prng.permutation(d.shape[0])
    d = d[permIdx, :]

    # subsetting train and test datasets
    # trainPerc = 0.7
    # trainSampNum = int(np.ceil(trainPerc*d.shape[0]))
    # trainSampNum = int(np.floor(trainSampNum/self.batch_size)*self.batch_size)
    # testSampNum = int(d.shape[0]-trainSampNum-1)

    # The test dataset is not used at the moment; it can be used as
    # a validation set to check for overfitting. To use it, uncomment
    # all the variables with 'test' in their name.
    #~ d_test = d[trainSampNum+1:,:]
    # d = d[:trainSampNum,:]
    # obsKeys = self.obsKeys[:trainSampNum]

    totnumcases = d.shape[0]
    num_vis = d.shape[1]

    num_batches = int(totnumcases / self.batch_size)
    print("num_batches: ", num_batches)
    dev_dat = cmt.CUDAMatrix(d.T)  # VxP
    #~ test_dat = cmt.CUDAMatrix(d_test.T)

    del d, self.d, self.epochTime, self.obsKeys

    # training parameters (as in the original code by Ranzato)
    epsilon = self.epsilon
    epsilonVF = 2 * epsilon
    epsilonFH = 0.02 * epsilon
    epsilonb = 0.02 * epsilon
    epsilonw_mean = 0.2 * epsilon
    epsilonb_mean = 0.1 * epsilon
    weightcost_final = self.weightcost_final

    # HMC setting
    hmc_step_nr = self.hmc_step_nr
    hmc_step = 0.01
    hmc_target_ave_rej = self.hmc_target_ave_rej
    hmc_ave_rej = hmc_target_ave_rej

    # initialize weights
    VF = cmt.CUDAMatrix(np.array(0.02 * prng.randn(num_vis, self.num_fac),
                                 dtype=np.float32, order='F'))  # VxH
    if self.apply_mask == 0:
        FH = cmt.CUDAMatrix(np.array(np.eye(self.num_fac, self.num_hid_cov),
                                     dtype=np.float32, order='F'))  # HxO
    else:
        # see CVPR2010paper_material/topo2D_3x3_stride2_576filt.mat for an example
        dd = loadmat('your_FHinit_mask_file.mat')
        FH = cmt.CUDAMatrix(np.array(dd["FH"], dtype=np.float32, order='F'))
    bias_cov = cmt.CUDAMatrix(np.array(2.0 * np.ones((self.num_hid_cov, 1)),
                                       dtype=np.float32, order='F'))
    bias_vis = cmt.CUDAMatrix(np.array(np.zeros((num_vis, 1)),
                                       dtype=np.float32, order='F'))
    w_mean = cmt.CUDAMatrix(np.array(0.05 * prng.randn(num_vis, self.num_hid_mean),
                                     dtype=np.float32, order='F'))  # VxH
    bias_mean = cmt.CUDAMatrix(np.array(-2.0 * np.ones((self.num_hid_mean, 1)),
                                        dtype=np.float32, order='F'))

    # initialize variables to store derivatives
    VFinc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, self.num_fac)), dtype=np.float32, order='F'))
    FHinc = cmt.CUDAMatrix(np.array(np.zeros((self.num_fac, self.num_hid_cov)), dtype=np.float32, order='F'))
    bias_covinc = cmt.CUDAMatrix(np.array(np.zeros((self.num_hid_cov, 1)), dtype=np.float32, order='F'))
    bias_visinc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, 1)), dtype=np.float32, order='F'))
    w_meaninc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, self.num_hid_mean)), dtype=np.float32, order='F'))
    bias_meaninc = cmt.CUDAMatrix(np.array(np.zeros((self.num_hid_mean, 1)), dtype=np.float32, order='F'))

    # initialize temporary storage
    data = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    normdata = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    negdataini = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    feat = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    featsq = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    negdata = cmt.CUDAMatrix(np.array(prng.randn(num_vis, self.batch_size), dtype=np.float32, order='F'))
    old_energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    new_energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    gradient = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    normgradient = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    thresh = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    feat_mean = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_mean, self.batch_size)), dtype=np.float32, order='F'))
    vel = cmt.CUDAMatrix(np.array(prng.randn(num_vis, self.batch_size), dtype=np.float32, order='F'))
    length = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))  # 1xP
    lengthsq = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))  # 1xP
    normcoeff = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))  # 1xP

    # commented to avoid computing the energy on test data
    #~ data_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F')) # Vxtest_batch
    #~ normdata_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F')) # Vxtest_batch
    #~ length_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ lengthsq_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ normcoeff_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ vel_test = cmt.CUDAMatrix( np.array(prng.randn(num_vis, testSampNum), dtype=np.float32, order='F'))
    #~ feat_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ featsq_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ feat_mean_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_mean, testSampNum)), dtype=np.float32, order='F'))
    #~ energy_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F'))

    if self.apply_mask == 1:
        # this is used to constrain very large FH matrices, only allowing changes in a neighborhood
        dd = loadmat('your_FHinit_mask_file.mat')
        mask = cmt.CUDAMatrix(np.array(dd["mask"], dtype=np.float32, order='F'))

    normVF = 1
    small = 0.5

    # other temporary vars
    t1 = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_cov, self.batch_size)), dtype=np.float32, order='F'))
    t2 = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_cov, self.batch_size)), dtype=np.float32, order='F'))
    t3 = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    t4 = cmt.CUDAMatrix(np.array(np.empty((1, self.batch_size)), dtype=np.float32, order='F'))
    t5 = cmt.CUDAMatrix(np.array(np.empty((1, 1)), dtype=np.float32, order='F'))
    t6 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))
    t7 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))
    t8 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.num_fac)), dtype=np.float32, order='F'))
    t9 = cmt.CUDAMatrix(np.array(np.zeros((self.num_fac, self.num_hid_cov)), dtype=np.float32, order='F'))
    t10 = cmt.CUDAMatrix(np.array(np.empty((1, self.num_fac)), dtype=np.float32, order='F'))
    t11 = cmt.CUDAMatrix(np.array(np.empty((1, self.num_hid_cov)), dtype=np.float32, order='F'))

    # commented to avoid computing the energy on test data
    #~ t1_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_cov, testSampNum)), dtype=np.float32, order='F'))
    #~ t2_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_cov, testSampNum)), dtype=np.float32, order='F'))
    #~ t3_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ t4_test = cmt.CUDAMatrix( np.array(np.empty((1, testSampNum)), dtype=np.float32, order='F'))
    #~ t5_test = cmt.CUDAMatrix( np.array(np.empty((1, 1)), dtype=np.float32, order='F'))
    #~ t6_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F'))

    meanEnergy = np.zeros(self.num_epochs)
    minEnergy = np.zeros(self.num_epochs)
    maxEnergy = np.zeros(self.num_epochs)
    #~ meanEnergy_test = np.zeros(self.num_epochs)
    #~ minEnergy_test = np.zeros(self.num_epochs)
    #~ maxEnergy_test = np.zeros(self.num_epochs)

    # start training
    for epoch in range(self.num_epochs):
        print "Epoch " + str(epoch)

        # anneal learning rates as found in the original code -
        # uncomment if you wish to use annealing!
        #~ epsilonVFc = epsilonVF/max(1,epoch/20)
        #~ epsilonFHc = epsilonFH/max(1,epoch/20)
        #~ epsilonbc = epsilonb/max(1,epoch/20)
        #~ epsilonw_meanc = epsilonw_mean/max(1,epoch/20)
        #~ epsilonb_meanc = epsilonb_mean/max(1,epoch/20)

        # no annealing is used in our experiments because learning
        # was stopping too early
        epsilonVFc = epsilonVF
        epsilonFHc = epsilonFH
        epsilonbc = epsilonb
        epsilonw_meanc = epsilonw_mean
        epsilonb_meanc = epsilonb_mean

        weightcost = weightcost_final
        if epoch <= self.startFH:
            epsilonFHc = 0
        if epoch <= self.startwd:
            weightcost = 0

        # commented to avoid computing the energy on test data
        #~ data_test = test_dat
        #~ data_test.mult(data_test, target = t6_test) # DxP
        #~ t6_test.sum(axis = 0, target = lengthsq_test) # 1xP
        #~ lengthsq_test.mult(1./num_vis) # normalize by number of components (like std)
        #~ lengthsq_test.add(small) # small avoids division by 0
        #~ cmt.sqrt(lengthsq_test, target = length_test)
        #~ length_test.reciprocal(target = normcoeff_test) # 1xP
        #~ data_test.mult_by_row(normcoeff_test, target = normdata_test) # normalized data

        for batch in range(num_batches):
            # get current minibatch
            data = dev_dat.slice(batch * self.batch_size, (batch + 1) * self.batch_size)  # DxP (nr dims x nr samples)

            # normalize input data
            data.mult(data, target=t6)  # DxP
            t6.sum(axis=0, target=lengthsq)  # 1xP
            lengthsq.mult(1. / num_vis)  # normalize by number of components (like std)
            lengthsq.add(small)  # small avoids division by 0
            cmt.sqrt(lengthsq, target=length)
            length.reciprocal(target=normcoeff)  # 1xP
            data.mult_by_row(normcoeff, target=normdata)  # normalized data

            ## compute positive sample derivatives
            # covariance part
            cmt.dot(VF.T, normdata, target=feat)  # HxP (nr facs x nr samples)
            feat.mult(feat, target=featsq)  # HxP
            cmt.dot(FH.T, featsq, target=t1)  # OxP (nr cov hiddens x nr samples)
            t1.mult(-0.5)
            t1.add_col_vec(bias_cov)  # OxP
            t1.apply_sigmoid(target=t2)  # OxP
            cmt.dot(featsq, t2.T, target=FHinc)  # HxO
            cmt.dot(FH, t2, target=t3)  # HxP
            t3.mult(feat)
            cmt.dot(normdata, t3.T, target=VFinc)  # VxH
            t2.sum(axis=1, target=bias_covinc)
            bias_covinc.mult(-1)

            # visible bias
            data.sum(axis=1, target=bias_visinc)
            bias_visinc.mult(-1)

            # mean part
            cmt.dot(w_mean.T, data, target=feat_mean)  # HxP (nr mean hiddens x nr samples)
            feat_mean.add_col_vec(bias_mean)  # HxP
            feat_mean.apply_sigmoid()  # HxP
            feat_mean.mult(-1)
            cmt.dot(data, feat_mean.T, target=w_meaninc)
            feat_mean.sum(axis=1, target=bias_meaninc)

            # HMC sampling: draw an approximate sample from the model
            if self.doPCD == 0:  # CD-1 (set negative data to current training samples)
                hmc_step, hmc_ave_rej = self.draw_HMC_samples(
                    data, negdata, normdata, vel, gradient, normgradient,
                    new_energy, old_energy, VF, FH, bias_cov, bias_vis,
                    w_mean, bias_mean, hmc_step, hmc_step_nr, hmc_ave_rej,
                    hmc_target_ave_rej, t1, t2, t3, t4, t5, t6, t7, thresh,
                    feat, featsq, self.batch_size, feat_mean, length,
                    lengthsq, normcoeff, small, num_vis)
            else:  # PCD-1 (use previous negative data as starting point for chain)
                negdataini.assign(negdata)
                hmc_step, hmc_ave_rej = self.draw_HMC_samples(
                    negdataini, negdata, normdata, vel, gradient, normgradient,
                    new_energy, old_energy, VF, FH, bias_cov, bias_vis,
                    w_mean, bias_mean, hmc_step, hmc_step_nr, hmc_ave_rej,
                    hmc_target_ave_rej, t1, t2, t3, t4, t5, t6, t7, thresh,
                    feat, featsq, self.batch_size, feat_mean, length,
                    lengthsq, normcoeff, small, num_vis)

            # compute derivatives at the negative samples
            # normalize input data
            negdata.mult(negdata, target=t6)  # DxP
            t6.sum(axis=0, target=lengthsq)  # 1xP
            lengthsq.mult(1. / num_vis)  # normalize by number of components (like std)
            lengthsq.add(small)
            cmt.sqrt(lengthsq, target=length)
            length.reciprocal(target=normcoeff)  # 1xP
            negdata.mult_by_row(normcoeff, target=normdata)  # normalized data

            # covariance part
            cmt.dot(VF.T, normdata, target=feat)  # HxP
            feat.mult(feat, target=featsq)  # HxP
            cmt.dot(FH.T, featsq, target=t1)  # OxP
            t1.mult(-0.5)
            t1.add_col_vec(bias_cov)  # OxP
            t1.apply_sigmoid(target=t2)  # OxP
            FHinc.subtract_dot(featsq, t2.T)  # HxO
            FHinc.mult(0.5)
            cmt.dot(FH, t2, target=t3)  # HxP
            t3.mult(feat)
            VFinc.subtract_dot(normdata, t3.T)  # VxH
            bias_covinc.add_sums(t2, axis=1)

            # visible bias
            bias_visinc.add_sums(negdata, axis=1)

            # mean part
            cmt.dot(w_mean.T, negdata, target=feat_mean)  # HxP
            feat_mean.add_col_vec(bias_mean)  # HxP
            feat_mean.apply_sigmoid()  # HxP
            w_meaninc.add_dot(negdata, feat_mean.T)
            bias_meaninc.add_sums(feat_mean, axis=1)

            # update parameters
            VFinc.add_mult(VF.sign(), weightcost)  # L1 regularization
            VF.add_mult(VFinc, -epsilonVFc / self.batch_size)
            # normalize columns of VF: normalize by running average of their norm
            VF.mult(VF, target=t8)
            t8.sum(axis=0, target=t10)
            cmt.sqrt(t10)
            t10.sum(axis=1, target=t5)
            t5.copy_to_host()
            normVF = .95 * normVF + (.05 / self.num_fac) * t5.numpy_array[0, 0]  # estimate norm
            t10.reciprocal()
            VF.mult_by_row(t10)
            VF.mult(normVF)
            bias_cov.add_mult(bias_covinc, -epsilonbc / self.batch_size)
            bias_vis.add_mult(bias_visinc, -epsilonbc / self.batch_size)

            if epoch > self.startFH:
                FHinc.add_mult(FH.sign(), weightcost)  # L1 regularization
                FH.add_mult(FHinc, -epsilonFHc / self.batch_size)  # update
                # set to 0 negative entries in FH
                FH.greater_than(0, target=t9)
                FH.mult(t9)
                if self.apply_mask == 1:
                    FH.mult(mask)
                # normalize columns of FH: L1 norm set to 1 in each column
                FH.sum(axis=0, target=t11)
                t11.reciprocal()
                FH.mult_by_row(t11)

            w_meaninc.add_mult(w_mean.sign(), weightcost)
            w_mean.add_mult(w_meaninc, -epsilonw_meanc / self.batch_size)
            bias_mean.add_mult(bias_meaninc, -epsilonb_meanc / self.batch_size)

        if self.verbose == 1:
            print ("VF: " + '%3.2e' % VF.euclid_norm()
                   + ", DVF: " + '%3.2e' % (VFinc.euclid_norm() * (epsilonVFc / self.batch_size))
                   + ", FH: " + '%3.2e' % FH.euclid_norm()
                   + ", DFH: " + '%3.2e' % (FHinc.euclid_norm() * (epsilonFHc / self.batch_size))
                   + ", bias_cov: " + '%3.2e' % bias_cov.euclid_norm()
                   + ", Dbias_cov: " + '%3.2e' % (bias_covinc.euclid_norm() * (epsilonbc / self.batch_size))
                   + ", bias_vis: " + '%3.2e' % bias_vis.euclid_norm()
                   + ", Dbias_vis: " + '%3.2e' % (bias_visinc.euclid_norm() * (epsilonbc / self.batch_size))
                   + ", wm: " + '%3.2e' % w_mean.euclid_norm()
                   + ", Dwm: " + '%3.2e' % (w_meaninc.euclid_norm() * (epsilonw_meanc / self.batch_size))
                   + ", bm: " + '%3.2e' % bias_mean.euclid_norm()
                   + ", Dbm: " + '%3.2e' % (bias_meaninc.euclid_norm() * (epsilonb_meanc / self.batch_size))
                   + ", step: " + '%3.2e' % hmc_step
                   + ", rej: " + '%3.2e' % hmc_ave_rej)
            with open('terminal.txt', 'a') as f:
                f.write('\n' + "epoch: %s" % str(epoch)
                        + ", VF: " + '%3.2e' % VF.euclid_norm()
                        + ", DVF: " + '%3.2e' % (VFinc.euclid_norm() * (epsilonVFc / self.batch_size))
                        + ", FH: " + '%3.2e' % FH.euclid_norm()
                        + ", DFH: " + '%3.2e' % (FHinc.euclid_norm() * (epsilonFHc / self.batch_size))
                        + ", bias_cov: " + '%3.2e' % bias_cov.euclid_norm()
                        + ", Dbias_cov: " + '%3.2e' % (bias_covinc.euclid_norm() * (epsilonbc / self.batch_size))
                        + ", bias_vis: " + '%3.2e' % bias_vis.euclid_norm()
                        + ", Dbias_vis: " + '%3.2e' % (bias_visinc.euclid_norm() * (epsilonbc / self.batch_size))
                        + ", wm: " + '%3.2e' % w_mean.euclid_norm()
                        + ", Dwm: " + '%3.2e' % (w_meaninc.euclid_norm() * (epsilonw_meanc / self.batch_size))
                        + ", bm: " + '%3.2e' % bias_mean.euclid_norm()
                        + ", Dbm: " + '%3.2e' % (bias_meaninc.euclid_norm() * (epsilonb_meanc / self.batch_size))
                        + ", step: " + '%3.2e' % hmc_step
                        + ", rej: " + '%3.2e' % hmc_ave_rej)
            sys.stdout.flush()

        # compute the energy on training data
        self.compute_energy_mcRBM_visual(data, normdata, energy, VF, FH,
                                         bias_cov, bias_vis, w_mean, bias_mean,
                                         t1, t2, t6, feat, featsq, feat_mean,
                                         length, lengthsq, normcoeff, small,
                                         num_vis)
        energy.copy_to_host()
        meanEnergy[epoch] = np.mean(energy.numpy_array)
        minEnergy[epoch] = np.min(energy.numpy_array)
        maxEnergy[epoch] = np.max(energy.numpy_array)

        # commented to avoid computing the energy on test data
        #~ self.compute_energy_mcRBM_visual(data_test, normdata_test, energy_test, VF, FH, bias_cov, bias_vis, w_mean, bias_mean, t1_test, t2_test, t6_test, feat_test, featsq_test, feat_mean_test, length_test, lengthsq_test, normcoeff_test, small, num_vis)
        #~ energy_test.copy_to_host()
        #~ meanEnergy_test[epoch] = np.mean(energy_test.numpy_array)
        #~ minEnergy_test[epoch] = np.min(energy_test.numpy_array)
        #~ maxEnergy_test[epoch] = np.max(energy_test.numpy_array)

        ax1.cla()
        ax1.plot(range(epoch), meanEnergy[0:epoch])
        ax1.plot(range(epoch), maxEnergy[0:epoch])
        ax1.plot(range(epoch), minEnergy[0:epoch])

        if np.mod(epoch, 100) == 0:
            # f1.savefig(output_folder + str(epoch) + '_' + 'fig.png')
            f1.savefig(self.plotsDir + '/energy/energyAt_%s.png' % str(epoch))

        # back-up every once in a while
        if np.mod(epoch, 100) == 0:
            VF.copy_to_host()
            FH.copy_to_host()
            bias_cov.copy_to_host()
            w_mean.copy_to_host()
            bias_mean.copy_to_host()
            bias_vis.copy_to_host()
            savemat("./weights/ws_temp%s" % str(epoch),
                    {'VF': VF.numpy_array, 'FH': FH.numpy_array,
                     'bias_cov': bias_cov.numpy_array,
                     'bias_vis': bias_vis.numpy_array,
                     'w_mean': w_mean.numpy_array,
                     'bias_mean': bias_mean.numpy_array, 'epoch': epoch})

        # uncomment if computing the energy in order to store its evolution throughout training
        #~ savemat(self.refDir + '/' + "training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy, 'meanEnergy_test':meanEnergy_test, 'maxEnergy': maxEnergy, 'maxEnergy_test': maxEnergy_test, 'minEnergy': minEnergy, 'minEnergy_test': minEnergy_test, 'epoch':epoch})
        # savemat("training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy, 'maxEnergy': maxEnergy, 'minEnergy': minEnergy, 'epoch':epoch})

        # in order to stop the training gracefully, create an empty file
        # named 'stop_now' in the folder containing the experiment
        # configuration file
        if os.path.isfile('stop_now'):
            break

    # final back-up
    VF.copy_to_host()
    FH.copy_to_host()
    bias_cov.copy_to_host()
    bias_vis.copy_to_host()
    w_mean.copy_to_host()
    bias_mean.copy_to_host()
    savemat("ws_fac%s" % str(self.num_fac) + "_cov%s" % str(self.num_hid_cov)
            + "_mean%s" % str(self.num_hid_mean),
            {'VF': VF.numpy_array, 'FH': FH.numpy_array,
             'bias_cov': bias_cov.numpy_array, 'bias_vis': bias_vis.numpy_array,
             'w_mean': w_mean.numpy_array, 'bias_mean': bias_mean.numpy_array,
             'epoch': epoch})

    # uncomment if computing the energy in order to store its evolution throughout training
    #~ savemat(self.refDir + '/' + "training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy, 'meanEnergy_test':meanEnergy_test, 'maxEnergy': maxEnergy, 'maxEnergy_test': maxEnergy_test, 'minEnergy': minEnergy, 'minEnergy_test': minEnergy_test, 'epoch':epoch})
    savemat("training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov)
            + "_mean" + str(self.num_hid_mean),
            {'meanEnergy': meanEnergy, 'maxEnergy': maxEnergy,
             'minEnergy': minEnergy, 'epoch': epoch})

    # Compute states if desired:
    # normalise data for covariance hidden:
    # dsq = np.square(visData)
    # lsq = np.sum(dsq, axis=0)
    # lsq /= visData.shape[1]
    # lsq += np.spacing(1)
    # l = np.sqrt(lsq)
    # normD = visData/l
    # logisticArg_c = (-0.5*np.dot(FH.numpy_array.T, np.square(np.dot(VF.numpy_array.T, normD.T))) + bias_cov.numpy_array).T
    # p_hc = logisticFunc(logisticArg_c)
    # logisticArg_m = np.dot(visData, w_mean.numpy_array) + bias_mean.numpy_array.T
    # p_hm = logisticFunc(logisticArg_m)
    # p_all = np.concatenate((p_hc, p_hm), axis=1)
    # savemat(self.probabilitiesDir + '/pAll_%i.mat' % epoch, mdict={'p_all': p_all})

    with open('done', 'w') as doneFile:
        doneFile.write(datetime.strftime(datetime.now(), '%d/%m/%Y %H:%M:%S'))
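# The normalization applied before each covariance pass in the train loop
# above divides every sample (column) by roughly its RMS norm. A NumPy
# sketch of the same computation, for reference only; the function name is
# hypothetical and `data` is VxP as in the loop:
def _normalize_columns(data, small=0.5):
    import numpy as np
    lengthsq = np.sum(data ** 2, axis=0) / data.shape[0]  # mean square per column
    lengthsq += small                                     # small avoids division by 0
    return data / np.sqrt(lengthsq)                       # normalized columns (VxP)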
def rbm(data, numHid, modelType="BB", **kwargs):
    """
    rbm definition
    data : when type is BB, should be binary, or in [0,1] to be interpreted
           as probabilities; when type is GB, should be continuous real values.
           data should have a format of *.npy
    numHid : number of nodes in the hidden layer
    type : rbm type, can be set as BB or GB

    additional inputs (specified as name value pairs or in struct)
    method     CD or SML
    eta        learning rate
    momentum   momentum for smoothness and to prevent overfitting
               NOTE: momentum is not recommended with SML
    maxepoch   # of epochs: each is a full pass through train data
    avglast    how many epochs before maxepoch to start averaging.
               Procedure suggested for faster convergence by Kevin Swersky
               in his MSc thesis
    penalty    weight decay factor
    batchsize  The number of training instances per batch
    verbose    For printing progress
    anneal     Flag. If set true, the penalty is annealed linearly through
               epochs to 10% of its original value

    OUTPUTS:
    model.type    Type of RBM (i.e. type of its visible and hidden units)
    model.weight  The weights of the connections
    model.biasH   The biases of the hidden layer
    model.biasV   The biases of the visible layer
    model.top     The activity of the top layer, to be used when training DBN's
    errors        The errors in reconstruction at every epoch
    """
    arg = util.processOptions(kwargs,
                              method="CD",
                              eta=0.1,
                              momentum=0.9,
                              maxEpoch=50,
                              avgLast=0,
                              penalty=0,
                              batchSize=50,
                              verbose=True,
                              anneal=False)
    [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"],
        arg["anneal"]]

    # from which step, we start to compute the average
    avgStart = maxEpoch - avgLast

    # for weight decay use
    oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(data.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    np.random.shuffle(data)

    # init CUDA
    # cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidActP2 = cm.empty((batchSize, numHid))
    visState = cm.empty((batchSize, numVis))
    hidState = cm.empty((batchSize, numHid))

    t = 1
    for epoch in range(maxEpoch):
        error = []

        if anneal:
            # apply linear weight decay
            penalty = oldPenalty - 0.9 * epoch / maxEpoch * oldPenalty

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
                # re-allocate temporary storage for the smaller final batch
                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidActP2 = cm.empty((batchSize, numHid))
                visState = cm.empty((batchSize, numVis))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidState)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            if cmp(modelType, "BB") == 0:
                cm.dot(hidState, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

                visState.fill_with_rand()
                visState.less_than(visActP, target=visState)
            elif cmp(modelType, "GB") == 0:
                cm.dot(hidState, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                # add Gaussian noise on the device (a plain numpy array
                # cannot be passed to CUDAMatrix.add)
                visActP.add(cm.CUDAMatrix(np.random.randn(batchSize, numVis)), target=visState)

            # another positive phase
            cm.dot(visState, weight, target=hidActP2)
            hidActP2.add_row_vec(biasH)
            hidActP2.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP2, target=hidState)

            # update weight and bias
            dWeight = cm.dot(visTrue.T, hidActP)
            dWeight.subtract_dot(visState.T, hidActP2)
            dBiasV = visTrue.sum(axis=0).subtract(visState.sum(axis=0))
            dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis=0))

            dWeight.divide(batchSize)
            # weight decay: subtract penalty*weight without mutating weight
            # in place (weight.mult(penalty) would overwrite the weights)
            dWeight.add_mult(weight, -penalty)
            dBiasV.divide(batchSize)
            dBiasH.divide(batchSize)

            weightInc.mult(momentum).add_mult(dWeight, eta)
            biasVInc.mult(momentum).add_mult(dBiasV, eta)
            biasHInc.mult(momentum).add_mult(dBiasH, eta)

            weight.add(weightInc)
            biasV.add(biasVInc)
            biasH.add(biasHInc)

            if epoch > avgStart:
                # running average: Agv += (current - Agv) / t
                # (the original in-place chain subtracted the buffer from
                # itself, zeroing the average)
                delta = cm.empty(weight.shape).assign(weight).subtract(weightAgv).mult(1.0 / t)
                weightAgv.add(delta)
                delta = cm.empty(biasV.shape).assign(biasV).subtract(biasVAgv).mult(1.0 / t)
                biasVAgv.add(delta)
                delta = cm.empty(biasH.shape).assign(biasH).subtract(biasHAgv).mult(1.0 / t)
                biasHAgv.add(delta)
                t = t + 1
            else:
                # snapshot copies (plain assignment would alias the GPU matrices)
                weightAgv = cm.empty(weight.shape).assign(weight)
                biasVAgv = cm.empty(biasV.shape).assign(biasV)
                biasHAgv = cm.empty(biasH.shape).assign(biasH)

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceData, weightAgv, target=top)
    top.add_row_vec(biasHAgv)
    top.apply_sigmoid()

    model_ = m.rbmModel(weightAgv, biasVAgv, biasHAgv, type=modelType, top=top)

    cm.shutdown()

    return model_
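# Minimal usage sketch (illustrative): train a binary-binary RBM on
# row-major binary data. The data values are made up; `util` and
# `m.rbmModel` are the helper modules these snippets assume.
def _example_rbm():
    import numpy as np
    X = (np.random.rand(500, 784) > 0.5).astype(np.float64)  # 500 binary examples
    model = rbm(X, numHid=256, modelType="BB", eta=0.1, maxEpoch=10, batchSize=50)
    return model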
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """
    X          ... data. should be binary, or in [0,1] interpreted as
               ... probabilities
    numhid     ... number of hidden units
    y          ... List of discrete labels

    nClass     number of classes
    method     CD or SML
    eta        learning rate
    momentum   momentum for smoothness and to prevent overfitting
               NOTE: momentum is not recommended with SML
    maxepoch   # of epochs: each is a full pass through train data
    avglast    how many epochs before maxepoch to start averaging.
               Procedure suggested for faster convergence by Kevin Swersky
               in his MSc thesis
    batchsize  The number of training instances per batch
    verbose    For printing progress

    model.weight      The weights of the connections
    model.biasH       The biases of the hidden layer
    model.biasV       The biases of the visible layer
    model.weightlabel ... The weights on labels layer
    model.biasLabel   ... The biases on labels layer
    errors            The errors in reconstruction at each epoch
    """
    arg = util.processOptions(kwargs,
                              nClass=np.unique(y).size,
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [
        arg["nClass"],
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"]]

    if verbose:
        print "Processing data ..."

    # from which step, we start to compute the average
    # avgStart = maxEpoch - avgLast

    # for weight decay use
    # oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1 * np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                labelTrue = deviceLabel.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]

                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                labelTrue = deviceLabel.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # apply momentum to the increments (the original multiplied
            # weightLabel/biasLabel themselves here by mistake)
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target=visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target=labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and bias
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)
            weightLabel.add_mult(weightLabelInc, eta / batchSize)
            biasLabel.add_mult(biasLabelInc, eta / batchSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print "Epoch %d/%d, reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array,
                        weightLabel=weightLabel.numpy_array,
                        biasLabel=biasLabel.numpy_array, labels=uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name, model)

    return model_
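# Minimal usage sketch (illustrative): fit a classification RBM on labeled
# binary data and save the model. The data values and file name are made up.
def _example_rbmFit():
    import numpy as np
    X = (np.random.rand(1000, 784) > 0.5).astype(np.float64)
    y = np.random.randint(0, 10, 1000)  # ten discrete classes
    model = rbmFit(X, numHid=500, y=y, isSaveModel=True, name="rbm_model.npy",
                   maxEpoch=50, batchSize=100)
    return model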
def main():
    parser = ArgumentParser()
    parser.add_argument("query_file", help="word2vec file in json format")
    parser.add_argument("bidword_file", help="word2vec file in json format")
    args = parser.parse_args()

    query_file = args.query_file
    bidword_file = args.bidword_file

    if DEBUG_FLAG:
        print "loading bidword dict ..."
    start = time()
    bidword_list, bidword_matrix = load_normalized_matrix(bidword_file)
    end = time()
    if DEBUG_FLAG:
        print "loading bidword dict done", duration(start, end)

    if DEBUG_FLAG:
        print "loading query dict ..."
    start = time()
    query_list, query_matrix = load_normalized_matrix(query_file)
    end = time()
    if DEBUG_FLAG:
        print "loading query dict done", duration(start, end)

    hash_length = 12
    hash_number = 1
    seed_matrix = random((200, hash_length * hash_number)) - 0.5

    if DEBUG_FLAG:
        print "initing cublas ..."
    start = time()
    cuda_set_device(1)
    cublas_init(1000000)
    end = time()
    if DEBUG_FLAG:
        print "initing cublas done", duration(start, end)

    if DEBUG_FLAG:
        print "computing hash_matrix ..."
    start = time()
    cuda_seed_matrix = CUDAMatrix(seed_matrix)
    cuda_bidword_matrix = CUDAMatrix(bidword_matrix)
    bidword_hash_matrix = dot(cuda_bidword_matrix, cuda_seed_matrix).asarray()
    del cuda_bidword_matrix
    cuda_query_matrix = CUDAMatrix(query_matrix)
    query_hash_matrix = dot(cuda_query_matrix, cuda_seed_matrix).asarray()
    del cuda_query_matrix
    end = time()
    if DEBUG_FLAG:
        print "computing hash_matrix done", duration(start, end)

    if DEBUG_FLAG:
        print "initing bidword_hash_dict_list ..."
    start = time()
    bidword_hash_dict_list = [dict([]) for i in xrange(hash_number)]
    end = time()
    if DEBUG_FLAG:
        print "initing bidword_hash_dict_list done", duration(start, end)

    if DEBUG_FLAG:
        print "aggregating bidword_hash_dict_list ..."
    start = time()
    for i in xrange(bidword_hash_matrix.shape[0]):
        hash_string = "".join(['1' if j > 0 else '0' for j in bidword_hash_matrix[i, :]])
        for j in xrange(hash_number):
            hash_index_start = j * hash_length
            hash_index_end = hash_index_start + hash_length
            hash_key = hash_string[hash_index_start:hash_index_end]
            if hash_key in bidword_hash_dict_list[j]:
                bidword_hash_dict_list[j][hash_key].add(i)
            else:
                bidword_hash_dict_list[j][hash_key] = set([i])
    end = time()
    if DEBUG_FLAG:
        print "aggregating bidword_hash_dict_list done", duration(start, end)

    if DEBUG_FLAG:
        print "aggregating query_hash_dict ..."
    start = time()
    query_hash_dict = {}
    for i in xrange(query_hash_matrix.shape[0]):
        hash_string = "".join(['1' if j > 0 else '0' for j in query_hash_matrix[i, :]])
        if hash_string in query_hash_dict:
            query_hash_dict[hash_string].add(i)
        else:
            query_hash_dict[hash_string] = set([i])
    end = time()
    if DEBUG_FLAG:
        print "aggregating query_hash_dict done", duration(start, end)

    profiler_total = 0
    profiler_first = 0
    profiler_first_zero = 0
    profiler_first_one = 0
    profiler_first_two = 0
    profiler_first_three = 0
    profiler_first_four = 0
    profiler_second = 0
    profiler_third = 0

    timer = time()
    for hash_string in query_hash_dict:
        time_flag_total = time()
        time_flag_first = time()

        # occasionally trigger garbage collection to release memory
        if random_sample() > 0.95:
            collect()

        # aggregate query_index_set and bidword_index_set
        query_index_set = query_hash_dict[hash_string]
        bidword_index_set = set()
        for i in xrange(hash_number):
            time_flag_first_zero = time()
            hash_index_start = i * hash_length
            hash_index_end = hash_index_start + hash_length
            hash_key = hash_string[hash_index_start:hash_index_end]
            profiler_first_zero += time() - time_flag_first_zero

            # circum hash with hamming distance 0
            time_flag_first_one = time()
            bidword_index_set |= bidword_hash_dict_list[i][hash_key]
            profiler_first_one += time() - time_flag_first_one

            # circum hash with hamming distance 1
            time_flag_first_two = time()
            for first_index in xrange(hash_length):
                circum_hash_key = list(hash_key)
                circum_hash_key[first_index] = '1' if hash_key[first_index] == '0' else '0'
                circum_hash_key = "".join(circum_hash_key)
                if circum_hash_key in bidword_hash_dict_list[i]:
                    bidword_index_set |= bidword_hash_dict_list[i][circum_hash_key]
            profiler_first_two += time() - time_flag_first_two

            # circum hash with hamming distance 2
            time_flag_first_three = time()
            for first_index, second_index in combinations(range(hash_length), 2):
                circum_hash_key = list(hash_key)
                circum_hash_key[first_index] = '1' if hash_key[first_index] == '0' else '0'
                circum_hash_key[second_index] = '1' if hash_key[second_index] == '0' else '0'
                circum_hash_key = "".join(circum_hash_key)
                if circum_hash_key in bidword_hash_dict_list[i]:
                    bidword_index_set |= bidword_hash_dict_list[i][circum_hash_key]
            profiler_first_three += time() - time_flag_first_three

            ## circum hash with hamming distance 3
            #time_flag_first_four = time()
            #for first_index, second_index, third_index in combinations(range(hash_length), 3):
            #    circum_hash_key = list(hash_key)
            #    circum_hash_key[first_index] = '1' if hash_key[first_index] == '0' else '0'
            #    circum_hash_key[second_index] = '1' if hash_key[second_index] == '0' else '0'
            #    circum_hash_key[third_index] = '1' if hash_key[third_index] == '0' else '0'
            #    circum_hash_key = "".join(circum_hash_key)
            #    if circum_hash_key in bidword_hash_dict_list[i]:
            #        bidword_index_set |= bidword_hash_dict_list[i][circum_hash_key]
            #profiler_first_four += time() - time_flag_first_four

        # compute sim between query_index_list and bidword_index_list
        profiler_first += time() - time_flag_first
        query_index_list = list(query_index_set)
        bidword_index_list = list(bidword_index_set)

        partition_length = 1e8
        if DEBUG_FLAG or True:
            print "### profile ### matrix shape:", \
                query_matrix[query_index_list, :].shape, \
                bidword_matrix[bidword_index_list, :].transpose().shape, \
                len(query_index_list) * len(bidword_index_list)
        if len(bidword_index_list) > partition_length:
            raise Exception("bidword_index_list too long: %d" % len(bidword_index_list))

        step = int(partition_length / len(bidword_index_list))
        partition_begin = 0
        partition_end = 0
        while partition_end < len(query_index_list):
            partition_end = len(query_index_list) if partition_begin + step > len(query_index_list) else partition_begin + step
            if DEBUG_FLAG or True:
                print "### profile ### partition_begin:", partition_begin, "partition_end:", partition_end
            time_flag_second = time()
            sim_matrix = dot(
                CUDAMatrix(query_matrix[query_index_list[partition_begin:partition_end], :]),
                CUDAMatrix(bidword_matrix[bidword_index_list, :].transpose())
            ).asarray().tolist()
            profiler_second += time() - time_flag_second
            profiler_third += sort_matrix(sim_matrix, query_list,
                                          query_index_list[partition_begin:partition_end],
                                          bidword_list, bidword_index_list)
            partition_begin = partition_end

        profiler_total += time() - time_flag_total
        if DEBUG_FLAG or True:
            print "### profile ### total=%f first=%f(%f)[%f(%f)%f(%f)%f(%f)%f(%f)%f(%f)] second=%f(%f) third=%f(%f) %s(%f)" % (
                profiler_total,
                profiler_first, profiler_first / profiler_total,
                profiler_first_zero, profiler_first_zero / profiler_first,
                profiler_first_one, profiler_first_one / profiler_first,
                profiler_first_two, profiler_first_two / profiler_first,
                profiler_first_three, profiler_first_three / profiler_first,
                profiler_first_four, profiler_first_four / profiler_first,
                profiler_second, profiler_second / profiler_total,
                profiler_third, profiler_third / profiler_total,
                duration(timer, time()), time() - timer)
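# The hashing in main() above is sign-random-projection LSH: each 200-d
# vector is projected onto random hyperplanes and the sign pattern becomes
# its bucket key, so nearby vectors tend to share buckets. A NumPy sketch of
# the same idea (illustrative only; the function name and shapes are
# assumptions):
def _example_srp_hash(vectors, hash_length=12, dim=200):
    import numpy as np
    planes = np.random.random((dim, hash_length)) - 0.5  # random hyperplanes
    projections = np.dot(vectors, planes)                # (n, hash_length)
    bits = (projections > 0).astype(int)                 # sign pattern per vector
    return ["".join(map(str, row)) for row in bits]      # bucket keys like '0110...'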
def setup():
    cm.cublas_init()
def rbmPredict(m, X):
    """using trained rbm model to do prediction"""
    nClass = m.labels.size
    numCase = X.shape[0]

    # This part is executed on CPU
    # define the free energy
    # FF = np.zeros((numCase, nClass))
    # FFcol = np.zeros((numCase, 1))
    # for index in range(nClass):
    #     temp = np.zeros((numCase, nClass))
    #     temp[:, index] = 1
    #     tt = np.emath.log(np.exp(np.dot(X, m.weight) + np.dot(temp, m.weightLabel) + m.biasH) + 1)
    #     FFcol = temp[:, index] * m.biasLabel[0, index] + np.sum(tt, axis=1)
    #     FF[:, index] = FFcol
    #
    # [x, y] = np.where(np.abs(FF - np.max(FF, axis=1, keepdims=True)) < 1e-5)
    # result = np.zeros(y.shape)
    # for index in range(y.size):
    #     result[index] = m.labels[y[index]]

    # The following part runs on GPU
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
    weightLabel = cm.CUDAMatrix(cm.reformat(m.weightLabel))
    biasLabel = cm.CUDAMatrix(cm.reformat(m.biasLabel))

    F = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    Fcol = cm.CUDAMatrix(np.zeros((numCase, 1)))
    temp = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    tt = cm.CUDAMatrix(np.zeros((numCase, biasH.asarray().size)))

    for index in range(nClass):
        temp.assign(0)
        temp.set_col_slice(index, index + 1, 1)

        tt = cm.dot(data, weight)
        tt.add_dot(temp, weightLabel)
        tt.add_row_vec(biasH)
        cm.log_1_plus_exp(tt, target=tt, exact=True)

        Fcol = cm.sum(tt, axis=1)
        Fcol.add_mult(temp.get_col_slice(index, index + 1), biasLabel.numpy_array[0, index])
        F.set_col_slice(index, index + 1, Fcol)

        tt.free_device_memory()

    F.copy_to_host()
    [x, y] = np.where(np.abs(F.numpy_array - np.max(F.numpy_array, axis=1, keepdims=True)) < 1e-5)

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasH.free_device_memory()
    biasLabel.free_device_memory()
    weightLabel.free_device_memory()
    F.free_device_memory()
    Fcol.free_device_memory()
    temp.free_device_memory()

    cm.shutdown()

    result = np.zeros(y.shape)
    for index in range(y.size):
        result[index] = m.labels[y[index]]

    return [result, F.numpy_array]
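# For reference, the per-class score computed above is the (negative) free
# energy of the class-conditional RBM,
#     F(x, y) = biasLabel[y] + sum_j log(1 + exp(x . W[:, j] + weightLabel[y, j] + biasH[j])),
# and the predicted label is the argmax over y. Minimal usage sketch
# (illustrative; `model` is a trained rbmFit model):
def _example_rbmPredict(model, X):
    labels, scores = rbmPredict(model, X)  # labels: (n,), scores: (n, nClass)
    return labels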
def LockGPU(max_retries=10, board=-1):
    # Assumes the GPU lock for this board was already obtained
    cm.cuda_set_device(board)
    cm.cublas_init()
    return board
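# A minimal usage sketch for the LockGPU variants above (hypothetical: the
# board id is invented, and gpu_lock is assumed to have granted the lock
# beforehand; cublas is released with cm.shutdown() when the job is done).
board = LockGPU(board=2)
a = cm.CUDAMatrix(np.random.rand(4, 4))
print a.asarray().sum()
cm.shutdown()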
def rbm(X, numHid, **kwargs):
    """
    rbm definition
    data : when type is BB, should be binary, or in [0,1] to be interpreted
           as probabilities; when type is GB, should be continuous real values.
           data should have a format of *.npy
    numHid : number of nodes in the hidden layer
    type            rbm type, can be set as BB or GB
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness and to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging.
                    Procedure suggested for faster convergence by Kevin Swersky
                    in his MSc thesis
    batchsize       the number of training instances per batch
    verbose         for printing progress

    model.type      type of RBM (i.e. type of its visible and hidden units)
    model.weight    the weights of the connections
    model.biasH     the biases of the hidden layer
    model.biasV     the biases of the visible layer
    model.top       the activity of the top layer, to be used when training DBNs
    errors          the reconstruction error at every epoch
    """
    # When computing the transpose of a matrix, the method *.transpose()
    # allocates extra space; the .T attribute is cheaper.
    arg = util.processOptions(kwargs,
                              modelType="BB",
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    [modelType, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [
        arg["modelType"],
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"]
    ]

    # from which step, we start to compute the average
    # avgStart = maxEpoch - avgLast

    # for weight decay use
    # oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
    # cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight updates
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
                # the last batch can be smaller, so re-create the temporaries
                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            # sample binary hidden states (written into hidActP)
            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if method == "SML":
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif method == "CD":
                pass

            # negative phase
            if modelType == "BB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()
            elif modelType == "GB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                # Gaussian visible units: the noise must live on the GPU
                # before it can be added (the original passed a numpy array)
                visActP.add(cm.CUDAMatrix(np.random.randn(batchSize, numVis)), target=visActP)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            # update weights and biases
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)

            # if epoch > avgStart :
            #     # apply average
            #     weightAgv.subtract(weightAgv.subtract(weight).mult(1.0/t))
            #     biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0/t))
            #     biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0/t))
            #     t = t+1
            # else :
            #     weightAgv = weight
            #     biasVAgv = biasV
            #     biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free device memory
            visTrue.free_device_memory()

        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(cm.CUDAMatrix(cm.reformat(X)), weight, target=top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array,
                        biasH.numpy_array, type=modelType, top=top.numpy_array)

    # free device memory
    deviceData.free_device_memory()
    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    return model_
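# A hedged usage sketch for the rbm() trainer above: fit a small binary-binary
# RBM on random binary data (all names from the surrounding module; assumes
# m.rbmModel exposes its `top` argument as an attribute).
X = (np.random.rand(1000, 784) > 0.5).astype(np.float64)
model = rbm(X, 256, modelType="BB", method="CD",
            eta=0.05, momentum=0.5, maxEpoch=10, batchSize=100)
print model.top.shape  # hidden-layer probabilities for the training set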
input = input.cuda()
one, mone = one.cuda(), mone.cuda()
noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

# setup optimizer
if opt.adam:
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lrG, betas=(opt.beta1, 0.999))
else:
    optimizerG = optim.RMSprop(netG.parameters(), lr=opt.lrG)

# initialization of cudamat
if opt.sinkgpu:
    cudamat.cublas_init()

normalizeL = torch.Tensor([opt.regL]).double()
SCORE = []

gen_iterations = 0
for epoch in range(opt.niter):
    data_iter = iter(dataloader)
    i = 0
    tmp_score = []
    while i < len(dataloader):
        ############################
        # (1) sample the empirical data first
        ############################
        data = data_iter.next()
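# The snippet above mixes PyTorch CUDA tensors with cudamat. The two libraries
# do not share device memory, so data crosses between them through host numpy
# arrays; a minimal sketch of that round trip (shapes invented):
import numpy as np
import torch
import cudamat

t = torch.randn(64, 100).double()      # torch tensor on the host
c = cudamat.CUDAMatrix(t.numpy())      # host copy into cudamat's GPU context
back = torch.from_numpy(c.asarray())   # back to a torch tensor via host memory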
def calc_output_legacy(self, data, batch_size):
    """
    Calculate the output (probabilities) for a set of data

    The purpose of this function is to calculate the output of a DN on
    some set of data. The values will be calculated using rbm_cudamat on
    slices of data specified by the batch size.
    """
    import cudamat as cm
    import rbm_numpy, rbm_cudamat

    # Initialize CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(1)

    if self.legacy_card_number != 0:
        cm.cuda_set_device(self.legacy_card_number)

    # Create output, use the size of the last layer to do this
    output = np.empty((data.shape[0], self.arch[(self.layer_count - 1)]['node_count']))

    # Slice up data, handling batches of batch_size. USE INT DIVISION
    processed = 0
    for j in range(data.shape[0] // batch_size):
        curr_data = data[j * batch_size:(j + 1) * batch_size, :]
        for i in range(1, self.layer_count):
            # Handle a sigmoid node
            if self.arch[i]['node_type'] == 'S':
                curr_data = rbm_cudamat.calc_hidden_probs(curr_data,
                                                          self.weights[i]['w'],
                                                          self.weights[i]['hb'],
                                                          batch_size)
        output[j * batch_size:(j + 1) * batch_size, :] = curr_data[:, :]
        processed = processed + batch_size

    # Now handle anything that was left over, i.e., what didn't fit in
    if processed != data.shape[0]:
        curr_data = data[processed:, :]
        for i in range(1, self.layer_count):
            # Handle a sigmoid node
            if self.arch[i]['node_type'] == 'S':
                curr_data = rbm_numpy.calc_hidden_probs(curr_data,
                                                        self.weights[i]['w'],
                                                        self.weights[i]['hb'])
        output[processed:, :] = curr_data[:, :]

    cm.cublas_shutdown()

    return output
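# The slice-then-remainder pattern in calc_output_legacy is easy to get wrong;
# the same batching logic as a small standalone sketch (hypothetical helper,
# not part of the original class):
def iter_batches(n_rows, batch_size):
    """Yield (start, end) slices covering n_rows, with the remainder last."""
    n_full = n_rows // batch_size
    for j in range(n_full):
        yield j * batch_size, (j + 1) * batch_size
    if n_full * batch_size != n_rows:
        yield n_full * batch_size, n_rows

# for start, end in iter_batches(data.shape[0], 128):
#     process(data[start:end, :])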
# This file shows how to implement a single hidden layer neural network for
# performing binary classification on the GPU using cudamat.

import pdb
import time
import numpy as np

import cudamat as cm
from cudamat import learn as cl
import util

# initialize CUDA
cm.cublas_init()

# load data
util.load('mnist49.dat', globals())

# Put training data onto the GPU.
dat_train = dat_train / 255.
dat_train = dat_train - (np.mean(dat_train, 1) + 10**-8)[:, np.newaxis]
dev_train = cm.CUDAMatrix(dat_train)
dev_lbl = cm.CUDAMatrix(lbl_train)

# training parameters
epsilon = 0.01
momentum = 0.9
num_epochs = 30
batch_size = 128
num_batches = dat_train.shape[1] / batch_size

# model parameters
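# The file is cut off at its model-parameter section. A hedged sketch of how
# such parameters might be initialized with cudamat (hypothetical sizes and
# names, not the original file's continuation):
num_vis = dat_train.shape[0]   # rows are features, columns are cases
num_hid = 1024                 # hidden layer size, chosen arbitrarily here
w_vh = cm.CUDAMatrix(0.01 * np.random.randn(num_vis, num_hid))
b_h = cm.CUDAMatrix(np.zeros((num_hid, 1)))
w_out = cm.CUDAMatrix(0.01 * np.random.randn(num_hid, 1))
b_out = cm.CUDAMatrix(np.zeros((1, 1)))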
def rbm(data, numHid, modelType="BB", **kwargs): """ rbm defination data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities when type is GB, should be continuous real value. data should have a format of *.npy numHid : number nodes of and hidden layer type : rbm type, can be set as BB or GB additional inputs (specified as name value pairs or in struct) method CD or SML eta learning rate momentum momentum for smoothness amd to prevent overfitting NOTE: momentum is not recommended with SML maxepoch # of epochs: each is a full pass through train data avglast how many epochs before maxepoch to start averaging before. Procedure suggested for faster convergence by Kevin Swersky in his MSc thesis penalty weight decay factor batchsize The number of training instances per batch verbose For printing progress anneal Flag. If set true, the penalty is annealed linearly through epochs to 10% of its original value OUTPUTS: model.type Type of RBM (i.e. type of its visible and hidden units) model.weight The weights of the connections model.biasH The biases of the hidden layer model.biasV The biases of the visible layer model.top The activity of the top layer, to be used when training DBN's errors The errors in reconstruction at every epoch """ arg = util.processOptions(kwargs, \ method = "CD", \ eta = 0.1, \ momentum = 0.9,\ maxEpoch = 50, \ avgLast = 0, \ penalty = 0, \ batchSize = 50, \ verbose = True, \ anneal = False) [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [\ arg["method"],\ arg["eta"],\ arg["momentum"],\ arg["maxEpoch"],\ arg["avgLast"],\ arg["penalty"],\ arg["batchSize"],\ arg["verbose"],\ arg["anneal"] ] # from which step, we start to compute the average avgStart = maxEpoch - avgLast # for weight decay use oldPenalty = penalty # numCases : number of example # numDims : the length of each example # each row is an example [numCases, numDims] = list(data.shape) if verbose: print "processing data" numVis = numDims numBatch = util.ceil(numCases, batchSize) # shuffle the data np.random.shuffle(data) # init CUDA # cm.cuda_set_device() cm.cublas_init() cm.CUDAMatrix.init_random(100) deviceData = cm.CUDAMatrix(cm.reformat(data)) # init weights weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid)) biasV = cm.CUDAMatrix(np.zeros((1, numVis))) biasH = cm.CUDAMatrix(np.zeros((1, numHid))) # init weight update weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid))) biasVInc = cm.CUDAMatrix(np.zeros((1, numVis))) biasHInc = cm.CUDAMatrix(np.zeros((1, numHid))) #init temporary storage visActP = cm.empty((batchSize, numVis)) hidActP = cm.empty((batchSize, numHid)) hidActP2 = cm.empty((batchSize, numHid)) visState = cm.empty((batchSize, numVis)) hidState = cm.empty((batchSize, numHid)) t = 1 for epoch in range(maxEpoch): error = [] if anneal: # apply linear weight decay penalty = oldPenalty - 0.9 * epoch / maxEpoch * oldPenalty for batch in range(numBatch): # train each data batch if batchSize * (batch + 1) > numCases: visTrue = deviceData.get_row_slice(batchSize * batch, numCases) batchSize = visTrue.shape[0] else: visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1)) batchSize = visTrue.shape[0] visActP.assign(visTrue) # positive phase cm.dot(visActP, weight, target=hidActP) hidActP.add_row_vec(biasH) hidActP.apply_sigmoid() hidState.fill_with_rand() hidState.less_than(hidActP, target=hidState) if cmp(method, "SML") == 0: if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)): pass # here does not need in 
practical use elif cmp(method, "CD") == 0: pass # negetive phase if cmp(modelType, "BB") == 0: cm.dot(hidState, weight.transpose(), target=visActP) visActP.add_row_vec(biasV) visActP.apply_sigmoid() visState.fill_with_rand() visState.less_than(visActP, target=visState) elif cmp(modelType, "GB") == 0: cm.dot(hidState, weight.transpose(), target=visActP) visActP.add_row_vec(biasV) visActP.add(np.random.randn(batchSize, numVis), target=visState) # another positive phase cm.dot(visState, weight, target=hidActP2) hidActP2.add_row_vec(biasH) hidActP2.apply_sigmoid() hidState.fill_with_rand() hidState.less_than(hidActP2, target=hidState) #update weight and bias dWeight = cm.dot(visTrue.transpose(), hidActP) dWeight.subtract_dot(visState.transpose(), hidActP2) dBiasV = visTrue.sum(axis=0).subtract(visState.sum(axis=0)) dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis=0)) dWeight.divide(batchSize).subtract(weight.mult(penalty)) dBiasV.divide(batchSize) dBiasH.divide(batchSize) weightInc.mult(momentum).add_mult(dWeight, eta) biasVInc.mult(momentum).add_mult(dBiasV, eta) biasHInc.mult(momentum).add_mult(dBiasH, eta) weight.add(weightInc) biasV.add(biasVInc) biasH.add(biasHInc) if epoch > avgStart: # apply average weightAgv.subtract(weightAgv.subtract(weight).mult(1.0 / t)) biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0 / t)) biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0 / t)) t = t + 1 else: weightAgv = weight biasVAgv = biasV biasHAgv = biasH # reconstruction error visTrue.subtract(visActP) error.append(visTrue.euclid_norm()**2) if verbose: print "epoch %d/%d. Reconstruction error is %f " % ( epoch + 1, maxEpoch, sum(error)) # save rbm model top = cm.CUDAMatrix(np.zeros((numCases, numHid))) cm.dot(deviceData, weightAgv, target=top) top.add_row_vec(biasHAgv) top.apply_sigmoid() model_ = m.rbmModel(weightAgv, biasVAgv, biasHAgv, type=modelType, top=top) cm.shutdown() return model_
####################################################
# Main function
####################################################
if __name__ == "__main__":

    # Check the arguments
    if len(sys.argv) != 2:
        print "Give a file name!"
        exit()

    # Import the digits
    print "Importing digits from {}".format(sys.argv[1])
    digitImport = MNISTImporter.Open(sys.argv[1])

    # Init CUBLAS
    cb.cublas_init()

    # Create the reservoir
    reservoir = Oger.nodes.CUDAReservoirNode(input_dim=digitImport.nbInputs,
                                             output_dim=rc_Size,
                                             input_scaling=rc_InputScaling,
                                             spectral_radius=rc_SpectralRadius)
    readout = Oger.nodes.RidgeRegressionNode(output_dim=rc_nbDigits, dtype='float64')
    classifier = DigitClassifierNode(mnist_space=digitImport.interImagesSpace,
                                     label_space_ratio=digitImport.interImagesRatio,
                                     digit_space_ratio=digitImport.digitImageRatio,
                                     image_size=digitImport.imagesSize,
                                     nb_digit=rc_nbDigits,
                                     method="average",
                                     input_dim=rc_nbDigits,
                                     dtype='float64')

    # Get part of the training set and the labels
    inputs, outputs = digitImport.getTrainingSet(length=rc_TrainingLength)
    inputs_test, outputs_test = digitImport.getTestSet(length=rc_TestLength)
    data = [None, [(inputs, outputs)], None]

    # Build the flow
    flow = mdp.Flow([reservoir, readout, classifier], verbose=0)

    # Train the network
def main(
    full_vocab_file: str,
    repr_vocab_file: str,
    output: str,
    n_components: int,
    sim: str,
    sim_alignment_matrix: str,
    n_ngram: int,
    use_gpu: bool,
    processes: int,
) -> None:
    """Compute KPCA embeddings on a given data set."""
    n = n_ngram  # meh
    output = os.path.abspath(output)
    os.makedirs(output, exist_ok=True)

    full_vocab = _preprocess_vocab_file(full_vocab_file)
    if repr_vocab_file is None:
        repr_vocab = full_vocab
    else:
        repr_vocab = _preprocess_vocab_file(repr_vocab_file)

    params_path = os.path.join(output, 'training_manifest.json')
    secho(f'Outputting training information to {params_path}')
    manifest = dict(
        sim=sim,
        n=n,
        len_full_vocab=len(full_vocab),
        len_repr_vocab=len(repr_vocab),
        kernels=kernels,
    )
    with open(params_path, 'w') as file:
        json.dump(manifest, file, sort_keys=True, indent=2)

    if use_gpu:
        import cudamat as cm
        cm.cublas_init()

    if sim == 'global-alignment':
        secho(f'Computing global alignment similarities with {sim_alignment_matrix}')
        repr_similarity_matrix = calculate_global_alignment_similarity_matrix(
            full_vocab=repr_vocab,
            repr_vocab=repr_vocab,
            processes=processes,
            matrix=sim_alignment_matrix,
            tqdm_desc=f'{EMOJI} Computing self-similarity matrix for '
                      f'repr vocab with global alignment ({sim_alignment_matrix})',
        )
        full_similarity_matrix = calculate_global_alignment_similarity_matrix(
            full_vocab=full_vocab,
            repr_vocab=repr_vocab,
            processes=processes,
            matrix=sim_alignment_matrix,
            tqdm_desc=f'{EMOJI} Computing similarity matrix between '
                      f'full/repr vocab with global alignment ({sim_alignment_matrix})',
        )
    else:
        alphabet = set(itt.chain.from_iterable(repr_vocab))
        alphabet.add(" ")
        ngram_to_index = {
            ngram: i
            for i, ngram in enumerate(["".join(t) for t in itt.product(alphabet, repeat=n)])
        }
        if sim == "ngram_intersec":
            secho(f'Computing n-gram sparse similarities with {sim}')
            repr_similarity_matrix = compute_similarity_matrix_ngram_sparse(
                full_vocab=repr_vocab,
                repr_vocab=repr_vocab,
                ngram_to_index=ngram_to_index,
                n=n,
            )
            full_similarity_matrix = compute_similarity_matrix_ngram_sparse(
                full_vocab=full_vocab,
                repr_vocab=repr_vocab,
                ngram_to_index=ngram_to_index,
                n=n,
            )
        else:  # sim == 'ngram_sim'
            secho(f'Computing n-gram similarities with {sim}')
            repr_similarity_matrix = compute_similarity_matrix_ngram_parallel(
                full_vocab=repr_vocab,
                repr_vocab=repr_vocab,
                n=n,
                ngram_to_index=ngram_to_index,
                processes=processes,  # Extra because this gets multi-processed
            )
            full_similarity_matrix = compute_similarity_matrix_ngram_parallel(
                full_vocab=full_vocab,
                repr_vocab=repr_vocab,
                n=n,
                ngram_to_index=ngram_to_index,
                processes=processes,  # Extra because this gets multi-processed
            )

    repr_similarity_matrix_path = os.path.join(output, "repr_similarity_matrix.npy")
    secho(f"Saving the repr similarity matrix for the full vocabulary to {repr_similarity_matrix_path}")
    np.save(repr_similarity_matrix_path, repr_similarity_matrix, allow_pickle=False)

    full_similarity_matrix_path = os.path.join(output, "full_similarity_matrix.npy")
    secho(f"Saving the full similarity matrix for the full vocabulary to {full_similarity_matrix_path}")
    np.save(full_similarity_matrix_path, full_similarity_matrix, allow_pickle=False)

    optim_folder = os.path.join(output, 'optim')
    os.makedirs(optim_folder, exist_ok=True)

    if n_components is None:
        n_components = int(0.5 + len(repr_vocab) * 2 / 3)

    optimize_projections(
        output=optim_folder,
        repr_similarity_matrix=repr_similarity_matrix,
        full_similarity_matrix=full_similarity_matrix,
        n_components=n_components,
        similarity_type=sim,
        use_gpu=use_gpu,
    )

    if use_gpu:
        # only shut down after all loops have used this function
        import cudamat as cm
        cm.shutdown()

    secho(f"done. Enjoy your {make_ratvec(3)}")
def __init__(self, neu, n_in, n_out,
             gama=0.5, ro=1, psi=0.5, in_scale=0.1,
             bias_scale=0.5, alfa=10, forget=1,
             initial_filename="initial",
             load_initial=False, save_initial=False, noise_amplitude=0):
    # All matrices are initialized from the normal distribution.
    cm.cublas_init()
    print "initializing reservoir"
    print n_in, "Number of inputs"
    self.neu = neu
    self.n_in = n_in
    self.n_out = n_out
    self.noise_amplitude = noise_amplitude

    # Reservoir weight matrix.
    print "initializing reservoir matrix"
    self.Wrr0 = cm.CUDAMatrix(np.random.normal(0, 1, [neu, neu]))
    # input-reservoir weight matrix
    print "initializing input matrix"
    self.Wir0 = cm.CUDAMatrix(np.random.normal(0, 1, [neu, n_in]))
    # bias-reservoir weight matrix
    print "initializing bias matrix"
    self.Wbr0 = cm.CUDAMatrix(np.random.normal(0, 1, [neu, 1]))

    self.Wrr = cm.empty(self.Wrr0.shape)
    self.Wbr = cm.empty(self.Wbr0.shape)
    self.Wir = cm.empty(self.Wir0.shape)
    #self.Wbo = np.random.normal(0,1,[n_out,1])

    # reservoir-output weight matrix
    print "initializing Wro"
    self.Wro = cm.CUDAMatrix(np.random.normal(0, 1, [n_out, neu]))

    self.leakrate = gama          # the network's leak rate
    self.ro = ro                  # the network's desired spectral radius
    self.psi = psi                # the network's sparsity, in 0 to 1 notation
    self.in_scale = in_scale      # the scaling of Wir
    self.bias_scale = bias_scale  # the scaling of Wbr

    # learning rate of the Recursive Least Squares algorithm
    self.alfa = alfa
    # forget factor of the RLS algorithm
    self.forget = forget

    #self.a = np.random.normal(0, 1, [neu, 1])
    self.a = cm.CUDAMatrix(np.zeros([neu, 1]))

    # save if save is enabled
    if save_initial:
        self.save_initial_fun(initial_filename)
    # load if load is enabled
    if load_initial:
        self.LoedInitial(initial_filename)

    # the probability of a member of the matrix Wrr being zero is psi.
    print "define sparseness"
    if psi > 0:
        self.Wrr = Sparcity(self.Wrr0, self.psi)
    else:
        self.Wrr.assign(self.Wrr0)

    # force Wrr to have ro as its spectral radius
    print "calculating eigenvalues"
    eigs = np.linalg.eigvals(self.Wrr.asarray())
    print "finding maximum eigenvalue"
    # the spectral radius is the largest eigenvalue magnitude
    # (the original took np.abs(np.max(eigs)), which is wrong for complex spectra)
    radius = np.max(np.abs(eigs))
    # normalize the matrix
    print "normalize reservoir"
    self.Wrr.divide(np.asscalar(radius))
    # set its spectral radius to ro
    self.Wrr.mult(ro)

    # scale the matrices
    self.Wbr0.mult(bias_scale, target=self.Wbr)
    self.Wir0.mult(in_scale, target=self.Wir)

    # initial conditions for the variable forget factor
    self.sigma_e = 0.001
    self.sigma_q = 0.001
    self.sigma_v = 0.001
    self.K_a = 6.0
    self.K_b = 3.0 * self.K_a

    # covariance matrix
    self.P = cm.CUDAMatrix(np.eye(neu) / alfa)
    print "Reservoir initialization Done"
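# The eigenvalue/normalize/scale sequence above pins the reservoir's spectral
# radius. A minimal NumPy-only sketch of the same operation (standalone
# helper, names hypothetical):
def scale_spectral_radius(W, rho=0.9):
    """Rescale W so that its largest eigenvalue magnitude equals rho."""
    radius = np.max(np.abs(np.linalg.eigvals(W)))
    return W * (rho / radius)

# W = np.random.normal(0, 1, (100, 100))
# W = scale_spectral_radius(W, rho=0.95)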
def matrix_factorization_clustering(X_aux, k, l, norm=False, num_iters=100):
    cm.cublas_init()

    m, n = X_aux.shape
    U = cm.CUDAMatrix(np.random.rand(m, k))
    S = cm.CUDAMatrix(np.random.rand(k, l))
    V = cm.CUDAMatrix(np.random.rand(n, l))
    X = cm.CUDAMatrix(X_aux)

    # if norm:
    #     X = Normalizer().fit_transform(X)

    # preallocate every intermediate product once
    XV = cm.CUDAMatrix(np.random.rand(m, l))
    XVSt = cm.CUDAMatrix(np.random.rand(m, k))
    US = cm.CUDAMatrix(np.random.rand(m, l))
    USVt = cm.CUDAMatrix(np.random.rand(m, n))
    USVtXt = cm.CUDAMatrix(np.random.rand(m, m))
    USVtXtU = cm.CUDAMatrix(np.random.rand(m, k))
    U_aux = cm.CUDAMatrix(np.random.rand(m, k))

    XtUS = cm.CUDAMatrix(np.random.rand(m, l))
    VSt = cm.CUDAMatrix(np.random.rand(n, k))
    VStUt = cm.CUDAMatrix(np.random.rand(n, m))
    UtX = cm.CUDAMatrix(np.random.rand(k, n))
    VStUtXV = cm.CUDAMatrix(np.random.rand(n, l))
    V_aux = cm.CUDAMatrix(np.random.rand(n, l))

    UtXV = cm.CUDAMatrix(np.random.rand(k, l))
    UtUS = cm.CUDAMatrix(np.random.rand(k, l))
    UtUSVt = cm.CUDAMatrix(np.random.rand(k, n))
    UtUSVtV = cm.CUDAMatrix(np.random.rand(k, l))
    S_aux = cm.CUDAMatrix(np.random.rand(k, l))

    # residual buffer for the reconstruction error
    residual = cm.empty((m, n))

    error_best = np.inf
    error = np.inf
    for i in range(num_iters):
        # multiplicative update of U
        cm.dot(X, V, target=XV)
        cm.dot(XV, S.T, target=XVSt)
        if i == 0:
            cm.dot(U, S, target=US)
            cm.dot(US, V.T, target=USVt)
        cm.dot(USVt, X.T, target=USVtXt)
        cm.dot(USVtXt, U, target=USVtXtU)
        cm.divide(XVSt, USVtXtU, U_aux)
        cm.mult(U, U_aux, U)

        # multiplicative update of V
        cm.dot(U, S, target=US)
        cm.dot(X.T, US, target=XtUS)
        cm.dot(V, S.T, target=VSt)
        cm.dot(VSt, U.T, target=VStUt)
        cm.dot(VStUt, XV, target=VStUtXV)
        cm.divide(XtUS, VStUtXV, target=V_aux)
        cm.mult(V, V_aux, V)

        # multiplicative update of S
        cm.dot(U.T, X, target=UtX)
        cm.dot(UtX, V, target=UtXV)
        cm.dot(U.T, US, target=UtUS)
        cm.dot(UtUS, V.T, UtUSVt)
        cm.dot(UtUSVt, V, target=UtUSVtV)
        cm.divide(UtXV, UtUSVtV, target=S_aux)
        cm.mult(S, S_aux, target=S)

        # reconstruction error: squared Frobenius norm of X - U S V^T,
        # written into a separate buffer so USVt (reused by the next U
        # update) is not clobbered. The original summed only over axis 0,
        # leaving a row vector that cannot be compared to a float.
        error_ant = error
        cm.dot(U, S, target=US)
        cm.dot(US, V.T, target=USVt)
        X.subtract(USVt, target=residual)
        error = residual.euclid_norm() ** 2

        if error < error_best:
            # snapshot the factors on the host; keeping a reference to the
            # CUDAMatrix itself would only alias the still-updating matrix
            U_best = U.asarray()
            S_best = S.asarray()
            V_best = V.asarray()
            error_best = error

        if np.abs(error - error_ant) <= 0.000001:
            break

    Du = np.diag(np.ones(m).dot(U_best))
    Dv = np.diag(np.ones(n).dot(V_best))

    U_norm = U_best.dot(np.diag(S_best.dot(Dv).dot(np.ones(l))))
    V_norm = V_best.dot(np.diag(np.ones(k).dot(Du).dot(S_best)))

    rows_ind = np.argmax(U_best, axis=1)
    cols_ind = np.argmax(V_best, axis=1)

    cm.shutdown()

    return U_norm, S_best, V_norm, rows_ind, cols_ind, error_best
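# A hedged usage sketch for the co-clustering factorization above: cluster a
# random nonnegative matrix into 5 row clusters and 4 column clusters (the
# multiplicative updates assume nonnegative input; data invented).
X = np.random.rand(200, 80)
U_norm, S_best, V_norm, rows_ind, cols_ind, err = \
    matrix_factorization_clustering(X, 5, 4, num_iters=50)
print rows_ind[:10]  # cluster assignments of the first ten rows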
# NMF algorithms in cudamat

import numpy as np
import cudamat as cm
import NMFbase

# initialize the cudamat library
cm.cublas_init()


class NMFcudamat(NMFbase.NMFbase):

    def getH(self):
        return self.H_gpu.asarray()

    def getW(self):
        return self.W_gpu.asarray()


class NMF(NMFcudamat):

    def setVariables(self):
        self.H_gpu = cm.CUDAMatrix(self.H)
        self.W_gpu = cm.CUDAMatrix(self.W)
        self.X_gpu = cm.CUDAMatrix(self.X)
        self.WTW_gpu = cm.empty((self.rank, self.rank))
        self.WTWH_gpu = cm.empty(self.H.shape)
        self.WTX_gpu = cm.empty(self.H.shape)
        self.XHT_gpu = cm.empty(self.W.shape)
        self.WH_gpu = cm.empty(self.X.shape)
        self.WHHT_gpu = cm.empty(self.W.shape)
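# The buffers preallocated in setVariables() match the Lee-Seung
# multiplicative updates H <- H * (W^T X)/(W^T W H) and
# W <- W * (X H^T)/(W H H^T). A hedged sketch of one such update step over
# those buffers (an assumption; the class's real update method is not shown):
def update_once(self):
    # H update: numerator W^T X, denominator W^T W H
    cm.dot(self.W_gpu.T, self.X_gpu, target=self.WTX_gpu)
    cm.dot(self.W_gpu.T, self.W_gpu, target=self.WTW_gpu)
    cm.dot(self.WTW_gpu, self.H_gpu, target=self.WTWH_gpu)
    self.WTX_gpu.divide(self.WTWH_gpu)
    self.H_gpu.mult(self.WTX_gpu)
    # W update: numerator X H^T, denominator (W H) H^T
    cm.dot(self.X_gpu, self.H_gpu.T, target=self.XHT_gpu)
    cm.dot(self.W_gpu, self.H_gpu, target=self.WH_gpu)
    cm.dot(self.WH_gpu, self.H_gpu.T, target=self.WHHT_gpu)
    self.XHT_gpu.divide(self.WHHT_gpu)
    self.W_gpu.mult(self.XHT_gpu)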