def rbmHtoV(m, X):
    """convey data from hidden layer to visible layer"""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasV = cm.CUDAMatrix(cm.reformat(m.biasV))

    nCase = X.shape[0]
    nVis = biasV.asarray().size
    VisActP = cm.CUDAMatrix(np.zeros((nCase, nVis)))

    if m.type == "BB":
        cm.dot(data, weight.T, target=VisActP)
        VisActP.add_row_vec(biasV)
        VisActP.apply_sigmoid()
    elif m.type == "BG":
        cm.dot(data, weight.T, target=VisActP)
        VisActP.add_row_vec(biasV)
    elif m.type == "GB":
        pass

    result = VisActP.asarray()

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasV.free_device_memory()
    VisActP.free_device_memory()

    cm.shutdown()

    return result
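# Minimal usage sketch for rbmHtoV (illustrative, not from the original
# source): `model` is assumed to be a trained rbmModel exposing `weight`,
# `biasV`, and `type`, matching what rbmHtoV reads above; the data values
# are made up.
def _example_rbmHtoV(model):
    import numpy as np
    hidden = np.random.rand(100, model.weight.shape[1])  # 100 cases of hidden activations
    visible = rbmHtoV(model, hidden)                     # (100, numVis) visible probabilities
    return visible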
def __init__(self, inputDim, outputDim, layerSize, numLayers, maxBatch,
             train=True, temporalLayer=-1):
    # Initialize cublas
    cm.cublas_init()

    self.outputDim = outputDim
    self.inputDim = inputDim
    self.layerSize = layerSize
    self.numLayers = numLayers
    self.layerSizes = [layerSize] * numLayers
    self.maxBatch = maxBatch
    self.train = train

    if not self.train:
        np.seterr(all='ignore')

    if temporalLayer <= 0 or temporalLayer >= numLayers:
        self.temporalLayer = -1
    else:
        self.temporalLayer = temporalLayer

    self.maxAct = 20.0
def train_init(self):
    # init cudamat
    cm.cublas_init()
    cm.CUDAMatrix.init_random(1)

    self.Wgpu = cm.CUDAMatrix(self.W)
    self.speed = cm.empty(self.W.shape)
    self.speed.assign(0)
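# train_init above allocates `self.speed` as a zeroed momentum buffer for
# self.Wgpu. A sketch of the update step such a buffer typically feeds
# (an assumption; the actual train step is not part of this snippet):
def _example_momentum_step(net, gradW, eta=0.01, momentum=0.9):
    net.speed.mult(momentum)          # decay the old velocity
    net.speed.add_mult(gradW, -eta)   # accumulate the new (negative) gradient
    net.Wgpu.add(net.speed)           # take the step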
def __init__(self, inputDim, outputDim, layerSize, numLayers, maxBatch, train=True):
    # Initialize cublas
    cm.cublas_init()

    self.outputDim = outputDim
    self.inputDim = inputDim
    self.layerSizes = [layerSize] * numLayers
    self.maxBatch = maxBatch
    self.train = train
def LockGPU():
    board = gpu_lock.obtain_lock_id()
    if board == -1:
        print 'No GPU board available.'
        sys.exit(1)
    else:
        cm.cuda_set_device(board)
        cm.cublas_init()
    return board
def LockGPU(max_retries=10):
    for retry_count in range(max_retries):
        board = gpu_lock.obtain_lock_id()
        if board != -1:
            break
    if board == -1:
        print 'No GPU board available.'
        sys.exit(1)
    else:
        cm.cuda_set_device(board)
        cm.cublas_init()
    return board  # return the locked board id so callers can free it later
def LockGPU(max_retries=10):
    """ Locks a free GPU board and returns its id. """
    for retry_count in range(max_retries):
        board = gpu_lock.obtain_lock_id()
        if board != -1:
            break
        sleep(1)
    if board == -1:
        print 'No GPU board available.'
        sys.exit(1)
    else:
        cm.cuda_set_device(board)
        cm.cublas_init()
    return board
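# Minimal usage sketch (illustrative): acquire a board before building any
# CUDAMatrix objects and release it when done. `gpu_lock` is the external
# locking module these snippets assume; the free_lock call shown here is an
# assumption about that module's API, not code from the source above.
def _example_lock_unlock():
    board = LockGPU(max_retries=5)
    try:
        pass  # ... build cm.CUDAMatrix objects and train ...
    finally:
        cm.cublas_shutdown()       # pair the cublas_init() done inside LockGPU
        gpu_lock.free_lock(board)  # assumed gpu_lock API; verify in your setup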
def LockGPU(max_retries=10, board=-1):
    retry_count = 0
    while board == -1 and retry_count < max_retries:
        board = gpu_lock.obtain_lock_id()
        if board == -1:
            sleep(1)
            retry_count += 1
    if board == -1:
        print 'No GPU board available.'
        sys.exit(1)
    else:
        cm.cuda_set_device(board)
        cm.cublas_init()
    return board
def LockGPU(max_retries=10, board=-1):
    # The usual gpu_lock retry loop (as in the variants above) is bypassed
    # here; the board id is hardcoded instead.
    board = 3
    cm.cuda_set_device(board)
    cm.cublas_init()
    return board
def main():
    # initialize CUDA
    cm.cublas_init()

    # training parameters
    epsilon = 0.01
    momentum = 0.9
    num_epochs = 30
    batch_size = 128
    num_batches = 92

    # model parameters
    dim_in = 784
    dim_out = 1
    num_hid = 1024

    # load data
    util.load('data/mnist49.dat', globals())

    global dat_train
    global dat_test
    global lbl_train
    global lbl_test

    # Put training data onto the GPU.
    dat_train = dat_train / 255.
    dat_train = dat_train - (np.mean(dat_train, 1) + 10**-8)[:, np.newaxis]
    dev_train = cm.CUDAMatrix(dat_train)
    dev_lbl = cm.CUDAMatrix(lbl_train)

    net = ffnet.FFNet(epsilon, momentum, num_epochs, batch_size, num_batches,
                      dim_in, dim_out, num_hid)
    net.train(dev_train, dev_lbl)

    # Load test data onto the GPU.
    dat_test = dat_test / 255.
    dat_test = dat_test - np.mean(dat_test, 1)[:, np.newaxis]
    dev_test = cm.CUDAMatrix(dat_test)
    dev_lbl = cm.CUDAMatrix(lbl_test)

    net.reinitTestStorage(dat_test.shape[1])
    net.test(dev_test, dev_lbl)

    cm.cublas_shutdown()
def __init__(self, N=1000, pz=1, pg=0.1, g=1.5, alpha=1, dt=0.1,
             num_fits=1, num_inputs=0, state=None):
    cm.cublas_init()

    if state is not None:
        self.from_dict(state)
    else:
        self.N = N
        self.pg = pg
        self.pz = pz
        self.g = g
        self.alpha = alpha
        self.DT = dt
        self.num_fits = num_fits

        scale = 1.0 / np.sqrt(self.pg * self.N)
        M_rvs = stats.norm(loc=0, scale=scale).rvs
        self.M = sp.sparse.random(N, N, pg, data_rvs=M_rvs) * g
        self.M = cm.CUDAMatrix(self.M.toarray())

        self.P = (1.0 / self.alpha) * np.identity(N)
        self.wf = cm.CUDAMatrix(np.random.uniform(-1, 1, (N, num_fits)))
        # self.wo = np.expand_dims(stats.norm(loc=0, scale=(1.0/np.sqrt(N))).rvs(N), num_fits)
        self.wo = cm.CUDAMatrix(np.zeros((N, num_fits)))
        self.dw = np.zeros((N, num_fits))
        self.woc = np.zeros((N, 1))
        self.wfc = np.random.uniform(-1, 1, (N, 1))

        self.x = cm.CUDAMatrix(np.expand_dims(0.5 * np.random.randn(N), 1))
        self.xdt = cm.empty(self.x.shape).assign(0)
        self.r = cm.tanh(self.x)
        self.rdt = cm.empty(self.r.shape).assign(0)
        self.z = cm.CUDAMatrix(np.expand_dims(0.5 * np.random.randn(num_fits), 1))
        self.zdt = cm.empty(self.z.shape).assign(0)
        self.z_ctl = np.expand_dims(0.5 * np.random.randn(1), 1)
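# The buffers above (P = (1/alpha)*I, wo, wf, x, r = tanh(x), z) are the usual
# state of FORCE learning (Sussillo & Abbott, 2009). A NumPy sketch of the
# recursive-least-squares update they typically feed, shown as an assumption
# about how the class is used, not as code from the source. Here P is (N, N),
# r and wo are length-N vectors, and z and f_t are scalars:
def _example_force_update(P, wo, r, z, f_t):
    import numpy as np
    k = np.dot(P, r)                 # P r
    c = 1.0 / (1.0 + np.dot(r, k))   # 1 / (1 + r' P r)
    P -= c * np.outer(k, k)          # rank-1 update of the inverse correlation
    e = z - f_t                      # readout error against the target f(t)
    wo -= c * e * k                  # least-squares output-weight update
    return P, wo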
def __init__(self, n_hidden=10, max_iter=10000, tol=1e-5, anneal=True,
             missing_values=None, discourage_overlap=True, gaussianize='standard',
             gpu=False, verbose=False, precision=1e-8, seed=None):
    self.m = n_hidden  # Number of latent factors to learn
    self.max_iter = max_iter  # Number of iterations to try
    self.tol = tol  # Threshold for convergence
    self.anneal = anneal
    self.eps = 0  # If anneal is True, it's adjusted during optimization to avoid local minima
    self.missing_values = missing_values
    self.discourage_overlap = discourage_overlap  # Whether or not to discourage overlapping latent factors
    self.gaussianize = gaussianize  # Preprocess data: 'standard' scales to zero mean and unit variance
    self.gpu = gpu  # Enable GPU support for some large matrix multiplications.
    if self.gpu:
        cm.cublas_init()

    self.yscale = 1.  # Can be arbitrary, but sets the scale of Y
    np.random.seed(seed)  # Set seed for deterministic results
    self.verbose = verbose
    if verbose:
        np.set_printoptions(precision=3, suppress=True, linewidth=160)
        print('Linear CorEx with {:d} latent factors'.format(n_hidden))
    self.precision = precision

    # Initialize these when we fit on data
    self.n_samples, self.nv = 0, 0  # Number of samples/variables in input data
    self.ws = np.zeros((0, 0))  # m by nv array of weights
    self.moments = {}  # Dictionary of moments
    self.theta = None  # Parameters for preprocessing each variable
    self.history = {}  # Keep track of values for each iteration
    self.last_update = 0  # Used for momentum methods
def rbmVtoH(m, X):
    """convey data from visible layer to hidden layer"""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))

    nCase = X.shape[0]
    nHid = biasH.asarray().size
    hidActP = cm.CUDAMatrix(np.zeros((nCase, nHid)))

    if m.type == "BB":
        cm.dot(data, weight, target=hidActP)
        hidActP.add_row_vec(biasH)
        hidActP.apply_sigmoid()
    elif m.type == "BG":
        cm.dot(data, weight, target=hidActP)
        hidActP.add_row_vec(biasH)
    elif m.type == "GB":
        pass

    result = hidActP.asarray()

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasH.free_device_memory()
    hidActP.free_device_memory()

    cm.shutdown()

    return result
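# Minimal round-trip sketch (illustrative): push visible data up with
# rbmVtoH and project back down with rbmHtoV to inspect the reconstruction.
# `model` is assumed to be a trained rbmModel as in the functions above.
def _example_reconstruction(model, X):
    import numpy as np
    H = rbmVtoH(model, X)            # visible -> hidden probabilities
    Xrec = rbmHtoV(model, H)         # hidden -> visible reconstruction
    return np.mean((X - Xrec) ** 2)  # mean squared reconstruction error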
def train(self):
    '''
    Main train function : modified version of the original train function.

    Additions :
        GPU selection (useful for multi-GPU machines)
        Saving the sum of the square of the data for post-processing
        Visible data are saved
        Data samples are permuted for training
        Weights are saved every 100 training epochs
        Training energy is visualized every 100 training epochs

    NOTE : the anneal learning rate used in the initial code is NOT used here!
    '''
    # plt.ion()
    f1 = plt.figure()
    ax1 = f1.add_subplot(111)
    # ax2 = f1.add_subplot(122)
    # plt.show()

    cmt.cuda_set_device(self.gpuId)
    cmt.cublas_init()
    cmt.CUDAMatrix.init_random(1)

    np.random.seed(self.npRandSeed)
    prng = RandomState(self.npRandState)

    ################################################################
    ##################### CHANGE PATH ##############################
    # Move to current experiment path:
    os.chdir(self.saveDir)
    # Get current path:
    os.getcwd()

    self.plotsDir = 'plots'
    # self.probabilitiesDir = 'p_all'
    if not os.path.isdir(self.plotsDir):
        os.makedirs(self.plotsDir)
    if not os.path.isdir(self.plotsDir + '/energy'):
        os.makedirs(self.plotsDir + '/energy')
    # if not os.path.isdir(self.probabilitiesDir):
    #     os.makedirs(self.probabilitiesDir)
    if not os.path.isdir('weights'):
        os.makedirs('weights')

    d = self.d.astype(np.float32)
    print("visible size: ", d.shape)

    dsq = np.square(d)
    lsq = np.sum(dsq, axis=0)
    with open('lsqComplete.pkl', 'wb') as pklFile:
        cPickle.dump(lsq, pklFile)
    del dsq, lsq

    # Save visible data :
    visData = d
    np.savez('visData.npz', data=d, obsKeys=self.obsKeys, epochTime=self.epochTime)

    with open('visData.txt', 'w') as f:
        f.write("\n Dataset : %s" % (self.dataFilename))
        f.write("\n visData size: %s " % str(visData.shape))
        f.write("\n visData type: %s " % str(visData.dtype))
        f.write("\n \n visData Range: %s " % str(np.max(visData, axis=0) - np.min(visData, axis=0)))
        f.write("\n \n visData min: %s " % str(np.min(visData, axis=0)))
        f.write("\n \n visData max: %s " % str(np.max(visData, axis=0)))
        f.write("\n \n visData mean: %s " % str(np.mean(visData, axis=0)))
        f.write("\n \n visData std: %s " % str(np.std(visData, axis=0)))

    del visData  # if not needed for computing the latent states

    permIdx = prng.permutation(d.shape[0])
    d = d[permIdx, :]

    # subsetting train and test datasets
    # trainPerc = 0.7
    # trainSampNum = int(np.ceil(trainPerc*d.shape[0]))
    # trainSampNum = int(np.floor(trainSampNum/self.batch_size)*self.batch_size)
    # testSampNum = int(d.shape[0]-trainSampNum-1)

    # The test dataset is not used at the moment; it can be used as
    # a validation set to check for overfitting. To use it, uncomment
    # all the variables with 'test' in their name.
    #~ d_test = d[trainSampNum+1:,:]
    # d = d[:trainSampNum,:]
    # obsKeys = self.obsKeys[:trainSampNum]

    totnumcases = d.shape[0]
    num_vis = d.shape[1]

    num_batches = int(totnumcases / self.batch_size)
    print("num_batches: ", num_batches)
    dev_dat = cmt.CUDAMatrix(d.T)  # VxP
    #~ test_dat = cmt.CUDAMatrix(d_test.T)

    del d, self.d, self.epochTime, self.obsKeys

    # training parameters (as in the original code by Ranzato)
    epsilon = self.epsilon
    epsilonVF = 2 * epsilon
    epsilonFH = 0.02 * epsilon
    epsilonb = 0.02 * epsilon
    epsilonw_mean = 0.2 * epsilon
    epsilonb_mean = 0.1 * epsilon
    weightcost_final = self.weightcost_final

    # HMC setting
    hmc_step_nr = self.hmc_step_nr
    hmc_step = 0.01
    hmc_target_ave_rej = self.hmc_target_ave_rej
    hmc_ave_rej = hmc_target_ave_rej

    # initialize weights
    VF = cmt.CUDAMatrix(np.array(0.02 * prng.randn(num_vis, self.num_fac),
                                 dtype=np.float32, order='F'))  # VxH
    if self.apply_mask == 0:
        FH = cmt.CUDAMatrix(np.array(np.eye(self.num_fac, self.num_hid_cov),
                                     dtype=np.float32, order='F'))  # HxO
    else:
        # see CVPR2010paper_material/topo2D_3x3_stride2_576filt.mat for an example
        dd = loadmat('your_FHinit_mask_file.mat')
        FH = cmt.CUDAMatrix(np.array(dd["FH"], dtype=np.float32, order='F'))
    bias_cov = cmt.CUDAMatrix(np.array(2.0 * np.ones((self.num_hid_cov, 1)),
                                       dtype=np.float32, order='F'))
    bias_vis = cmt.CUDAMatrix(np.array(np.zeros((num_vis, 1)),
                                       dtype=np.float32, order='F'))
    w_mean = cmt.CUDAMatrix(np.array(0.05 * prng.randn(num_vis, self.num_hid_mean),
                                     dtype=np.float32, order='F'))  # VxH
    bias_mean = cmt.CUDAMatrix(np.array(-2.0 * np.ones((self.num_hid_mean, 1)),
                                        dtype=np.float32, order='F'))

    # initialize variables to store derivatives
    VFinc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, self.num_fac)), dtype=np.float32, order='F'))
    FHinc = cmt.CUDAMatrix(np.array(np.zeros((self.num_fac, self.num_hid_cov)), dtype=np.float32, order='F'))
    bias_covinc = cmt.CUDAMatrix(np.array(np.zeros((self.num_hid_cov, 1)), dtype=np.float32, order='F'))
    bias_visinc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, 1)), dtype=np.float32, order='F'))
    w_meaninc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, self.num_hid_mean)), dtype=np.float32, order='F'))
    bias_meaninc = cmt.CUDAMatrix(np.array(np.zeros((self.num_hid_mean, 1)), dtype=np.float32, order='F'))

    # initialize temporary storage
    data = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    normdata = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    negdataini = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    feat = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    featsq = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    negdata = cmt.CUDAMatrix(np.array(prng.randn(num_vis, self.batch_size), dtype=np.float32, order='F'))
    old_energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    new_energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    energy = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    gradient = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    normgradient = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))  # VxP
    thresh = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))
    feat_mean = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_mean, self.batch_size)), dtype=np.float32, order='F'))
    vel = cmt.CUDAMatrix(np.array(prng.randn(num_vis, self.batch_size), dtype=np.float32, order='F'))
    length = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))  # 1xP
    lengthsq = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))  # 1xP
    normcoeff = cmt.CUDAMatrix(np.array(np.zeros((1, self.batch_size)), dtype=np.float32, order='F'))  # 1xP

    # commented to avoid computing the energy on test data
    #~ data_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F')) # Vxtest_batch
    #~ normdata_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F')) # Vxtest_batch
    #~ length_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ lengthsq_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ normcoeff_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F')) # 1xtest_batch
    #~ vel_test = cmt.CUDAMatrix( np.array(prng.randn(num_vis, testSampNum), dtype=np.float32, order='F'))
    #~ feat_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ featsq_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ feat_mean_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_mean, testSampNum)), dtype=np.float32, order='F'))
    #~ energy_test = cmt.CUDAMatrix( np.array(np.zeros((1, testSampNum)), dtype=np.float32, order='F'))

    if self.apply_mask == 1:
        # this is used to constrain very large FH matrices, only allowing changes in a neighborhood
        dd = loadmat('your_FHinit_mask_file.mat')
        mask = cmt.CUDAMatrix(np.array(dd["mask"], dtype=np.float32, order='F'))

    normVF = 1
    small = 0.5

    # other temporary vars
    t1 = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_cov, self.batch_size)), dtype=np.float32, order='F'))
    t2 = cmt.CUDAMatrix(np.array(np.empty((self.num_hid_cov, self.batch_size)), dtype=np.float32, order='F'))
    t3 = cmt.CUDAMatrix(np.array(np.empty((self.num_fac, self.batch_size)), dtype=np.float32, order='F'))
    t4 = cmt.CUDAMatrix(np.array(np.empty((1, self.batch_size)), dtype=np.float32, order='F'))
    t5 = cmt.CUDAMatrix(np.array(np.empty((1, 1)), dtype=np.float32, order='F'))
    t6 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))
    t7 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.batch_size)), dtype=np.float32, order='F'))
    t8 = cmt.CUDAMatrix(np.array(np.empty((num_vis, self.num_fac)), dtype=np.float32, order='F'))
    t9 = cmt.CUDAMatrix(np.array(np.zeros((self.num_fac, self.num_hid_cov)), dtype=np.float32, order='F'))
    t10 = cmt.CUDAMatrix(np.array(np.empty((1, self.num_fac)), dtype=np.float32, order='F'))
    t11 = cmt.CUDAMatrix(np.array(np.empty((1, self.num_hid_cov)), dtype=np.float32, order='F'))

    # commented to avoid computing the energy on test data
    #~ t1_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_cov, testSampNum)), dtype=np.float32, order='F'))
    #~ t2_test = cmt.CUDAMatrix( np.array(np.empty((self.num_hid_cov, testSampNum)), dtype=np.float32, order='F'))
    #~ t3_test = cmt.CUDAMatrix( np.array(np.empty((self.num_fac, testSampNum)), dtype=np.float32, order='F'))
    #~ t4_test = cmt.CUDAMatrix( np.array(np.empty((1, testSampNum)), dtype=np.float32, order='F'))
    #~ t5_test = cmt.CUDAMatrix( np.array(np.empty((1, 1)), dtype=np.float32, order='F'))
    #~ t6_test = cmt.CUDAMatrix( np.array(np.empty((num_vis, testSampNum)), dtype=np.float32, order='F'))

    meanEnergy = np.zeros(self.num_epochs)
    minEnergy = np.zeros(self.num_epochs)
    maxEnergy = np.zeros(self.num_epochs)
    #~ meanEnergy_test = np.zeros(self.num_epochs)
    #~ minEnergy_test = np.zeros(self.num_epochs)
    #~ maxEnergy_test = np.zeros(self.num_epochs)

    # start training
    for epoch in range(self.num_epochs):
        print "Epoch " + str(epoch)

        # anneal learning rates as found in the original code -
        # uncomment if you wish to use annealing!
        #~ epsilonVFc = epsilonVF/max(1,epoch/20)
        #~ epsilonFHc = epsilonFH/max(1,epoch/20)
        #~ epsilonbc = epsilonb/max(1,epoch/20)
        #~ epsilonw_meanc = epsilonw_mean/max(1,epoch/20)
        #~ epsilonb_meanc = epsilonb_mean/max(1,epoch/20)

        # no annealing is used in our experiments because learning
        # was stopping too early
        epsilonVFc = epsilonVF
        epsilonFHc = epsilonFH
        epsilonbc = epsilonb
        epsilonw_meanc = epsilonw_mean
        epsilonb_meanc = epsilonb_mean

        weightcost = weightcost_final
        if epoch <= self.startFH:
            epsilonFHc = 0
        if epoch <= self.startwd:
            weightcost = 0

        # commented to avoid computing the energy on test data
        #~ data_test = test_dat
        #~ data_test.mult(data_test, target = t6_test) # DxP
        #~ t6_test.sum(axis = 0, target = lengthsq_test) # 1xP
        #~ lengthsq_test.mult(1./num_vis) # normalize by number of components (like std)
        #~ lengthsq_test.add(small) # small avoids division by 0
        #~ cmt.sqrt(lengthsq_test, target = length_test)
        #~ length_test.reciprocal(target = normcoeff_test) # 1xP
        #~ data_test.mult_by_row(normcoeff_test, target = normdata_test) # normalized data

        for batch in range(num_batches):
            # get current minibatch
            data = dev_dat.slice(batch * self.batch_size, (batch + 1) * self.batch_size)  # DxP (nr dims x nr samples)

            # normalize input data
            data.mult(data, target=t6)  # DxP
            t6.sum(axis=0, target=lengthsq)  # 1xP
            lengthsq.mult(1. / num_vis)  # normalize by number of components (like std)
            lengthsq.add(small)  # small avoids division by 0
            cmt.sqrt(lengthsq, target=length)
            length.reciprocal(target=normcoeff)  # 1xP
            data.mult_by_row(normcoeff, target=normdata)  # normalized data

            ## compute positive sample derivatives
            # covariance part
            cmt.dot(VF.T, normdata, target=feat)  # HxP (nr facs x nr samples)
            feat.mult(feat, target=featsq)  # HxP
            cmt.dot(FH.T, featsq, target=t1)  # OxP (nr cov hiddens x nr samples)
            t1.mult(-0.5)
            t1.add_col_vec(bias_cov)  # OxP
            t1.apply_sigmoid(target=t2)  # OxP
            cmt.dot(featsq, t2.T, target=FHinc)  # HxO
            cmt.dot(FH, t2, target=t3)  # HxP
            t3.mult(feat)
            cmt.dot(normdata, t3.T, target=VFinc)  # VxH
            t2.sum(axis=1, target=bias_covinc)
            bias_covinc.mult(-1)

            # visible bias
            data.sum(axis=1, target=bias_visinc)
            bias_visinc.mult(-1)

            # mean part
            cmt.dot(w_mean.T, data, target=feat_mean)  # HxP (nr mean hiddens x nr samples)
            feat_mean.add_col_vec(bias_mean)  # HxP
            feat_mean.apply_sigmoid()  # HxP
            feat_mean.mult(-1)
            cmt.dot(data, feat_mean.T, target=w_meaninc)
            feat_mean.sum(axis=1, target=bias_meaninc)

            # HMC sampling: draw an approximate sample from the model
            if self.doPCD == 0:  # CD-1 (set negative data to current training samples)
                hmc_step, hmc_ave_rej = self.draw_HMC_samples(
                    data, negdata, normdata, vel, gradient, normgradient,
                    new_energy, old_energy, VF, FH, bias_cov, bias_vis,
                    w_mean, bias_mean, hmc_step, hmc_step_nr, hmc_ave_rej,
                    hmc_target_ave_rej, t1, t2, t3, t4, t5, t6, t7, thresh,
                    feat, featsq, self.batch_size, feat_mean, length,
                    lengthsq, normcoeff, small, num_vis)
            else:  # PCD-1 (use previous negative data as starting point for chain)
                negdataini.assign(negdata)
                hmc_step, hmc_ave_rej = self.draw_HMC_samples(
                    negdataini, negdata, normdata, vel, gradient, normgradient,
                    new_energy, old_energy, VF, FH, bias_cov, bias_vis,
                    w_mean, bias_mean, hmc_step, hmc_step_nr, hmc_ave_rej,
                    hmc_target_ave_rej, t1, t2, t3, t4, t5, t6, t7, thresh,
                    feat, featsq, self.batch_size, feat_mean, length,
                    lengthsq, normcoeff, small, num_vis)

            # compute derivatives at the negative samples
            # normalize input data
            negdata.mult(negdata, target=t6)  # DxP
            t6.sum(axis=0, target=lengthsq)  # 1xP
            lengthsq.mult(1. / num_vis)  # normalize by number of components (like std)
            lengthsq.add(small)
            cmt.sqrt(lengthsq, target=length)
            length.reciprocal(target=normcoeff)  # 1xP
            negdata.mult_by_row(normcoeff, target=normdata)  # normalized data

            # covariance part
            cmt.dot(VF.T, normdata, target=feat)  # HxP
            feat.mult(feat, target=featsq)  # HxP
            cmt.dot(FH.T, featsq, target=t1)  # OxP
            t1.mult(-0.5)
            t1.add_col_vec(bias_cov)  # OxP
            t1.apply_sigmoid(target=t2)  # OxP
            FHinc.subtract_dot(featsq, t2.T)  # HxO
            FHinc.mult(0.5)
            cmt.dot(FH, t2, target=t3)  # HxP
            t3.mult(feat)
            VFinc.subtract_dot(normdata, t3.T)  # VxH
            bias_covinc.add_sums(t2, axis=1)

            # visible bias
            bias_visinc.add_sums(negdata, axis=1)

            # mean part
            cmt.dot(w_mean.T, negdata, target=feat_mean)  # HxP
            feat_mean.add_col_vec(bias_mean)  # HxP
            feat_mean.apply_sigmoid()  # HxP
            w_meaninc.add_dot(negdata, feat_mean.T)
            bias_meaninc.add_sums(feat_mean, axis=1)

            # update parameters
            VFinc.add_mult(VF.sign(), weightcost)  # L1 regularization
            VF.add_mult(VFinc, -epsilonVFc / self.batch_size)
            # normalize columns of VF: normalize by running average of their norm
            VF.mult(VF, target=t8)
            t8.sum(axis=0, target=t10)
            cmt.sqrt(t10)
            t10.sum(axis=1, target=t5)
            t5.copy_to_host()
            normVF = .95 * normVF + (.05 / self.num_fac) * t5.numpy_array[0, 0]  # estimate norm
            t10.reciprocal()
            VF.mult_by_row(t10)
            VF.mult(normVF)
            bias_cov.add_mult(bias_covinc, -epsilonbc / self.batch_size)
            bias_vis.add_mult(bias_visinc, -epsilonbc / self.batch_size)

            if epoch > self.startFH:
                FHinc.add_mult(FH.sign(), weightcost)  # L1 regularization
                FH.add_mult(FHinc, -epsilonFHc / self.batch_size)  # update
                # set to 0 negative entries in FH
                FH.greater_than(0, target=t9)
                FH.mult(t9)
                if self.apply_mask == 1:
                    FH.mult(mask)
                # normalize columns of FH: L1 norm set to 1 in each column
                FH.sum(axis=0, target=t11)
                t11.reciprocal()
                FH.mult_by_row(t11)

            w_meaninc.add_mult(w_mean.sign(), weightcost)
            w_mean.add_mult(w_meaninc, -epsilonw_meanc / self.batch_size)
            bias_mean.add_mult(bias_meaninc, -epsilonb_meanc / self.batch_size)

        if self.verbose == 1:
            print ("VF: " + '%3.2e' % VF.euclid_norm()
                   + ", DVF: " + '%3.2e' % (VFinc.euclid_norm() * (epsilonVFc / self.batch_size))
                   + ", FH: " + '%3.2e' % FH.euclid_norm()
                   + ", DFH: " + '%3.2e' % (FHinc.euclid_norm() * (epsilonFHc / self.batch_size))
                   + ", bias_cov: " + '%3.2e' % bias_cov.euclid_norm()
                   + ", Dbias_cov: " + '%3.2e' % (bias_covinc.euclid_norm() * (epsilonbc / self.batch_size))
                   + ", bias_vis: " + '%3.2e' % bias_vis.euclid_norm()
                   + ", Dbias_vis: " + '%3.2e' % (bias_visinc.euclid_norm() * (epsilonbc / self.batch_size))
                   + ", wm: " + '%3.2e' % w_mean.euclid_norm()
                   + ", Dwm: " + '%3.2e' % (w_meaninc.euclid_norm() * (epsilonw_meanc / self.batch_size))
                   + ", bm: " + '%3.2e' % bias_mean.euclid_norm()
                   + ", Dbm: " + '%3.2e' % (bias_meaninc.euclid_norm() * (epsilonb_meanc / self.batch_size))
                   + ", step: " + '%3.2e' % hmc_step
                   + ", rej: " + '%3.2e' % hmc_ave_rej)
            with open('terminal.txt', 'a') as f:
                f.write('\n' + "epoch: %s" % str(epoch)
                        + ", VF: " + '%3.2e' % VF.euclid_norm()
                        + ", DVF: " + '%3.2e' % (VFinc.euclid_norm() * (epsilonVFc / self.batch_size))
                        + ", FH: " + '%3.2e' % FH.euclid_norm()
                        + ", DFH: " + '%3.2e' % (FHinc.euclid_norm() * (epsilonFHc / self.batch_size))
                        + ", bias_cov: " + '%3.2e' % bias_cov.euclid_norm()
                        + ", Dbias_cov: " + '%3.2e' % (bias_covinc.euclid_norm() * (epsilonbc / self.batch_size))
                        + ", bias_vis: " + '%3.2e' % bias_vis.euclid_norm()
                        + ", Dbias_vis: " + '%3.2e' % (bias_visinc.euclid_norm() * (epsilonbc / self.batch_size))
                        + ", wm: " + '%3.2e' % w_mean.euclid_norm()
                        + ", Dwm: " + '%3.2e' % (w_meaninc.euclid_norm() * (epsilonw_meanc / self.batch_size))
                        + ", bm: " + '%3.2e' % bias_mean.euclid_norm()
                        + ", Dbm: " + '%3.2e' % (bias_meaninc.euclid_norm() * (epsilonb_meanc / self.batch_size))
                        + ", step: " + '%3.2e' % hmc_step
                        + ", rej: " + '%3.2e' % hmc_ave_rej)
            sys.stdout.flush()

        # compute the energy on training data
        self.compute_energy_mcRBM_visual(data, normdata, energy, VF, FH,
                                         bias_cov, bias_vis, w_mean, bias_mean,
                                         t1, t2, t6, feat, featsq, feat_mean,
                                         length, lengthsq, normcoeff, small,
                                         num_vis)
        energy.copy_to_host()
        meanEnergy[epoch] = np.mean(energy.numpy_array)
        minEnergy[epoch] = np.min(energy.numpy_array)
        maxEnergy[epoch] = np.max(energy.numpy_array)

        # commented to avoid computing the energy on test data
        #~ self.compute_energy_mcRBM_visual(data_test, normdata_test, energy_test, VF, FH, bias_cov, bias_vis, w_mean, bias_mean, t1_test, t2_test, t6_test, feat_test, featsq_test, feat_mean_test, length_test, lengthsq_test, normcoeff_test, small, num_vis)
        #~ energy_test.copy_to_host()
        #~ meanEnergy_test[epoch] = np.mean(energy_test.numpy_array)
        #~ minEnergy_test[epoch] = np.min(energy_test.numpy_array)
        #~ maxEnergy_test[epoch] = np.max(energy_test.numpy_array)

        ax1.cla()
        ax1.plot(range(epoch), meanEnergy[0:epoch])
        ax1.plot(range(epoch), maxEnergy[0:epoch])
        ax1.plot(range(epoch), minEnergy[0:epoch])

        if np.mod(epoch, 100) == 0:
            # f1.savefig(output_folder + str(epoch) + '_' + 'fig.png')
            f1.savefig(self.plotsDir + '/energy/energyAt_%s.png' % str(epoch))

        # back-up every once in a while
        if np.mod(epoch, 100) == 0:
            VF.copy_to_host()
            FH.copy_to_host()
            bias_cov.copy_to_host()
            w_mean.copy_to_host()
            bias_mean.copy_to_host()
            bias_vis.copy_to_host()
            savemat("./weights/ws_temp%s" % str(epoch),
                    {'VF': VF.numpy_array, 'FH': FH.numpy_array,
                     'bias_cov': bias_cov.numpy_array,
                     'bias_vis': bias_vis.numpy_array,
                     'w_mean': w_mean.numpy_array,
                     'bias_mean': bias_mean.numpy_array, 'epoch': epoch})

        # uncomment if computing the energy in order to store its evolution throughout training
        #~ savemat(self.refDir + '/' + "training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy, 'meanEnergy_test':meanEnergy_test, 'maxEnergy': maxEnergy, 'maxEnergy_test': maxEnergy_test, 'minEnergy': minEnergy, 'minEnergy_test': minEnergy_test, 'epoch':epoch})
        # savemat("training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy, 'maxEnergy': maxEnergy, 'minEnergy': minEnergy, 'epoch':epoch})

        # in order to stop the training gracefully, create an empty file
        # named 'stop_now' in the folder containing the experiment
        # configuration file
        if os.path.isfile('stop_now'):
            break

    # final back-up
    VF.copy_to_host()
    FH.copy_to_host()
    bias_cov.copy_to_host()
    bias_vis.copy_to_host()
    w_mean.copy_to_host()
    bias_mean.copy_to_host()
    savemat("ws_fac%s" % str(self.num_fac) + "_cov%s" % str(self.num_hid_cov)
            + "_mean%s" % str(self.num_hid_mean),
            {'VF': VF.numpy_array, 'FH': FH.numpy_array,
             'bias_cov': bias_cov.numpy_array, 'bias_vis': bias_vis.numpy_array,
             'w_mean': w_mean.numpy_array, 'bias_mean': bias_mean.numpy_array,
             'epoch': epoch})

    # uncomment if computing the energy in order to store its evolution throughout training
    #~ savemat(self.refDir + '/' + "training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov) + "_mean" + str(self.num_hid_mean), {'meanEnergy':meanEnergy, 'meanEnergy_test':meanEnergy_test, 'maxEnergy': maxEnergy, 'maxEnergy_test': maxEnergy_test, 'minEnergy': minEnergy, 'minEnergy_test': minEnergy_test, 'epoch':epoch})
    savemat("training_energy_" + str(self.num_fac) + "_cov" + str(self.num_hid_cov)
            + "_mean" + str(self.num_hid_mean),
            {'meanEnergy': meanEnergy, 'maxEnergy': maxEnergy,
             'minEnergy': minEnergy, 'epoch': epoch})

    # Compute states if desired:
    # normalise data for covariance hidden:
    # dsq = np.square(visData)
    # lsq = np.sum(dsq, axis=0)
    # lsq /= visData.shape[1]
    # lsq += np.spacing(1)
    # l = np.sqrt(lsq)
    # normD = visData/l
    # logisticArg_c = (-0.5*np.dot(FH.numpy_array.T, np.square(np.dot(VF.numpy_array.T, normD.T))) + bias_cov.numpy_array).T
    # p_hc = logisticFunc(logisticArg_c)
    # logisticArg_m = np.dot(visData, w_mean.numpy_array) + bias_mean.numpy_array.T
    # p_hm = logisticFunc(logisticArg_m)
    # p_all = np.concatenate((p_hc, p_hm), axis=1)
    # savemat(self.probabilitiesDir + '/pAll_%i.mat' % epoch, mdict={'p_all': p_all})

    with open('done', 'w') as doneFile:
        doneFile.write(datetime.strftime(datetime.now(), '%d/%m/%Y %H:%M:%S'))
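# The normalization applied before each covariance pass in the train loop
# above divides every sample (column) by roughly its RMS norm. A NumPy
# sketch of the same computation, for reference only; the function name is
# hypothetical and `data` is VxP as in the loop:
def _normalize_columns(data, small=0.5):
    import numpy as np
    lengthsq = np.sum(data ** 2, axis=0) / data.shape[0]  # mean square per column
    lengthsq += small                                     # small avoids division by 0
    return data / np.sqrt(lengthsq)                       # normalized columns (VxP)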
def rbm(data, numHid, modelType="BB", **kwargs):
    """
    rbm definition
    data : when type is BB, should be binary, or in [0,1] to be interpreted
           as probabilities; when type is GB, should be continuous real values.
           data should have a format of *.npy
    numHid : number of nodes in the hidden layer
    type : rbm type, can be set as BB or GB

    additional inputs (specified as name value pairs or in struct)
    method     CD or SML
    eta        learning rate
    momentum   momentum for smoothness and to prevent overfitting
               NOTE: momentum is not recommended with SML
    maxepoch   # of epochs: each is a full pass through train data
    avglast    how many epochs before maxepoch to start averaging.
               Procedure suggested for faster convergence by Kevin Swersky
               in his MSc thesis
    penalty    weight decay factor
    batchsize  The number of training instances per batch
    verbose    For printing progress
    anneal     Flag. If set true, the penalty is annealed linearly through
               epochs to 10% of its original value

    OUTPUTS:
    model.type    Type of RBM (i.e. type of its visible and hidden units)
    model.weight  The weights of the connections
    model.biasH   The biases of the hidden layer
    model.biasV   The biases of the visible layer
    model.top     The activity of the top layer, to be used when training DBN's
    errors        The errors in reconstruction at every epoch
    """
    arg = util.processOptions(kwargs,
                              method="CD",
                              eta=0.1,
                              momentum=0.9,
                              maxEpoch=50,
                              avgLast=0,
                              penalty=0,
                              batchSize=50,
                              verbose=True,
                              anneal=False)
    [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"],
        arg["anneal"]]

    # from which step, we start to compute the average
    avgStart = maxEpoch - avgLast

    # for weight decay use
    oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(data.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    np.random.shuffle(data)

    # init CUDA
    # cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidActP2 = cm.empty((batchSize, numHid))
    visState = cm.empty((batchSize, numVis))
    hidState = cm.empty((batchSize, numHid))

    t = 1
    for epoch in range(maxEpoch):
        error = []

        if anneal:
            # apply linear weight decay
            penalty = oldPenalty - 0.9 * epoch / maxEpoch * oldPenalty

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
                # re-allocate temporary storage for the smaller final batch
                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidActP2 = cm.empty((batchSize, numHid))
                visState = cm.empty((batchSize, numVis))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidState)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            if cmp(modelType, "BB") == 0:
                cm.dot(hidState, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

                visState.fill_with_rand()
                visState.less_than(visActP, target=visState)
            elif cmp(modelType, "GB") == 0:
                cm.dot(hidState, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                # add Gaussian noise on the device (a plain numpy array
                # cannot be passed to CUDAMatrix.add)
                visActP.add(cm.CUDAMatrix(np.random.randn(batchSize, numVis)), target=visState)

            # another positive phase
            cm.dot(visState, weight, target=hidActP2)
            hidActP2.add_row_vec(biasH)
            hidActP2.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP2, target=hidState)

            # update weight and bias
            dWeight = cm.dot(visTrue.T, hidActP)
            dWeight.subtract_dot(visState.T, hidActP2)
            dBiasV = visTrue.sum(axis=0).subtract(visState.sum(axis=0))
            dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis=0))

            dWeight.divide(batchSize)
            # weight decay: subtract penalty*weight without mutating weight
            # in place (weight.mult(penalty) would overwrite the weights)
            dWeight.add_mult(weight, -penalty)
            dBiasV.divide(batchSize)
            dBiasH.divide(batchSize)

            weightInc.mult(momentum).add_mult(dWeight, eta)
            biasVInc.mult(momentum).add_mult(dBiasV, eta)
            biasHInc.mult(momentum).add_mult(dBiasH, eta)

            weight.add(weightInc)
            biasV.add(biasVInc)
            biasH.add(biasHInc)

            if epoch > avgStart:
                # running average: Agv += (current - Agv) / t
                # (the original in-place chain subtracted the buffer from
                # itself, zeroing the average)
                delta = cm.empty(weight.shape).assign(weight).subtract(weightAgv).mult(1.0 / t)
                weightAgv.add(delta)
                delta = cm.empty(biasV.shape).assign(biasV).subtract(biasVAgv).mult(1.0 / t)
                biasVAgv.add(delta)
                delta = cm.empty(biasH.shape).assign(biasH).subtract(biasHAgv).mult(1.0 / t)
                biasHAgv.add(delta)
                t = t + 1
            else:
                # snapshot copies (plain assignment would alias the GPU matrices)
                weightAgv = cm.empty(weight.shape).assign(weight)
                biasVAgv = cm.empty(biasV.shape).assign(biasV)
                biasHAgv = cm.empty(biasH.shape).assign(biasH)

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceData, weightAgv, target=top)
    top.add_row_vec(biasHAgv)
    top.apply_sigmoid()

    model_ = m.rbmModel(weightAgv, biasVAgv, biasHAgv, type=modelType, top=top)

    cm.shutdown()

    return model_
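# Minimal usage sketch (illustrative): train a binary-binary RBM on
# row-major binary data. The data values are made up; `util` and
# `m.rbmModel` are the helper modules these snippets assume.
def _example_rbm():
    import numpy as np
    X = (np.random.rand(500, 784) > 0.5).astype(np.float64)  # 500 binary examples
    model = rbm(X, numHid=256, modelType="BB", eta=0.1, maxEpoch=10, batchSize=50)
    return model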
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """
    X          ... data. should be binary, or in [0,1] interpreted as
               ... probabilities
    numhid     ... number of hidden units
    y          ... List of discrete labels

    nClass     number of classes
    method     CD or SML
    eta        learning rate
    momentum   momentum for smoothness and to prevent overfitting
               NOTE: momentum is not recommended with SML
    maxepoch   # of epochs: each is a full pass through train data
    avglast    how many epochs before maxepoch to start averaging.
               Procedure suggested for faster convergence by Kevin Swersky
               in his MSc thesis
    batchsize  The number of training instances per batch
    verbose    For printing progress

    model.weight      The weights of the connections
    model.biasH       The biases of the hidden layer
    model.biasV       The biases of the visible layer
    model.weightlabel ... The weights on labels layer
    model.biasLabel   ... The biases on labels layer
    errors            The errors in reconstruction at each epoch
    """
    arg = util.processOptions(kwargs,
                              nClass=np.unique(y).size,
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [
        arg["nClass"],
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"]]

    if verbose:
        print "Processing data ..."

    # from which step, we start to compute the average
    # avgStart = maxEpoch - avgLast

    # for weight decay use
    # oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1 * np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                labelTrue = deviceLabel.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]

                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                labelTrue = deviceLabel.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # apply momentum to the increments (the original multiplied
            # weightLabel/biasLabel themselves here by mistake)
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target=visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target=labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and bias
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)
            weightLabel.add_mult(weightLabelInc, eta / batchSize)
            biasLabel.add_mult(biasLabelInc, eta / batchSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print "Epoch %d/%d, reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array,
                        weightLabel=weightLabel.numpy_array,
                        biasLabel=biasLabel.numpy_array, labels=uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name, model)

    return model_
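# Minimal usage sketch (illustrative): fit a classification RBM on labeled
# binary data and save the model. The data values and file name are made up.
def _example_rbmFit():
    import numpy as np
    X = (np.random.rand(1000, 784) > 0.5).astype(np.float64)
    y = np.random.randint(0, 10, 1000)  # ten discrete classes
    model = rbmFit(X, numHid=500, y=y, isSaveModel=True, name="rbm_model.npy",
                   maxEpoch=50, batchSize=100)
    return model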
def main():
    parser = ArgumentParser()
    parser.add_argument("query_file", help="word2vec file in json format")
    parser.add_argument("bidword_file", help="word2vec file in json format")
    args = parser.parse_args()

    query_file = args.query_file
    bidword_file = args.bidword_file

    if DEBUG_FLAG:
        print "loading bidword dict ..."
    start = time()
    bidword_list, bidword_matrix = load_normalized_matrix(bidword_file)
    end = time()
    if DEBUG_FLAG:
        print "loading bidword dict done", duration(start, end)

    if DEBUG_FLAG:
        print "loading query dict ..."
    start = time()
    query_list, query_matrix = load_normalized_matrix(query_file)
    end = time()
    if DEBUG_FLAG:
        print "loading query dict done", duration(start, end)

    hash_length = 12
    hash_number = 1
    seed_matrix = random((200, hash_length * hash_number)) - 0.5

    if DEBUG_FLAG:
        print "initing cublas ..."
    start = time()
    cuda_set_device(1)
    cublas_init(1000000)
    end = time()
    if DEBUG_FLAG:
        print "initing cublas done", duration(start, end)

    if DEBUG_FLAG:
        print "computing hash_matrix ..."
    start = time()
    cuda_seed_matrix = CUDAMatrix(seed_matrix)
    cuda_bidword_matrix = CUDAMatrix(bidword_matrix)
    bidword_hash_matrix = dot(cuda_bidword_matrix, cuda_seed_matrix).asarray()
    del cuda_bidword_matrix
    cuda_query_matrix = CUDAMatrix(query_matrix)
    query_hash_matrix = dot(cuda_query_matrix, cuda_seed_matrix).asarray()
    del cuda_query_matrix
    end = time()
    if DEBUG_FLAG:
        print "computing hash_matrix done", duration(start, end)

    if DEBUG_FLAG:
        print "initing bidword_hash_dict_list ..."
    start = time()
    bidword_hash_dict_list = [dict([]) for i in xrange(hash_number)]
    end = time()
    if DEBUG_FLAG:
        print "initing bidword_hash_dict_list done", duration(start, end)

    if DEBUG_FLAG:
        print "aggregating bidword_hash_dict_list ..."
    start = time()
    for i in xrange(bidword_hash_matrix.shape[0]):
        hash_string = "".join(['1' if j > 0 else '0' for j in bidword_hash_matrix[i, :]])
        for j in xrange(hash_number):
            hash_index_start = j * hash_length
            hash_index_end = hash_index_start + hash_length
            hash_key = hash_string[hash_index_start:hash_index_end]
            if hash_key in bidword_hash_dict_list[j]:
                bidword_hash_dict_list[j][hash_key].add(i)
            else:
                bidword_hash_dict_list[j][hash_key] = set([i])
    end = time()
    if DEBUG_FLAG:
        print "aggregating bidword_hash_dict_list done", duration(start, end)

    if DEBUG_FLAG:
        print "aggregating query_hash_dict ..."
    start = time()
    query_hash_dict = {}
    for i in xrange(query_hash_matrix.shape[0]):
        hash_string = "".join(['1' if j > 0 else '0' for j in query_hash_matrix[i, :]])
        if hash_string in query_hash_dict:
            query_hash_dict[hash_string].add(i)
        else:
            query_hash_dict[hash_string] = set([i])
    end = time()
    if DEBUG_FLAG:
        print "aggregating query_hash_dict done", duration(start, end)

    profiler_total = 0
    profiler_first = 0
    profiler_first_zero = 0
    profiler_first_one = 0
    profiler_first_two = 0
    profiler_first_three = 0
    profiler_first_four = 0
    profiler_second = 0
    profiler_third = 0

    timer = time()
    for hash_string in query_hash_dict:
        time_flag_total = time()
        time_flag_first = time()

        # occasionally trigger garbage collection to release memory
        if random_sample() > 0.95:
            collect()

        # aggregate query_index_set and bidword_index_set
        query_index_set = query_hash_dict[hash_string]
        bidword_index_set = set()
        for i in xrange(hash_number):
            time_flag_first_zero = time()
            hash_index_start = i * hash_length
            hash_index_end = hash_index_start + hash_length
            hash_key = hash_string[hash_index_start:hash_index_end]
            profiler_first_zero += time() - time_flag_first_zero

            # circum hash with hamming distance 0
            time_flag_first_one = time()
            bidword_index_set |= bidword_hash_dict_list[i][hash_key]
            profiler_first_one += time() - time_flag_first_one

            # circum hash with hamming distance 1
            time_flag_first_two = time()
            for first_index in xrange(hash_length):
                circum_hash_key = list(hash_key)
                circum_hash_key[first_index] = '1' if hash_key[first_index] == '0' else '0'
                circum_hash_key = "".join(circum_hash_key)
                if circum_hash_key in bidword_hash_dict_list[i]:
                    bidword_index_set |= bidword_hash_dict_list[i][circum_hash_key]
            profiler_first_two += time() - time_flag_first_two

            # circum hash with hamming distance 2
            time_flag_first_three = time()
            for first_index, second_index in combinations(range(hash_length), 2):
                circum_hash_key = list(hash_key)
                circum_hash_key[first_index] = '1' if hash_key[first_index] == '0' else '0'
                circum_hash_key[second_index] = '1' if hash_key[second_index] == '0' else '0'
                circum_hash_key = "".join(circum_hash_key)
                if circum_hash_key in bidword_hash_dict_list[i]:
                    bidword_index_set |= bidword_hash_dict_list[i][circum_hash_key]
            profiler_first_three += time() - time_flag_first_three

            ## circum hash with hamming distance 3
            #time_flag_first_four = time()
            #for first_index, second_index, third_index in combinations(range(hash_length), 3):
            #    circum_hash_key = list(hash_key)
            #    circum_hash_key[first_index] = '1' if hash_key[first_index] == '0' else '0'
            #    circum_hash_key[second_index] = '1' if hash_key[second_index] == '0' else '0'
            #    circum_hash_key[third_index] = '1' if hash_key[third_index] == '0' else '0'
            #    circum_hash_key = "".join(circum_hash_key)
            #    if circum_hash_key in bidword_hash_dict_list[i]:
            #        bidword_index_set |= bidword_hash_dict_list[i][circum_hash_key]
            #profiler_first_four += time() - time_flag_first_four

        # compute sim between query_index_list and bidword_index_list
        profiler_first += time() - time_flag_first
        query_index_list = list(query_index_set)
        bidword_index_list = list(bidword_index_set)

        partition_length = 1e8
        if DEBUG_FLAG or True:
            print "### profile ### matrix shape:", \
                query_matrix[query_index_list, :].shape, \
                bidword_matrix[bidword_index_list, :].transpose().shape, \
                len(query_index_list) * len(bidword_index_list)
        if len(bidword_index_list) > partition_length:
            raise Exception("bidword_index_list too long: %d" % len(bidword_index_list))

        step = int(partition_length / len(bidword_index_list))
        partition_begin = 0
        partition_end = 0
        while partition_end < len(query_index_list):
            partition_end = len(query_index_list) if partition_begin + step > len(query_index_list) else partition_begin + step
            if DEBUG_FLAG or True:
                print "### profile ### partition_begin:", partition_begin, "partition_end:", partition_end
            time_flag_second = time()
            sim_matrix = dot(
                CUDAMatrix(query_matrix[query_index_list[partition_begin:partition_end], :]),
                CUDAMatrix(bidword_matrix[bidword_index_list, :].transpose())
            ).asarray().tolist()
            profiler_second += time() - time_flag_second
            profiler_third += sort_matrix(sim_matrix, query_list,
                                          query_index_list[partition_begin:partition_end],
                                          bidword_list, bidword_index_list)
            partition_begin = partition_end

        profiler_total += time() - time_flag_total
        if DEBUG_FLAG or True:
            print "### profile ### total=%f first=%f(%f)[%f(%f)%f(%f)%f(%f)%f(%f)%f(%f)] second=%f(%f) third=%f(%f) %s(%f)" % (
                profiler_total,
                profiler_first, profiler_first / profiler_total,
                profiler_first_zero, profiler_first_zero / profiler_first,
                profiler_first_one, profiler_first_one / profiler_first,
                profiler_first_two, profiler_first_two / profiler_first,
                profiler_first_three, profiler_first_three / profiler_first,
                profiler_first_four, profiler_first_four / profiler_first,
                profiler_second, profiler_second / profiler_total,
                profiler_third, profiler_third / profiler_total,
                duration(timer, time()), time() - timer)
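# The hashing in main() above is sign-random-projection LSH: each 200-d
# vector is projected onto random hyperplanes and the sign pattern becomes
# its bucket key, so nearby vectors tend to share buckets. A NumPy sketch of
# the same idea (illustrative only; the function name and shapes are
# assumptions):
def _example_srp_hash(vectors, hash_length=12, dim=200):
    import numpy as np
    planes = np.random.random((dim, hash_length)) - 0.5  # random hyperplanes
    projections = np.dot(vectors, planes)                # (n, hash_length)
    bits = (projections > 0).astype(int)                 # sign pattern per vector
    return ["".join(map(str, row)) for row in bits]      # bucket keys like '0110...'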
def setup():
    cm.cublas_init()
def rbmPredict(m, X):
    """using trained rbm model to do prediction"""
    nClass = m.labels.size
    numCase = X.shape[0]

    # This part is executed on CPU
    # define the free energy
    # FF = np.zeros((numCase, nClass))
    # FFcol = np.zeros((numCase, 1))
    # for index in range(nClass):
    #     temp = np.zeros((numCase, nClass))
    #     temp[:, index] = 1
    #     tt = np.emath.log(np.exp(np.dot(X, m.weight) + np.dot(temp, m.weightLabel) + m.biasH) + 1)
    #     FFcol = temp[:, index] * m.biasLabel[0, index] + np.sum(tt, axis=1)
    #     FF[:, index] = FFcol
    #
    # [x, y] = np.where(np.abs(FF - np.max(FF, axis=1, keepdims=True)) < 1e-5)
    # result = np.zeros(y.shape)
    # for index in range(y.size):
    #     result[index] = m.labels[y[index]]

    # The following part runs on GPU
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
    weightLabel = cm.CUDAMatrix(cm.reformat(m.weightLabel))
    biasLabel = cm.CUDAMatrix(cm.reformat(m.biasLabel))

    F = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    Fcol = cm.CUDAMatrix(np.zeros((numCase, 1)))
    temp = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    tt = cm.CUDAMatrix(np.zeros((numCase, biasH.asarray().size)))

    for index in range(nClass):
        temp.assign(0)
        temp.set_col_slice(index, index + 1, 1)

        tt = cm.dot(data, weight)
        tt.add_dot(temp, weightLabel)
        tt.add_row_vec(biasH)
        cm.log_1_plus_exp(tt, target=tt, exact=True)

        Fcol = cm.sum(tt, axis=1)
        Fcol.add_mult(temp.get_col_slice(index, index + 1), biasLabel.numpy_array[0, index])
        F.set_col_slice(index, index + 1, Fcol)

        tt.free_device_memory()

    F.copy_to_host()
    [x, y] = np.where(np.abs(F.numpy_array - np.max(F.numpy_array, axis=1, keepdims=True)) < 1e-5)

    # free device memory
    data.free_device_memory()
    weight.free_device_memory()
    biasH.free_device_memory()
    biasLabel.free_device_memory()
    weightLabel.free_device_memory()
    F.free_device_memory()
    Fcol.free_device_memory()
    temp.free_device_memory()

    cm.shutdown()

    result = np.zeros(y.shape)
    for index in range(y.size):
        result[index] = m.labels[y[index]]

    return [result, F.numpy_array]
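# For reference, the per-class score computed above is the (negative) free
# energy of the class-conditional RBM,
#     F(x, y) = biasLabel[y] + sum_j log(1 + exp(x . W[:, j] + weightLabel[y, j] + biasH[j])),
# and the predicted label is the argmax over y. Minimal usage sketch
# (illustrative; `model` is a trained rbmFit model):
def _example_rbmPredict(model, X):
    labels, scores = rbmPredict(model, X)  # labels: (n,), scores: (n, nClass)
    return labels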
def LockGPU(max_retries=10, board=-1):
    # Assumes the GPU lock for this board was already obtained
    cm.cuda_set_device(board)
    cm.cublas_init()
    return board
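# A minimal usage sketch for the LockGPU variants above (hypothetical: the
# board id is invented, and gpu_lock is assumed to have granted the lock
# beforehand; cublas is released with cm.shutdown() when the job is done).
board = LockGPU(board=2)
a = cm.CUDAMatrix(np.random.rand(4, 4))
print a.asarray().sum()
cm.shutdown()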
def rbm(X, numHid, **kwargs):
    """
    rbm definition
    data : when type is BB, should be binary, or in [0,1] to be interpreted
           as probabilities; when type is GB, should be continuous real values.
           data should have a format of *.npy
    numHid : number of nodes in the hidden layer
    type            rbm type, can be set as BB or GB
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness and to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging.
                    Procedure suggested for faster convergence by Kevin Swersky
                    in his MSc thesis
    batchsize       the number of training instances per batch
    verbose         for printing progress

    model.type      type of RBM (i.e. type of its visible and hidden units)
    model.weight    the weights of the connections
    model.biasH     the biases of the hidden layer
    model.biasV     the biases of the visible layer
    model.top       the activity of the top layer, to be used when training DBNs
    errors          the reconstruction error at every epoch
    """
    # When computing the transpose of a matrix, the method *.transpose()
    # allocates extra space; the .T attribute is cheaper.
    arg = util.processOptions(kwargs,
                              modelType="BB",
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    [modelType, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [
        arg["modelType"],
        arg["method"],
        arg["eta"],
        arg["momentum"],
        arg["maxEpoch"],
        arg["avgLast"],
        arg["penalty"],
        arg["batchSize"],
        arg["verbose"]
    ]

    # from which step, we start to compute the average
    # avgStart = maxEpoch - avgLast

    # for weight decay use
    # oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
    # cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight updates
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
                # the last batch can be smaller, so re-create the temporaries
                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            # sample binary hidden states (written into hidActP)
            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if method == "SML":
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif method == "CD":
                pass

            # negative phase
            if modelType == "BB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()
            elif modelType == "GB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                # Gaussian visible units: the noise must live on the GPU
                # before it can be added (the original passed a numpy array)
                visActP.add(cm.CUDAMatrix(np.random.randn(batchSize, numVis)), target=visActP)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            # update weights and biases
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)

            # if epoch > avgStart :
            #     # apply average
            #     weightAgv.subtract(weightAgv.subtract(weight).mult(1.0/t))
            #     biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0/t))
            #     biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0/t))
            #     t = t+1
            # else :
            #     weightAgv = weight
            #     biasVAgv = biasV
            #     biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free device memory
            visTrue.free_device_memory()

        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(cm.CUDAMatrix(cm.reformat(X)), weight, target=top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array,
                        biasH.numpy_array, type=modelType, top=top.numpy_array)

    # free device memory
    deviceData.free_device_memory()
    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    return model_
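# A hedged usage sketch for the rbm() trainer above: fit a small binary-binary
# RBM on random binary data (all names from the surrounding module; assumes
# m.rbmModel exposes its `top` argument as an attribute).
X = (np.random.rand(1000, 784) > 0.5).astype(np.float64)
model = rbm(X, 256, modelType="BB", method="CD",
            eta=0.05, momentum=0.5, maxEpoch=10, batchSize=100)
print model.top.shape  # hidden-layer probabilities for the training set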
input = input.cuda()
one, mone = one.cuda(), mone.cuda()
noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

# setup optimizer
if opt.adam:
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lrG, betas=(opt.beta1, 0.999))
else:
    optimizerG = optim.RMSprop(netG.parameters(), lr=opt.lrG)

# initialization of cudamat
if opt.sinkgpu:
    cudamat.cublas_init()

normalizeL = torch.Tensor([opt.regL]).double()
SCORE = []

gen_iterations = 0
for epoch in range(opt.niter):
    data_iter = iter(dataloader)
    i = 0
    tmp_score = []
    while i < len(dataloader):
        ############################
        # (1) sample the empirical data first
        ############################
        data = data_iter.next()
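# The snippet above mixes PyTorch CUDA tensors with cudamat. The two libraries
# do not share device memory, so data crosses between them through host numpy
# arrays; a minimal sketch of that round trip (shapes invented):
import numpy as np
import torch
import cudamat

t = torch.randn(64, 100).double()      # torch tensor on the host
c = cudamat.CUDAMatrix(t.numpy())      # host copy into cudamat's GPU context
back = torch.from_numpy(c.asarray())   # back to a torch tensor via host memory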
def calc_output_legacy(self, data, batch_size):
    """
    Calculate the output (probabilities) for a set of data

    The purpose of this function is to calculate the output of a DN on
    some set of data. The values will be calculated using rbm_cudamat on
    slices of data specified by the batch size.
    """
    import cudamat as cm
    import rbm_numpy, rbm_cudamat

    # Initialize CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(1)

    if self.legacy_card_number != 0:
        cm.cuda_set_device(self.legacy_card_number)

    # Create output, use the size of the last layer to do this
    output = np.empty((data.shape[0], self.arch[(self.layer_count - 1)]['node_count']))

    # Slice up data, handling batches of batch_size. USE INT DIVISION
    processed = 0
    for j in range(data.shape[0] // batch_size):
        curr_data = data[j * batch_size:(j + 1) * batch_size, :]
        for i in range(1, self.layer_count):
            # Handle a sigmoid node
            if self.arch[i]['node_type'] == 'S':
                curr_data = rbm_cudamat.calc_hidden_probs(curr_data,
                                                          self.weights[i]['w'],
                                                          self.weights[i]['hb'],
                                                          batch_size)
        output[j * batch_size:(j + 1) * batch_size, :] = curr_data[:, :]
        processed = processed + batch_size

    # Now handle anything that was left over, i.e., what didn't fit in
    if processed != data.shape[0]:
        curr_data = data[processed:, :]
        for i in range(1, self.layer_count):
            # Handle a sigmoid node
            if self.arch[i]['node_type'] == 'S':
                curr_data = rbm_numpy.calc_hidden_probs(curr_data,
                                                        self.weights[i]['w'],
                                                        self.weights[i]['hb'])
        output[processed:, :] = curr_data[:, :]

    cm.cublas_shutdown()

    return output
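# The slice-then-remainder pattern in calc_output_legacy is easy to get wrong;
# the same batching logic as a small standalone sketch (hypothetical helper,
# not part of the original class):
def iter_batches(n_rows, batch_size):
    """Yield (start, end) slices covering n_rows, with the remainder last."""
    n_full = n_rows // batch_size
    for j in range(n_full):
        yield j * batch_size, (j + 1) * batch_size
    if n_full * batch_size != n_rows:
        yield n_full * batch_size, n_rows

# for start, end in iter_batches(data.shape[0], 128):
#     process(data[start:end, :])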
# This file shows how to implement a single hidden layer neural network for
# performing binary classification on the GPU using cudamat.

import pdb
import time
import numpy as np

import cudamat as cm
from cudamat import learn as cl
import util

# initialize CUDA
cm.cublas_init()

# load data
util.load('mnist49.dat', globals())

# Put training data onto the GPU.
dat_train = dat_train / 255.
dat_train = dat_train - (np.mean(dat_train, 1) + 10**-8)[:, np.newaxis]
dev_train = cm.CUDAMatrix(dat_train)
dev_lbl = cm.CUDAMatrix(lbl_train)

# training parameters
epsilon = 0.01
momentum = 0.9
num_epochs = 30
batch_size = 128
num_batches = dat_train.shape[1] / batch_size

# model parameters
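# The file is cut off at its model-parameter section. A hedged sketch of how
# such parameters might be initialized with cudamat (hypothetical sizes and
# names, not the original file's continuation):
num_vis = dat_train.shape[0]   # rows are features, columns are cases
num_hid = 1024                 # hidden layer size, chosen arbitrarily here
w_vh = cm.CUDAMatrix(0.01 * np.random.randn(num_vis, num_hid))
b_h = cm.CUDAMatrix(np.zeros((num_hid, 1)))
w_out = cm.CUDAMatrix(0.01 * np.random.randn(num_hid, 1))
b_out = cm.CUDAMatrix(np.zeros((1, 1)))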
def rbm(data, numHid, modelType="BB", **kwargs): """ rbm defination data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities when type is GB, should be continuous real value. data should have a format of *.npy numHid : number nodes of and hidden layer type : rbm type, can be set as BB or GB additional inputs (specified as name value pairs or in struct) method CD or SML eta learning rate momentum momentum for smoothness amd to prevent overfitting NOTE: momentum is not recommended with SML maxepoch # of epochs: each is a full pass through train data avglast how many epochs before maxepoch to start averaging before. Procedure suggested for faster convergence by Kevin Swersky in his MSc thesis penalty weight decay factor batchsize The number of training instances per batch verbose For printing progress anneal Flag. If set true, the penalty is annealed linearly through epochs to 10% of its original value OUTPUTS: model.type Type of RBM (i.e. type of its visible and hidden units) model.weight The weights of the connections model.biasH The biases of the hidden layer model.biasV The biases of the visible layer model.top The activity of the top layer, to be used when training DBN's errors The errors in reconstruction at every epoch """ arg = util.processOptions(kwargs, \ method = "CD", \ eta = 0.1, \ momentum = 0.9,\ maxEpoch = 50, \ avgLast = 0, \ penalty = 0, \ batchSize = 50, \ verbose = True, \ anneal = False) [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [\ arg["method"],\ arg["eta"],\ arg["momentum"],\ arg["maxEpoch"],\ arg["avgLast"],\ arg["penalty"],\ arg["batchSize"],\ arg["verbose"],\ arg["anneal"] ] # from which step, we start to compute the average avgStart = maxEpoch - avgLast # for weight decay use oldPenalty = penalty # numCases : number of example # numDims : the length of each example # each row is an example [numCases, numDims] = list(data.shape) if verbose: print "processing data" numVis = numDims numBatch = util.ceil(numCases, batchSize) # shuffle the data np.random.shuffle(data) # init CUDA # cm.cuda_set_device() cm.cublas_init() cm.CUDAMatrix.init_random(100) deviceData = cm.CUDAMatrix(cm.reformat(data)) # init weights weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid)) biasV = cm.CUDAMatrix(np.zeros((1, numVis))) biasH = cm.CUDAMatrix(np.zeros((1, numHid))) # init weight update weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid))) biasVInc = cm.CUDAMatrix(np.zeros((1, numVis))) biasHInc = cm.CUDAMatrix(np.zeros((1, numHid))) #init temporary storage visActP = cm.empty((batchSize, numVis)) hidActP = cm.empty((batchSize, numHid)) hidActP2 = cm.empty((batchSize, numHid)) visState = cm.empty((batchSize, numVis)) hidState = cm.empty((batchSize, numHid)) t = 1 for epoch in range(maxEpoch): error = [] if anneal: # apply linear weight decay penalty = oldPenalty - 0.9 * epoch / maxEpoch * oldPenalty for batch in range(numBatch): # train each data batch if batchSize * (batch + 1) > numCases: visTrue = deviceData.get_row_slice(batchSize * batch, numCases) batchSize = visTrue.shape[0] else: visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1)) batchSize = visTrue.shape[0] visActP.assign(visTrue) # positive phase cm.dot(visActP, weight, target=hidActP) hidActP.add_row_vec(biasH) hidActP.apply_sigmoid() hidState.fill_with_rand() hidState.less_than(hidActP, target=hidState) if cmp(method, "SML") == 0: if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)): pass # here does not need in 
practical use elif cmp(method, "CD") == 0: pass # negetive phase if cmp(modelType, "BB") == 0: cm.dot(hidState, weight.transpose(), target=visActP) visActP.add_row_vec(biasV) visActP.apply_sigmoid() visState.fill_with_rand() visState.less_than(visActP, target=visState) elif cmp(modelType, "GB") == 0: cm.dot(hidState, weight.transpose(), target=visActP) visActP.add_row_vec(biasV) visActP.add(np.random.randn(batchSize, numVis), target=visState) # another positive phase cm.dot(visState, weight, target=hidActP2) hidActP2.add_row_vec(biasH) hidActP2.apply_sigmoid() hidState.fill_with_rand() hidState.less_than(hidActP2, target=hidState) #update weight and bias dWeight = cm.dot(visTrue.transpose(), hidActP) dWeight.subtract_dot(visState.transpose(), hidActP2) dBiasV = visTrue.sum(axis=0).subtract(visState.sum(axis=0)) dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis=0)) dWeight.divide(batchSize).subtract(weight.mult(penalty)) dBiasV.divide(batchSize) dBiasH.divide(batchSize) weightInc.mult(momentum).add_mult(dWeight, eta) biasVInc.mult(momentum).add_mult(dBiasV, eta) biasHInc.mult(momentum).add_mult(dBiasH, eta) weight.add(weightInc) biasV.add(biasVInc) biasH.add(biasHInc) if epoch > avgStart: # apply average weightAgv.subtract(weightAgv.subtract(weight).mult(1.0 / t)) biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0 / t)) biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0 / t)) t = t + 1 else: weightAgv = weight biasVAgv = biasV biasHAgv = biasH # reconstruction error visTrue.subtract(visActP) error.append(visTrue.euclid_norm()**2) if verbose: print "epoch %d/%d. Reconstruction error is %f " % ( epoch + 1, maxEpoch, sum(error)) # save rbm model top = cm.CUDAMatrix(np.zeros((numCases, numHid))) cm.dot(deviceData, weightAgv, target=top) top.add_row_vec(biasHAgv) top.apply_sigmoid() model_ = m.rbmModel(weightAgv, biasVAgv, biasHAgv, type=modelType, top=top) cm.shutdown() return model_
####################################################
# Main function
####################################################
if __name__ == "__main__":

    # Check the arguments
    if len(sys.argv) != 2:
        print "Give a file name!"
        exit()

    # Import the digits
    print "Importing digits from {}".format(sys.argv[1])
    digitImport = MNISTImporter.Open(sys.argv[1])

    # Init CUBLAS
    cb.cublas_init()

    # Create the reservoir
    reservoir = Oger.nodes.CUDAReservoirNode(input_dim=digitImport.nbInputs,
                                             output_dim=rc_Size,
                                             input_scaling=rc_InputScaling,
                                             spectral_radius=rc_SpectralRadius)
    readout = Oger.nodes.RidgeRegressionNode(output_dim=rc_nbDigits, dtype='float64')
    classifier = DigitClassifierNode(mnist_space=digitImport.interImagesSpace,
                                     label_space_ratio=digitImport.interImagesRatio,
                                     digit_space_ratio=digitImport.digitImageRatio,
                                     image_size=digitImport.imagesSize,
                                     nb_digit=rc_nbDigits,
                                     method="average",
                                     input_dim=rc_nbDigits,
                                     dtype='float64')

    # Get part of the training set and the labels
    inputs, outputs = digitImport.getTrainingSet(length=rc_TrainingLength)
    inputs_test, outputs_test = digitImport.getTestSet(length=rc_TestLength)
    data = [None, [(inputs, outputs)], None]

    # Build the flow
    flow = mdp.Flow([reservoir, readout, classifier], verbose=0)

    # Train the network
def main(
    full_vocab_file: str,
    repr_vocab_file: str,
    output: str,
    n_components: int,
    sim: str,
    sim_alignment_matrix: str,
    n_ngram: int,
    use_gpu: bool,
    processes: int,
) -> None:
    """Compute KPCA embeddings on a given data set."""
    n = n_ngram  # meh
    output = os.path.abspath(output)
    os.makedirs(output, exist_ok=True)

    full_vocab = _preprocess_vocab_file(full_vocab_file)
    if repr_vocab_file is None:
        repr_vocab = full_vocab
    else:
        repr_vocab = _preprocess_vocab_file(repr_vocab_file)

    params_path = os.path.join(output, 'training_manifest.json')
    secho(f'Outputting training information to {params_path}')
    manifest = dict(
        sim=sim,
        n=n,
        len_full_vocab=len(full_vocab),
        len_repr_vocab=len(repr_vocab),
        kernels=kernels,
    )
    with open(params_path, 'w') as file:
        json.dump(manifest, file, sort_keys=True, indent=2)

    if use_gpu:
        import cudamat as cm
        cm.cublas_init()

    if sim == 'global-alignment':
        secho(f'Computing global alignment similarities with {sim_alignment_matrix}')
        repr_similarity_matrix = calculate_global_alignment_similarity_matrix(
            full_vocab=repr_vocab,
            repr_vocab=repr_vocab,
            processes=processes,
            matrix=sim_alignment_matrix,
            tqdm_desc=f'{EMOJI} Computing self-similarity matrix for '
                      f'repr vocab with global alignment ({sim_alignment_matrix})',
        )
        full_similarity_matrix = calculate_global_alignment_similarity_matrix(
            full_vocab=full_vocab,
            repr_vocab=repr_vocab,
            processes=processes,
            matrix=sim_alignment_matrix,
            tqdm_desc=f'{EMOJI} Computing similarity matrix between '
                      f'full/repr vocab with global alignment ({sim_alignment_matrix})',
        )
    else:
        alphabet = set(itt.chain.from_iterable(repr_vocab))
        alphabet.add(" ")
        ngram_to_index = {
            ngram: i
            for i, ngram in enumerate(["".join(t) for t in itt.product(alphabet, repeat=n)])
        }
        if sim == "ngram_intersec":
            secho(f'Computing n-gram sparse similarities with {sim}')
            repr_similarity_matrix = compute_similarity_matrix_ngram_sparse(
                full_vocab=repr_vocab,
                repr_vocab=repr_vocab,
                ngram_to_index=ngram_to_index,
                n=n,
            )
            full_similarity_matrix = compute_similarity_matrix_ngram_sparse(
                full_vocab=full_vocab,
                repr_vocab=repr_vocab,
                ngram_to_index=ngram_to_index,
                n=n,
            )
        else:  # sim == 'ngram_sim'
            secho(f'Computing n-gram similarities with {sim}')
            repr_similarity_matrix = compute_similarity_matrix_ngram_parallel(
                full_vocab=repr_vocab,
                repr_vocab=repr_vocab,
                n=n,
                ngram_to_index=ngram_to_index,
                processes=processes,  # Extra because this gets multi-processed
            )
            full_similarity_matrix = compute_similarity_matrix_ngram_parallel(
                full_vocab=full_vocab,
                repr_vocab=repr_vocab,
                n=n,
                ngram_to_index=ngram_to_index,
                processes=processes,  # Extra because this gets multi-processed
            )

    repr_similarity_matrix_path = os.path.join(output, "repr_similarity_matrix.npy")
    secho(f"Saving the repr similarity matrix for the full vocabulary to {repr_similarity_matrix_path}")
    np.save(repr_similarity_matrix_path, repr_similarity_matrix, allow_pickle=False)

    full_similarity_matrix_path = os.path.join(output, "full_similarity_matrix.npy")
    secho(f"Saving the full similarity matrix for the full vocabulary to {full_similarity_matrix_path}")
    np.save(full_similarity_matrix_path, full_similarity_matrix, allow_pickle=False)

    optim_folder = os.path.join(output, 'optim')
    os.makedirs(optim_folder, exist_ok=True)

    if n_components is None:
        n_components = int(0.5 + len(repr_vocab) * 2 / 3)

    optimize_projections(
        output=optim_folder,
        repr_similarity_matrix=repr_similarity_matrix,
        full_similarity_matrix=full_similarity_matrix,
        n_components=n_components,
        similarity_type=sim,
        use_gpu=use_gpu,
    )

    if use_gpu:
        # only shut down after all loops have used this function
        import cudamat as cm
        cm.shutdown()

    secho(f"done. Enjoy your {make_ratvec(3)}")
def __init__(self, neu, n_in, n_out,
             gama=0.5, ro=1, psi=0.5, in_scale=0.1,
             bias_scale=0.5, alfa=10, forget=1,
             initial_filename="initial",
             load_initial=False, save_initial=False, noise_amplitude=0):
    # All matrices are initialized from the normal distribution.
    cm.cublas_init()
    print "initializing reservoir"
    print n_in, "Number of inputs"
    self.neu = neu
    self.n_in = n_in
    self.n_out = n_out
    self.noise_amplitude = noise_amplitude

    # Reservoir weight matrix.
    print "initializing reservoir matrix"
    self.Wrr0 = cm.CUDAMatrix(np.random.normal(0, 1, [neu, neu]))
    # input-reservoir weight matrix
    print "initializing input matrix"
    self.Wir0 = cm.CUDAMatrix(np.random.normal(0, 1, [neu, n_in]))
    # bias-reservoir weight matrix
    print "initializing bias matrix"
    self.Wbr0 = cm.CUDAMatrix(np.random.normal(0, 1, [neu, 1]))

    self.Wrr = cm.empty(self.Wrr0.shape)
    self.Wbr = cm.empty(self.Wbr0.shape)
    self.Wir = cm.empty(self.Wir0.shape)
    #self.Wbo = np.random.normal(0,1,[n_out,1])

    # reservoir-output weight matrix
    print "initializing Wro"
    self.Wro = cm.CUDAMatrix(np.random.normal(0, 1, [n_out, neu]))

    self.leakrate = gama          # the network's leak rate
    self.ro = ro                  # the network's desired spectral radius
    self.psi = psi                # the network's sparsity, in 0 to 1 notation
    self.in_scale = in_scale      # the scaling of Wir
    self.bias_scale = bias_scale  # the scaling of Wbr

    # learning rate of the Recursive Least Squares algorithm
    self.alfa = alfa
    # forget factor of the RLS algorithm
    self.forget = forget

    #self.a = np.random.normal(0, 1, [neu, 1])
    self.a = cm.CUDAMatrix(np.zeros([neu, 1]))

    # save if save is enabled
    if save_initial:
        self.save_initial_fun(initial_filename)
    # load if load is enabled
    if load_initial:
        self.LoedInitial(initial_filename)

    # the probability of a member of the matrix Wrr being zero is psi.
    print "define sparseness"
    if psi > 0:
        self.Wrr = Sparcity(self.Wrr0, self.psi)
    else:
        self.Wrr.assign(self.Wrr0)

    # force Wrr to have ro as its spectral radius
    print "calculating eigenvalues"
    eigs = np.linalg.eigvals(self.Wrr.asarray())
    print "finding maximum eigenvalue"
    # the spectral radius is the largest eigenvalue magnitude
    # (the original took np.abs(np.max(eigs)), which is wrong for complex spectra)
    radius = np.max(np.abs(eigs))
    # normalize the matrix
    print "normalize reservoir"
    self.Wrr.divide(np.asscalar(radius))
    # set its spectral radius to ro
    self.Wrr.mult(ro)

    # scale the matrices
    self.Wbr0.mult(bias_scale, target=self.Wbr)
    self.Wir0.mult(in_scale, target=self.Wir)

    # initial conditions for the variable forget factor
    self.sigma_e = 0.001
    self.sigma_q = 0.001
    self.sigma_v = 0.001
    self.K_a = 6.0
    self.K_b = 3.0 * self.K_a

    # covariance matrix
    self.P = cm.CUDAMatrix(np.eye(neu) / alfa)
    print "Reservoir initialization Done"
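# The eigenvalue/normalize/scale sequence above pins the reservoir's spectral
# radius. A minimal NumPy-only sketch of the same operation (standalone
# helper, names hypothetical):
def scale_spectral_radius(W, rho=0.9):
    """Rescale W so that its largest eigenvalue magnitude equals rho."""
    radius = np.max(np.abs(np.linalg.eigvals(W)))
    return W * (rho / radius)

# W = np.random.normal(0, 1, (100, 100))
# W = scale_spectral_radius(W, rho=0.95)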
def matrix_factorization_clustering(X_aux, k, l, norm=False, num_iters=100):
    cm.cublas_init()

    m, n = X_aux.shape
    U = cm.CUDAMatrix(np.random.rand(m, k))
    S = cm.CUDAMatrix(np.random.rand(k, l))
    V = cm.CUDAMatrix(np.random.rand(n, l))
    X = cm.CUDAMatrix(X_aux)

    # if norm:
    #     X = Normalizer().fit_transform(X)

    # preallocate every intermediate product once
    XV = cm.CUDAMatrix(np.random.rand(m, l))
    XVSt = cm.CUDAMatrix(np.random.rand(m, k))
    US = cm.CUDAMatrix(np.random.rand(m, l))
    USVt = cm.CUDAMatrix(np.random.rand(m, n))
    USVtXt = cm.CUDAMatrix(np.random.rand(m, m))
    USVtXtU = cm.CUDAMatrix(np.random.rand(m, k))
    U_aux = cm.CUDAMatrix(np.random.rand(m, k))

    XtUS = cm.CUDAMatrix(np.random.rand(m, l))
    VSt = cm.CUDAMatrix(np.random.rand(n, k))
    VStUt = cm.CUDAMatrix(np.random.rand(n, m))
    UtX = cm.CUDAMatrix(np.random.rand(k, n))
    VStUtXV = cm.CUDAMatrix(np.random.rand(n, l))
    V_aux = cm.CUDAMatrix(np.random.rand(n, l))

    UtXV = cm.CUDAMatrix(np.random.rand(k, l))
    UtUS = cm.CUDAMatrix(np.random.rand(k, l))
    UtUSVt = cm.CUDAMatrix(np.random.rand(k, n))
    UtUSVtV = cm.CUDAMatrix(np.random.rand(k, l))
    S_aux = cm.CUDAMatrix(np.random.rand(k, l))

    # residual buffer for the reconstruction error
    residual = cm.empty((m, n))

    error_best = np.inf
    error = np.inf
    for i in range(num_iters):
        # multiplicative update of U
        cm.dot(X, V, target=XV)
        cm.dot(XV, S.T, target=XVSt)
        if i == 0:
            cm.dot(U, S, target=US)
            cm.dot(US, V.T, target=USVt)
        cm.dot(USVt, X.T, target=USVtXt)
        cm.dot(USVtXt, U, target=USVtXtU)
        cm.divide(XVSt, USVtXtU, U_aux)
        cm.mult(U, U_aux, U)

        # multiplicative update of V
        cm.dot(U, S, target=US)
        cm.dot(X.T, US, target=XtUS)
        cm.dot(V, S.T, target=VSt)
        cm.dot(VSt, U.T, target=VStUt)
        cm.dot(VStUt, XV, target=VStUtXV)
        cm.divide(XtUS, VStUtXV, target=V_aux)
        cm.mult(V, V_aux, V)

        # multiplicative update of S
        cm.dot(U.T, X, target=UtX)
        cm.dot(UtX, V, target=UtXV)
        cm.dot(U.T, US, target=UtUS)
        cm.dot(UtUS, V.T, UtUSVt)
        cm.dot(UtUSVt, V, target=UtUSVtV)
        cm.divide(UtXV, UtUSVtV, target=S_aux)
        cm.mult(S, S_aux, target=S)

        # reconstruction error: squared Frobenius norm of X - U S V^T,
        # written into a separate buffer so USVt (reused by the next U
        # update) is not clobbered. The original summed only over axis 0,
        # leaving a row vector that cannot be compared to a float.
        error_ant = error
        cm.dot(U, S, target=US)
        cm.dot(US, V.T, target=USVt)
        X.subtract(USVt, target=residual)
        error = residual.euclid_norm() ** 2

        if error < error_best:
            # snapshot the factors on the host; keeping a reference to the
            # CUDAMatrix itself would only alias the still-updating matrix
            U_best = U.asarray()
            S_best = S.asarray()
            V_best = V.asarray()
            error_best = error

        if np.abs(error - error_ant) <= 0.000001:
            break

    Du = np.diag(np.ones(m).dot(U_best))
    Dv = np.diag(np.ones(n).dot(V_best))

    U_norm = U_best.dot(np.diag(S_best.dot(Dv).dot(np.ones(l))))
    V_norm = V_best.dot(np.diag(np.ones(k).dot(Du).dot(S_best)))

    rows_ind = np.argmax(U_best, axis=1)
    cols_ind = np.argmax(V_best, axis=1)

    cm.shutdown()

    return U_norm, S_best, V_norm, rows_ind, cols_ind, error_best
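# A hedged usage sketch for the co-clustering factorization above: cluster a
# random nonnegative matrix into 5 row clusters and 4 column clusters (the
# multiplicative updates assume nonnegative input; data invented).
X = np.random.rand(200, 80)
U_norm, S_best, V_norm, rows_ind, cols_ind, err = \
    matrix_factorization_clustering(X, 5, 4, num_iters=50)
print rows_ind[:10]  # cluster assignments of the first ten rows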
# NMF algorithms in cudamat

import numpy as np
import cudamat as cm
import NMFbase

# initialize the cudamat library
cm.cublas_init()


class NMFcudamat(NMFbase.NMFbase):

    def getH(self):
        return self.H_gpu.asarray()

    def getW(self):
        return self.W_gpu.asarray()


class NMF(NMFcudamat):

    def setVariables(self):
        self.H_gpu = cm.CUDAMatrix(self.H)
        self.W_gpu = cm.CUDAMatrix(self.W)
        self.X_gpu = cm.CUDAMatrix(self.X)
        self.WTW_gpu = cm.empty((self.rank, self.rank))
        self.WTWH_gpu = cm.empty(self.H.shape)
        self.WTX_gpu = cm.empty(self.H.shape)
        self.XHT_gpu = cm.empty(self.W.shape)
        self.WH_gpu = cm.empty(self.X.shape)
        self.WHHT_gpu = cm.empty(self.W.shape)
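# The buffers preallocated in setVariables() match the Lee-Seung
# multiplicative updates H <- H * (W^T X)/(W^T W H) and
# W <- W * (X H^T)/(W H H^T). A hedged sketch of one such update step over
# those buffers (an assumption; the class's real update method is not shown):
def update_once(self):
    # H update: numerator W^T X, denominator W^T W H
    cm.dot(self.W_gpu.T, self.X_gpu, target=self.WTX_gpu)
    cm.dot(self.W_gpu.T, self.W_gpu, target=self.WTW_gpu)
    cm.dot(self.WTW_gpu, self.H_gpu, target=self.WTWH_gpu)
    self.WTX_gpu.divide(self.WTWH_gpu)
    self.H_gpu.mult(self.WTX_gpu)
    # W update: numerator X H^T, denominator (W H) H^T
    cm.dot(self.X_gpu, self.H_gpu.T, target=self.XHT_gpu)
    cm.dot(self.W_gpu, self.H_gpu, target=self.WH_gpu)
    cm.dot(self.WH_gpu, self.H_gpu.T, target=self.WHHT_gpu)
    self.XHT_gpu.divide(self.WHHT_gpu)
    self.W_gpu.mult(self.XHT_gpu)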