def rbmHtoV(m, X):
    """Convey data from the hidden layer to the visible layer on the GPU.

    Args:
        m: RBM model; ``m.weight``, ``m.biasV`` and ``m.type`` are read.
        X: hidden-layer data; ``X.shape[0]`` is used as the number of cases.

    Returns:
        The array produced by ``asarray()`` on the visible activations, with
        ``X.shape[0]`` rows and one column per entry of ``m.biasV``.
        For ``m.type == "BB"`` this is ``sigmoid(X * weight.T + biasV)``,
        for ``"BG"`` the same expression without the sigmoid.  For ``"GB"``
        (and any other type) nothing is computed and the zero-initialised
        buffer is returned.
    """
    cm.cublas_init()
    onDevice = []
    try:
        # copy data to GPU; every matrix is recorded as soon as it exists so
        # that it is released even when a later step raises
        data = cm.CUDAMatrix(cm.reformat(X))
        onDevice.append(data)
        weight = cm.CUDAMatrix(cm.reformat(m.weight))
        onDevice.append(weight)
        biasV = cm.CUDAMatrix(cm.reformat(m.biasV))
        onDevice.append(biasV)

        nCase = X.shape[0]
        nVis = biasV.asarray().size
        VisActP = cm.CUDAMatrix(np.zeros((nCase, nVis)))
        onDevice.append(VisActP)

        if m.type in ("BB", "BG"):
            cm.dot(data, weight.T, target=VisActP)
            VisActP.add_row_vec(biasV)
            if m.type == "BB":
                VisActP.apply_sigmoid()
        # "GB" is intentionally left as a no-op, as in the original code.

        return VisActP.asarray()
    finally:
        # free device memory and shut the library down on every exit path
        for matrix in onDevice:
            matrix.free_device_memory()
        cm.shutdown()
def infer(
    full_sim_matrix_file: str,
    repr_sim_matrix_file: str,
    output: str,
    n_components: int,
    sim: str,
    use_gpu: bool,
):
    """Optimize projections from similarity matrices stored on disk.

    Both matrices are read with ``np.load`` (the repr matrix first), the
    directory ``<output>/optim`` is created if needed, and everything is
    handed to ``optimize_projections``.  When ``use_gpu`` is set, cudamat is
    shut down once the optimization has returned.
    """
    secho(
        f"Loading the repr similarity matrix for the full vocabulary to {repr_sim_matrix_file}"
    )
    repr_matrix = np.load(repr_sim_matrix_file)

    secho(
        f"Loading the full similarity matrix for the full vocabulary to {full_sim_matrix_file}"
    )
    full_matrix = np.load(full_sim_matrix_file)

    optim_dir = os.path.join(output, 'optim')
    os.makedirs(optim_dir, exist_ok=True)

    optimize_projections(
        output=optim_dir,
        repr_similarity_matrix=repr_matrix,
        full_similarity_matrix=full_matrix,
        n_components=n_components,
        similarity_type=sim,
        use_gpu=use_gpu,
    )

    if use_gpu:
        # only shut down after all loops have used this function
        import cudamat as cm
        cm.shutdown()

    secho(f"done. Enjoy your {make_ratvec(3)}")
def rbmHtoV(m, X):
    """Convey data from the hidden layer to the visible layer on the GPU.

    Args:
        m: RBM model; ``m.weight``, ``m.biasV`` and ``m.type`` are read.
        X: hidden-layer data; ``X.shape[0]`` is used as the number of cases.

    Returns:
        The array produced by ``asarray()`` on the visible activations, with
        ``X.shape[0]`` rows and one column per entry of ``m.biasV``.
        For ``m.type == "BB"`` this is ``sigmoid(X * weight.T + biasV)``,
        for ``"BG"`` the same expression without the sigmoid.  For ``"GB"``
        (and any other type) nothing is computed and the zero-initialised
        buffer is returned.
    """
    cm.cublas_init()
    onDevice = []
    try:
        # copy data to GPU; every matrix is recorded as soon as it exists so
        # that it is released even when a later step raises
        data = cm.CUDAMatrix(cm.reformat(X))
        onDevice.append(data)
        weight = cm.CUDAMatrix(cm.reformat(m.weight))
        onDevice.append(weight)
        biasV = cm.CUDAMatrix(cm.reformat(m.biasV))
        onDevice.append(biasV)

        nCase = X.shape[0]
        nVis = biasV.asarray().size
        VisActP = cm.CUDAMatrix(np.zeros((nCase, nVis)))
        onDevice.append(VisActP)

        if m.type in ("BB", "BG"):
            cm.dot(data, weight.T, target=VisActP)
            VisActP.add_row_vec(biasV)
            if m.type == "BB":
                VisActP.apply_sigmoid()
        # "GB" is intentionally left as a no-op, as in the original code.

        return VisActP.asarray()
    finally:
        # free device memory and shut the library down on every exit path
        for matrix in onDevice:
            matrix.free_device_memory()
        cm.shutdown()
def main():
    """Time every ``bench_*`` function of this module and print the results.

    Each benchmark is run on the small and on the large matrix shape; the
    figure printed is the best ``timeit.repeat`` result divided by the number
    of iterations.  cudamat is initialised first and shut down at the end.
    """
    cmt.init()
    cmt.CUDAMatrix.init_random()

    if HEATUP:
        print("heating up for %g seconds..." % HEATUP, end=' ')
        sys.stdout.flush()
        heatup(HEATUP)
        print("done.")

    print("small matrix shape:", XS_SHAPE)
    print("large matrix shape:", XL_SHAPE)

    prefix = 'bench_'
    variants = (('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1))

    for funcname, func in getmembers(getmodule(main), isfunction):
        if not funcname.startswith(prefix):
            continue
        print("%-15s" % funcname[len(prefix):], end=' ')
        sys.stdout.flush()
        for size, shape, factor in variants:
            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
            loops = NUM_ITER * factor
            timings = timeit.repeat(
                setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
                stmt="%s(*mats)" % funcname,
                repeat=repeat,
                number=loops)
            best = min(timings) / loops
            print("%.3es (%s) " % (best, size), end=' ')
            sys.stdout.flush()
        print()

    cmt.shutdown()
def main(argv):
    """Run the graph agent on Gym's ``SpaceInvaders-v0`` environment.

    ``argv`` is accepted but not read.  The module-level ``episodes`` and
    ``graph_size`` control the number of episodes and the size of the random
    state vector ``x``.  cudamat is initialised at the start and shut down at
    the end; the graph is saved to ``graph.npy``.
    """
    global episodes
    global graph_size
    cm.init()
    env = gym.make('SpaceInvaders-v0')
    action_space = env.action_space.n
    observation_shape = env.observation_space.shape
    # Size of the observation part of the input vector: the first two
    # dimensions are divided by 3 and 4 respectively.
    # presumably this matches a downsampling done in create_input -- TODO confirm
    observation_space = observation_shape[0] / 3 * observation_shape[
        1] / 4 * observation_shape[2]
    observation_space = int(observation_space)
    total_size = graph_size + action_space + observation_space + 1  # Additional spot for reward
    graph = Graph(total_size)
    # Run until done
    for i in range(episodes):
        # Initial step
        x = np.random.normal(size=graph_size)
        action = np.zeros(action_space)
        input_val = create_input(env.reset(), action, x, 0.0)
        output = graph.predict(input_val, 0.2)
        # The action scores sit right after the observation part of the output.
        action = output[observation_space:observation_space + action_space]
        while True:
            observation, reward, done, info = env.step(np.argmax(action))
            if done:
                # Prints the reward of the last step only.
                print('Final reward: %f' % (reward, ))
                break
            # Update graph
            input_val = create_input(observation, action, x, reward)
            x = graph(input_val, 0.2)
            # Keep what follows the observation and action parts, minus the
            # last entry (the extra spot reserved for the reward above).
            x = x[observation_space + action_space:-1]
            # env.render()
            # Select next action
            if random.random() < 0.3:
                # With probability 0.3 take a one-hot random action.
                action = np.zeros(action_space)
                action[env.action_space.sample()] = 1.0
            else:
                # 10000.0 is passed where the reward goes in the other calls;
                # presumably it asks the graph for a high-reward action -- TODO confirm
                input_val = create_input(observation, np.zeros(action_space), x, 10000.0)
                output = graph.predict(input_val, 0.2)
                action = output[observation_space:observation_space + action_space]
    # NOTE(review): the source layout does not show whether the save belongs
    # inside the episode loop or after it; it is placed after the loop here --
    # confirm against the original file.
    graph.save('graph.npy')
    env.close()
    cm.shutdown()
def rbmVtoH(m, X):
    """Convey data from the visible layer to the hidden layer on the GPU.

    Args:
        m: RBM model; ``m.weight``, ``m.biasH`` and ``m.type`` are read.
        X: visible-layer data; ``X.shape[0]`` is used as the number of cases.

    Returns:
        The array produced by ``asarray()`` on the hidden activations, with
        ``X.shape[0]`` rows and one column per entry of ``m.biasH``.
        For ``m.type == "BB"`` this is ``sigmoid(X * weight + biasH)``,
        for ``"BG"`` the same expression without the sigmoid.  For ``"GB"``
        (and any other type) nothing is computed and the zero-initialised
        buffer is returned.
    """
    cm.cublas_init()
    onDevice = []
    try:
        # copy data to GPU; every matrix is recorded as soon as it exists so
        # that it is released even when a later step raises
        data = cm.CUDAMatrix(cm.reformat(X))
        onDevice.append(data)
        weight = cm.CUDAMatrix(cm.reformat(m.weight))
        onDevice.append(weight)
        biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
        onDevice.append(biasH)

        nCase = X.shape[0]
        nHid = biasH.asarray().size
        hidActP = cm.CUDAMatrix(np.zeros((nCase, nHid)))
        onDevice.append(hidActP)

        if m.type in ("BB", "BG"):
            cm.dot(data, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            if m.type == "BB":
                hidActP.apply_sigmoid()
        # "GB" is intentionally left as a no-op, as in the original code.

        return hidActP.asarray()
    finally:
        # free device memory and shut the library down on every exit path
        for matrix in onDevice:
            matrix.free_device_memory()
        cm.shutdown()
def main():
    """Benchmark all ``bench_*`` functions defined in this module.

    Initialises cudamat, optionally heats the device up for ``HEATUP``
    seconds, then prints for every benchmark the best per-iteration time on
    the small and on the large matrix shape before shutting cudamat down.
    """
    cmt.init()
    cmt.CUDAMatrix.init_random()

    if HEATUP:
        print("heating up for %g seconds..." % HEATUP, end=' ')
        sys.stdout.flush()
        heatup(HEATUP)
        print("done.")

    print("small matrix shape:", XS_SHAPE)
    print("large matrix shape:", XL_SHAPE)

    benchmarks = [
        (funcname, func)
        for funcname, func in getmembers(getmodule(main), isfunction)
        if funcname.startswith('bench_')
    ]
    for funcname, func in benchmarks:
        label = funcname[len('bench_'):]
        print("%-15s" % label, end=' ')
        sys.stdout.flush()
        for size, shape, factor in (('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1)):
            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
            setup_code = "from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape)
            statement = "%s(*mats)" % funcname
            fastest = min(timeit.repeat(setup=setup_code,
                                        stmt=statement,
                                        repeat=repeat,
                                        number=NUM_ITER * factor))
            per_iteration = fastest / (NUM_ITER * factor)
            print("%.3es (%s) " % (per_iteration, size), end=' ')
            sys.stdout.flush()
        print()

    cmt.shutdown()
# Project the train and test feature files through the pooling matrix W on
# the GPU, batchsz columns at a time, and store both results in one .mat file.
batchsz = 1000
tic = time.time()
cm.cublas_init()

pool = scipy.io.loadmat(pool_path)
W = cm.CUDAMatrix(pool.get('W'))
data_dim, pool_dim = W.shape[0], W.shape[1]

trainXP = np.zeros((pool_dim, trainsz))
testXP = np.zeros((pool_dim, testsz))
# device buffer that receives one projected batch; reused for every batch
XP_gpu = cm.empty((pool_dim, batchsz))

for n in range(n_train_file):
    print("processing train %d" % n)
    # file names are numbered from 1
    XC = np.load(train_path % (n + 1))
    for i in range(filesz/batchsz):
        first = i * batchsz
        last = (i + 1) * batchsz
        XC_gpu = cm.CUDAMatrix(XC[:, first:last])
        cm.dot(W.T, XC_gpu, target=XP_gpu)
        trainXP[:, n * filesz + first:n * filesz + last] = XP_gpu.asarray()

for n in range(n_test_file):
    print("processing test %d" % n)
    XC = np.load(test_path % (n + 1))
    for i in range(filesz/batchsz):
        first = i * batchsz
        last = (i + 1) * batchsz
        XC_gpu = cm.CUDAMatrix(XC[:, first:last])
        cm.dot(W.T, XC_gpu, target=XP_gpu)
        testXP[:, n * filesz + first:n * filesz + last] = XP_gpu.asarray()

cm.shutdown()
scipy.io.savemat(out_path, {"trainXC": trainXP, "testXC": testXP})
print("time %f" % (time.time() - tic))
def rbmPredict(m, X):
    """Use a trained RBM model with a label layer to do prediction on the GPU.

    For every class ``c`` the score column ``F[:, c]`` is computed as
    ``sum(log(1 + exp(X * weight + weightLabel[c] + biasH)), axis=1)
    + biasLabel[0, c]``.  The predicted class of a case is the one whose
    score lies within 1e-5 of the row maximum.

    Args:
        m: model; ``labels``, ``weight``, ``biasH``, ``weightLabel`` and
            ``biasLabel`` are read.
        X: data with one case per row.

    Returns:
        ``[result, F]`` where ``result`` holds the entries of ``m.labels``
        selected for the cases and ``F`` is the host copy of the score
        matrix with ``X.shape[0]`` rows and ``m.labels.size`` columns.
    """
    nClass = m.labels.size
    numCase = X.shape[0]

    # (An equivalent CPU implementation used to live here as commented-out
    # code; the computation below is its GPU counterpart.)
    cm.cublas_init()
    onDevice = []

    def toDevice(array):
        # Upload ``array`` and remember the matrix for the final clean-up.
        matrix = cm.CUDAMatrix(array)
        onDevice.append(matrix)
        return matrix

    try:
        # copy data to GPU
        data = toDevice(cm.reformat(X))
        weight = toDevice(cm.reformat(m.weight))
        biasH = toDevice(cm.reformat(m.biasH))
        weightLabel = toDevice(cm.reformat(m.weightLabel))
        biasLabel = toDevice(cm.reformat(m.biasLabel))

        F = toDevice(np.zeros((numCase, nClass)))
        Fcol = toDevice(np.zeros((numCase, 1)))
        temp = toDevice(np.zeros((numCase, nClass)))
        tt = toDevice(np.zeros((numCase, biasH.asarray().size)))

        for index in range(nClass):
            # one-hot label matrix selecting class ``index`` for every case
            temp.assign(0)
            temp.set_col_slice(index, index + 1, 1)

            # Write into the buffers allocated above instead of letting
            # cm.dot / cm.sum allocate fresh device matrices on every pass.
            cm.dot(data, weight, target=tt)
            tt.add_dot(temp, weightLabel)
            tt.add_row_vec(biasH)
            cm.log_1_plus_exp(tt, target=tt, exact=True)

            cm.sum(tt, axis=1, target=Fcol)
            Fcol.add_mult(temp.get_col_slice(index, index + 1),
                          biasLabel.numpy_array[0, index])
            F.set_col_slice(index, index + 1, Fcol)

        F.copy_to_host()
        scores = F.numpy_array
    finally:
        # free device memory and shut the library down on every exit path
        for matrix in onDevice:
            matrix.free_device_memory()
        cm.shutdown()

    # NOTE(review): when two classes of one case tie within 1e-5, np.where
    # yields several entries for that row and ``result`` becomes longer than
    # the number of cases -- confirm whether np.argmax is what is wanted.
    _, y = np.where(
        np.abs(scores - np.max(scores, axis=1, keepdims=True)) < 1e-5)

    result = np.zeros(y.shape)
    for position, classIndex in enumerate(y):
        result[position] = m.labels[classIndex]

    return [result, scores]
def __del__(self):
    """Shut the cudamat library down when the object is collected.

    ``__del__`` may run while the interpreter is exiting, when the module
    global ``cm`` can already be cleared or set to ``None``; in that case
    there is nothing left to shut down and the call is skipped instead of
    raising inside the finalizer.
    """
    backend = globals().get("cm")
    if backend is not None:
        backend.shutdown()
def main(): cmt.init() cmt.CUDAMatrix.init_random() if HEATUP: print "heating up for %g seconds..." % HEATUP, sys.stdout.flush() heatup(HEATUP) print "done." print "small matrix shape:", XS_SHAPE print "large matrix shape:", XL_SHAPE for funcname, func in ifilter(lambda (fn, f): fn.startswith('bench_'), getmembers(getmodule(main), isfunction)): print "%-15s" % funcname[len('bench_'):], sys.stdout.flush() for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1): repeat = NUM_REPEATS * getattr(func, 'repeats', 1) time = min(timeit.repeat(\ setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape), stmt="%s(*mats)" % funcname, repeat=repeat, number=NUM_ITER * factor)) / (NUM_ITER * factor) print "%.3es (%s) " % (time, size), sys.stdout.flush() print cmt.shutdown() if __name__ == "__main__": main()
# NOTE(review): the statement below looks like the tail of a definition that
# starts outside the visible part of the file (possibly the warm-up helper
# called as heatup() further down); it is kept verbatim -- confirm its
# enclosing scope and indentation against the full file.
cmt.dot(cmt.empty((200, 200)), cmt.empty((200, 200)))


def main():
    """Time every ``bench_*`` function of this module and print the results.

    cudamat is initialised first and shut down at the end.  Uses Python 2
    print statements and tuple-parameter lambdas.
    """
    cmt.init()
    cmt.CUDAMatrix.init_random()
    if HEATUP:
        # the trailing comma keeps "done." on the same output line
        print "heating up for %g seconds..." % HEATUP,
        sys.stdout.flush()
        heatup(HEATUP)
        print "done."
    print "small matrix shape:", XS_SHAPE
    print "large matrix shape:", XL_SHAPE
    # every module-level function whose name starts with 'bench_'
    for funcname, func in ifilter(lambda (fn, f): fn.startswith('bench_'),
            getmembers(getmodule(main), isfunction)):
        print "%-15s" % funcname[len('bench_'):],
        sys.stdout.flush()
        # the small shape runs 10x as many iterations as the large one
        for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1):
            # a benchmark may scale the repeat count via a 'repeats' attribute
            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
            # best run, divided by the iteration count: time per call
            time = min(timeit.repeat(\
                    setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
                    stmt="%s(*mats)" % funcname,
                    repeat=repeat, number=NUM_ITER * factor)) / (NUM_ITER * factor)
            print "%.3es (%s) " % (time, size),
            sys.stdout.flush()
        # end the output line of this benchmark
        print
    cmt.shutdown()

if __name__=="__main__":
    main()
def main(
    full_vocab_file: str,
    repr_vocab_file: str,
    output: str,
    n_components: int,
    sim: str,
    sim_alignment_matrix: str,
    n_ngram: int,
    use_gpu: bool,
    processes: int,
) -> None:
    """Compute KPCA embeddings on a given data set.

    Steps: preprocess the vocabularies (the repr vocabulary falls back to
    the full one when no file is given), write ``training_manifest.json``,
    compute the repr/repr and full/repr similarity matrices with the method
    selected by ``sim``, save both as ``.npy`` files in ``output`` and run
    ``optimize_projections`` into ``<output>/optim``.  When ``n_components``
    is ``None`` it defaults to two thirds of the repr vocabulary size,
    rounded.  With ``use_gpu`` cudamat is initialised before and shut down
    after the computation.
    """
    n = n_ngram  # meh
    output = os.path.abspath(output)
    os.makedirs(output, exist_ok=True)

    full_vocab = _preprocess_vocab_file(full_vocab_file)
    repr_vocab = (full_vocab if repr_vocab_file is None else
                  _preprocess_vocab_file(repr_vocab_file))

    params_path = os.path.join(output, 'training_manifest.json')
    secho(f'Outputting training information to {params_path}')
    manifest = {
        'sim': sim,
        'n': n,
        'len_full_vocab': len(full_vocab),
        'len_repr_vocab': len(repr_vocab),
        'kernels': kernels,
    }
    with open(params_path, 'w') as file:
        json.dump(manifest, file, sort_keys=True, indent=2)

    if use_gpu:
        import cudamat as cm
        cm.cublas_init()

    if sim == 'global-alignment':
        secho(
            f'Computing global alignment similarities with {sim_alignment_matrix}'
        )
        alignment_options = dict(
            repr_vocab=repr_vocab,
            processes=processes,
            matrix=sim_alignment_matrix,
        )
        repr_similarity_matrix = calculate_global_alignment_similarity_matrix(
            full_vocab=repr_vocab,
            tqdm_desc=f'{EMOJI} Computing self-similarity matrix for '
            f'repr vocab with global alignment ({sim_alignment_matrix})',
            **alignment_options)
        full_similarity_matrix = calculate_global_alignment_similarity_matrix(
            full_vocab=full_vocab,
            tqdm_desc=f'{EMOJI} Computing similarity matrix between '
            f'full/repr vocab with global alignment ({sim_alignment_matrix})',
            **alignment_options)
    else:
        # every n-gram over the repr alphabet (plus the blank) gets an index
        alphabet = set(itt.chain.from_iterable(repr_vocab))
        alphabet.add(" ")
        ngram_to_index = {
            "".join(chars): position
            for position, chars in enumerate(itt.product(alphabet, repeat=n))
        }

        if sim == "ngram_intersec":
            secho(f'Computing n-gram sparse similarities with {sim}')
            sparse_options = dict(
                repr_vocab=repr_vocab,
                ngram_to_index=ngram_to_index,
                n=n,
            )
            repr_similarity_matrix = compute_similarity_matrix_ngram_sparse(
                full_vocab=repr_vocab, **sparse_options)
            full_similarity_matrix = compute_similarity_matrix_ngram_sparse(
                full_vocab=full_vocab, **sparse_options)
        else:  # sim == 'ngram_sim'
            secho(f'Computing n-gram similarities with {sim}')
            parallel_options = dict(
                repr_vocab=repr_vocab,
                n=n,
                ngram_to_index=ngram_to_index,
                processes=processes,  # Extra because this gets multi-processed
            )
            repr_similarity_matrix = compute_similarity_matrix_ngram_parallel(
                full_vocab=repr_vocab, **parallel_options)
            full_similarity_matrix = compute_similarity_matrix_ngram_parallel(
                full_vocab=full_vocab, **parallel_options)

    repr_similarity_matrix_path = os.path.join(output,
                                               f"repr_similarity_matrix.npy")
    secho(
        f"Saving the repr similarity matrix for the full vocabulary to {repr_similarity_matrix_path}"
    )
    np.save(repr_similarity_matrix_path,
            repr_similarity_matrix,
            allow_pickle=False)

    full_similarity_matrix_path = os.path.join(output,
                                               f"full_similarity_matrix.npy")
    secho(
        f"Saving the full similarity matrix for the full vocabulary to {full_similarity_matrix_path}"
    )
    np.save(full_similarity_matrix_path,
            full_similarity_matrix,
            allow_pickle=False)

    optim_folder = os.path.join(output, 'optim')
    os.makedirs(optim_folder, exist_ok=True)

    if n_components is None:
        n_components = int(0.5 + len(repr_vocab) * 2 / 3)

    optimize_projections(
        output=optim_folder,
        repr_similarity_matrix=repr_similarity_matrix,
        full_similarity_matrix=full_similarity_matrix,
        n_components=n_components,
        similarity_type=sim,
        use_gpu=use_gpu,
    )

    if use_gpu:
        # only shut down after all loops have used this function
        import cudamat as cm
        cm.shutdown()

    secho(f"done. Enjoy your {make_ratvec(3)}")
def rbm(X, numHid, **kwargs):
    """Train a restricted Boltzmann machine on the GPU.

    X        : training data, one example per row.  When the type is BB it
               should be binary, or in [0, 1] to be interpreted as
               probabilities; when the type is GB it should be continuous
               real values.
    numHid   : number of nodes of the hidden layer.

    Keyword options (defaults in parentheses):
    modelType  RBM type, "BB" or "GB" ("BB")
    method     CD or SML ("CD"); accepted, but it does not alter the
               computation done by this function
    eta        learning rate (0.1)
    momentum   factor applied to the previous update before every batch (0.5)
    maxEpoch   number of epochs, each a full pass through the data (500)
    avgLast    accepted, not used by this function (0)
    penalty    accepted, not used by this function (0)
    batchSize  number of training instances per batch (100)
    verbose    print progress (True)

    Returns the object built by ``m.rbmModel`` from the learned ``weight``,
    ``biasV`` and ``biasH``, the model type, and ``top`` (the hidden
    activation probabilities of ``X`` in its original row order, to be used
    when training DBNs).

    The configured batch size is never overwritten: only the last batch of
    an epoch may be shorter, and every epoch covers all examples.
    """
    arg = util.processOptions(kwargs,
                              modelType="BB",
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    modelType = arg["modelType"]
    eta = arg["eta"]
    momentum = arg["momentum"]
    maxEpoch = arg["maxEpoch"]
    batchSize = arg["batchSize"]
    verbose = arg["verbose"]

    # numCases : number of examples; numVis : length of each example
    # (each row is an example)
    [numCases, numVis] = list(X.shape)

    if verbose:
        print("processing data")

    numBatch = util.ceil(numCases, batchSize)

    # shuffle a copy so that the caller's array keeps its row order
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init temporary storage; bufferRows tracks the row count it was sized for
    bufferRows = batchSize
    visActP = cm.empty((bufferRows, numVis))
    hidActP = cm.empty((bufferRows, numHid))
    hidState = cm.empty((bufferRows, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # rows of this batch; the last batch of an epoch may be shorter
            first = batchSize * batch
            last = min(first + batchSize, numCases)
            visTrue = deviceData.get_row_slice(first, last)
            curSize = visTrue.shape[0]

            if curSize != bufferRows:
                # Resize the temporaries for this batch only.  batchSize
                # itself stays untouched: overwriting it with the short last
                # batch made all later epochs use the short size and skip
                # part of the data.
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidState = cm.empty((curSize, numHid))
                bufferRows = curSize

            visActP.assign(visTrue)

            # apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            # sample the hidden states; the sample overwrites hidActP
            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            # negative phase
            if modelType == "BB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()
            elif modelType == "GB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                # NOTE(review): a host array is passed to add() here --
                # confirm that the cudamat version in use accepts it.
                visActP.add(np.random.randn(curSize, numVis), target=visActP)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            # update weight and bias, scaled by the size of this batch
            weight.add_mult(weightInc, eta / curSize)
            biasV.add_mult(biasVInc, eta / curSize)
            biasH.add_mult(biasHInc, eta / curSize)

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free device memory
            visTrue.free_device_memory()

        if verbose:
            print("epoch %d/%d. Reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error)))

    # save rbm model
    deviceX = cm.CUDAMatrix(cm.reformat(X))
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceX, weight, target=top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array,
                        biasH.numpy_array, type=modelType,
                        top=top.numpy_array)

    # free device memory
    for matrix in (deviceData, deviceX, top, weight, biasV, biasH,
                   weightInc, biasVInc, biasHInc,
                   hidActP, visActP, hidState):
        matrix.free_device_memory()

    cm.shutdown()

    return model_
def rbm(data, numHid, modelType="BB", **kwargs):
    """Train a restricted Boltzmann machine on the GPU.

    data      : when the type is BB it should be binary, or in [0, 1] to be
                interpreted as probabilities; when the type is GB it should
                be continuous real values.  One example per row.  The array
                is shuffled in place by this function.
    numHid    : number of nodes of the hidden layer
    modelType : RBM type, "BB" or "GB"

    Additional inputs (name/value pairs, defaults in parentheses):
    method     CD or SML ("CD")
    eta        learning rate (0.1)
    momentum   momentum for smoothness and to prevent overfitting (0.9)
               NOTE: momentum is not recommended with SML
    maxEpoch   number of epochs, each a full pass through the data (50)
    avgLast    how many epochs before maxEpoch to start averaging; procedure
               suggested for faster convergence by Kevin Swersky in his MSc
               thesis (0)
    penalty    weight decay factor (0)
    batchSize  number of training instances per batch (50)
    verbose    print progress (True)
    anneal     if true, the penalty is annealed linearly through the epochs
               to 10% of its original value (False)

    Returns the object built by ``m.rbmModel`` from ``weightAgv``,
    ``biasVAgv``, ``biasHAgv``, the model type and ``top``.
    """
    arg = util.processOptions(kwargs, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.9,\
                            maxEpoch = 50, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 50, \
                            verbose = True, \
                            anneal = False)
    [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"],\
        arg["anneal"]
    ]

    # epoch from which the averaging branch below is taken
    avgStart = maxEpoch - avgLast

    # original penalty, kept for the annealing formula
    oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(data.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data (in place: the caller's array is reordered)
    np.random.shuffle(data)

    # init CUDA
    # cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init temporary storage, sized for a full batch
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidActP2 = cm.empty((batchSize, numHid))
    visState = cm.empty((batchSize, numVis))
    hidState = cm.empty((batchSize, numHid))

    t = 1
    for epoch in range(maxEpoch):
        error = []

        if anneal:
            # apply linear weight decay
            penalty = oldPenalty - 0.9 * epoch / maxEpoch * oldPenalty

        for batch in range(numBatch):
            # train each data batch
            # NOTE(review): batchSize is overwritten with the size of the
            # current slice.  After a short last batch the smaller value
            # stays in effect for all later epochs, and the temporaries
            # above are not resized for the short batch -- confirm whether
            # numCases is expected to be a multiple of batchSize.
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch, batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            # sample binary hidden states from the activation probabilities
            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidState)

            # both branches are no-ops: the method does not change the
            # computation here
            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            if cmp(modelType, "BB") == 0:
                cm.dot(hidState, weight.transpose(), target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

                visState.fill_with_rand()
                visState.less_than(visActP, target=visState)
            elif cmp(modelType, "GB") == 0:
                cm.dot(hidState, weight.transpose(), target=visActP)
                visActP.add_row_vec(biasV)

                # NOTE(review): a host array is passed to add() here --
                # confirm that the cudamat version in use accepts it.
                visActP.add(np.random.randn(batchSize, numVis), target=visState)

            # another positive phase
            cm.dot(visState, weight, target=hidActP2)
            hidActP2.add_row_vec(biasH)
            hidActP2.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP2, target=hidState)

            # update weight and bias
            # NOTE(review): the calls below pass no target; if cudamat then
            # operates in place (as it appears to), visTrue.sum(...),
            # hidActP.sum(...) and weight.mult(penalty) modify or rescale
            # their receivers -- in particular weight would be multiplied by
            # penalty (0 by default).  Confirm against the cudamat API.
            dWeight = cm.dot(visTrue.transpose(), hidActP)
            dWeight.subtract_dot(visState.transpose(), hidActP2)
            dBiasV = visTrue.sum(axis=0).subtract(visState.sum(axis=0))
            dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis=0))

            dWeight.divide(batchSize).subtract(weight.mult(penalty))
            dBiasV.divide(batchSize)
            dBiasH.divide(batchSize)

            weightInc.mult(momentum).add_mult(dWeight, eta)
            biasVInc.mult(momentum).add_mult(dBiasV, eta)
            biasHInc.mult(momentum).add_mult(dBiasH, eta)

            weight.add(weightInc)
            biasV.add(biasVInc)
            biasH.add(biasHInc)

            if epoch > avgStart:
                # apply average
                # NOTE(review): weightAgv/biasVAgv/biasHAgv are bound in the
                # else branch to the very same objects as weight/biasV/biasH
                # (no copy is made), so these updates act on the live
                # parameters -- confirm whether a copy was intended.
                weightAgv.subtract(weightAgv.subtract(weight).mult(1.0 / t))
                biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0 / t))
                biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0 / t))
                t = t + 1
            else:
                weightAgv = weight
                biasVAgv = biasV
                biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceData, weightAgv, target=top)
    top.add_row_vec(biasHAgv)
    top.apply_sigmoid()

    # NOTE(review): the model receives device matrix objects (no
    # copy_to_host / numpy_array is used here) right before cm.shutdown() --
    # confirm that rbmModel copies what it needs.
    model_ = m.rbmModel(weightAgv, biasVAgv, biasHAgv, type=modelType, top=top)

    cm.shutdown()

    return model_
def matrix_factorization_clustering(X_aux, k, l, norm=False, num_iters=100):
    """Co-cluster ``X_aux`` with multiplicative updates of ``U S V^T`` on the GPU.

    Args:
        X_aux: host matrix with ``m`` rows and ``n`` columns.
        k: number of row clusters (columns of ``U``).
        l: number of column clusters (columns of ``V``).
        norm: accepted but unused (the normalisation step is disabled).
        num_iters: maximum number of update rounds.

    Returns:
        ``(U_norm, S_best, V_norm, rows_ind, cols_ind, error_best)`` where
        ``rows_ind`` / ``cols_ind`` are the arg-max columns of ``U`` / ``V``
        and ``U_norm`` / ``V_norm`` are ``U`` / ``V`` rescaled with the
        diagonal matrices built from ``S`` and the column sums.

    The module-level helpers ``cm.divide``, ``cm.mult``, ``cm.subtract`` and
    ``cm.pow`` are used exactly as before; their semantics depend on the
    ``cm`` module in use.
    """
    cm.cublas_init()
    try:
        m, n = X_aux.shape

        def randomOnDevice(rows, cols):
            # Device matrix filled with uniform random numbers.
            return cm.CUDAMatrix(np.random.rand(rows, cols))

        U = randomOnDevice(m, k)
        S = randomOnDevice(k, l)
        V = randomOnDevice(n, l)

        X = cm.CUDAMatrix(X_aux)

        # scratch buffers for the intermediate products
        XV = randomOnDevice(m, l)
        XVSt = randomOnDevice(m, k)
        US = randomOnDevice(m, l)
        USVt = randomOnDevice(m, n)
        USVtXt = randomOnDevice(m, m)
        USVtXtU = randomOnDevice(m, k)
        U_aux = randomOnDevice(m, k)

        # X.T (n x m) times US (m x l) is n x l; the buffer used to be
        # allocated as m x l, which only fits when m == n.
        XtUS = randomOnDevice(n, l)
        VSt = randomOnDevice(n, k)
        VStUt = randomOnDevice(n, m)
        UtX = randomOnDevice(k, n)
        VStUtXV = randomOnDevice(n, l)
        V_aux = randomOnDevice(n, l)

        UtXV = randomOnDevice(k, l)
        UtUS = randomOnDevice(k, l)
        UtUSVt = randomOnDevice(k, n)
        UtUSVtV = randomOnDevice(k, l)
        S_aux = randomOnDevice(k, l)

        error_best = np.inf
        error = np.inf

        # NOTE(review): the "best" names reference the live matrices, which
        # are updated in place through their targets, so they always hold
        # the latest iterate rather than a snapshot -- confirm whether a
        # copy was intended.  Binding them up front keeps the function from
        # failing when the loop never records an improvement.
        U_best_cm = U
        S_best_cm = S
        V_best_cm = V

        for i in range(num_iters):
            # compute U
            cm.dot(X, V, target=XV)
            cm.dot(XV, S.T, target=XVSt)

            if i == 0:
                # later rounds reuse US / USVt from the error computation
                # at the end of the previous round
                cm.dot(U, S, target=US)
                cm.dot(US, V.T, target=USVt)
            cm.dot(USVt, X.T, target=USVtXt)
            cm.dot(USVtXt, U, target=USVtXtU)

            cm.divide(XVSt, USVtXtU, U_aux)
            cm.mult(U, U_aux, U)

            # compute V
            cm.dot(U, S, target=US)
            cm.dot(X.T, US, target=XtUS)
            cm.dot(V, S.T, target=VSt)
            cm.dot(VSt, U.T, target=VStUt)
            cm.dot(VStUt, XV, target=VStUtXV)

            cm.divide(XtUS, VStUtXV, target=V_aux)
            cm.mult(V, V_aux, V)

            # compute S
            cm.dot(U.T, X, target=UtX)
            cm.dot(UtX, V, target=UtXV)

            cm.dot(U.T, US, target=UtUS)
            cm.dot(UtUS, V.T, UtUSVt)
            cm.dot(UtUSVt, V, target=UtUSVtV)

            cm.divide(UtXV, UtUSVtV, target=S_aux)
            cm.mult(S, S_aux, target=S)

            error_ant = error

            cm.dot(U, S, target=US)
            cm.dot(US, V.T, target=USVt)
            # NOTE(review): a sum along axis 0 yields one value per column,
            # not a scalar, and cm.subtract may write into X if it operates
            # in place -- confirm what ``error`` is meant to be.
            error = cm.sum(cm.pow(cm.subtract(X, USVt), 2), axis=0)

            if error < error_best:
                U_best_cm = U
                S_best_cm = S
                V_best_cm = V
                error_best = error

            if np.abs(error - error_ant) <= 0.000001:
                break

        U_best = U_best_cm.asarray()
        S_best = S_best_cm.asarray()
        V_best = V_best_cm.asarray()

        # diagonal matrices of the column sums of U and V
        Du = np.diag(np.ones(m).dot(U_best))
        Dv = np.diag(np.ones(n).dot(V_best))

        U_norm = U_best.dot(np.diag(S_best.dot(Dv).dot(np.ones(l))))
        V_norm = V_best.dot(np.diag(np.ones(k).dot(Du).dot(S_best)))

        rows_ind = np.argmax(U_best, axis=1)
        cols_ind = np.argmax(V_best, axis=1)

        return U_norm, S_best, V_norm, rows_ind, cols_ind, error_best
    finally:
        # shut the library down on every exit path
        cm.shutdown()
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """Train an RBM with an additional label layer on the GPU.

    X           : data, one example per row; should be binary, or in [0, 1]
                  to be interpreted as probabilities
    numHid      : number of hidden units
    y           : list of discrete labels
    isSaveModel : when true, the model is also written with ``np.save``
    name        : file name handed to ``np.save``

    Keyword options (defaults in parentheses):
    nClass     number of classes (number of distinct values in ``y``)
    method     CD or SML ("CD"); accepted, but it does not alter the
               computation done by this function
    eta        learning rate (0.1)
    momentum   factor applied to the previous updates before every batch (0.5)
    maxEpoch   number of epochs, each a full pass through the data (500)
    avgLast    accepted, not used by this function (0)
    penalty    accepted, not used by this function (0)
    batchSize  number of training instances per batch (100)
    verbose    print progress (True)

    Returns the object built by ``m.rbmModel`` from ``weight``, ``biasV``,
    ``biasH``, ``weightLabel``, ``biasLabel`` and the distinct labels.

    Momentum is applied to the label *increments* (it used to rescale the
    label weights and biases themselves), and the configured batch size is
    never overwritten, so every epoch covers all examples.
    """
    arg = util.processOptions(kwargs,
                              nClass=np.unique(y).size,
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    nClass = arg["nClass"]
    eta = arg["eta"]
    momentum = arg["momentum"]
    maxEpoch = arg["maxEpoch"]
    batchSize = arg["batchSize"]
    verbose = arg["verbose"]

    if verbose:
        print("Processing data ...")

    # numCases : number of examples; numVis : length of each example
    # (each row is an example)
    [numCases, numVis] = list(X.shape)
    uniqueLabel = np.unique(y)

    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label (on a copy of the data)
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1 * np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init temporary storage; bufferRows tracks the row count it was sized for
    bufferRows = batchSize
    visActP = cm.empty((bufferRows, numVis))
    hidActP = cm.empty((bufferRows, numHid))
    hidState = cm.empty((bufferRows, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # rows of this batch; the last batch of an epoch may be shorter
            first = batchSize * batch
            last = min(first + batchSize, numCases)
            visTrue = deviceData.get_row_slice(first, last)
            labelTrue = deviceLabel.get_row_slice(first, last)
            curSize = visTrue.shape[0]

            if curSize != bufferRows:
                # Resize the temporaries for this batch only; batchSize
                # itself must keep its configured value for the next epoch.
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidState = cm.empty((curSize, numHid))
                bufferRows = curSize

            visActP.assign(visTrue)

            # apply momentum to every accumulated update
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            # sample the hidden states; the sample overwrites hidActP
            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            # negative phase
            cm.dot(hidActP, weight.T, target=visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target=labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and bias, scaled by the size of this batch
            weight.add_mult(weightInc, eta / curSize)
            biasV.add_mult(biasVInc, eta / curSize)
            biasH.add_mult(biasHInc, eta / curSize)
            weightLabel.add_mult(weightLabelInc, eta / curSize)
            biasLabel.add_mult(biasLabelInc, eta / curSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print("Epoch %d/%d, reconstruction error is %f " % (epoch + 1, maxEpoch, sum(error)))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array,
                        weightLabel=weightLabel.numpy_array,
                        biasLabel=biasLabel.numpy_array, labels=uniqueLabel)

    # free device memory
    for matrix in (deviceData, deviceLabel,
                   weight, biasV, biasH, weightLabel, biasLabel,
                   weightInc, biasVInc, biasHInc, weightLabelInc, biasLabelInc,
                   hidActP, visActP, hidState):
        matrix.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        # the model is stored as a one-element array
        modelList = [model_]
        model = np.array(modelList)
        np.save(name, model)

    return model_
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """Train an RBM with an extra label layer on the GPU and return the model.

    X           : data, one example per row; should be binary, or in [0, 1]
                  to be interpreted as probabilities
    numHid      : number of hidden units
    y           : discrete label of every row of X
    isSaveModel : when true, the model is also written with np.save(name, ...)
    name        : file name handed to np.save (only read when isSaveModel)

    additional inputs (name/value pairs, defaults in brackets):
    nClass      number of classes [np.unique(y).size]
    method      "CD" or "SML" ["CD"]; accepted, but both values currently run
                the same one-step update
    eta         learning rate [0.1]
    momentum    momentum applied to every increment matrix [0.5]
    maxEpoch    # of epochs: each is a full pass through the data [500]
    avgLast     accepted, currently unused [0]
    penalty     accepted, currently unused [0]
    batchSize   number of training instances per batch [100]
    verbose     print the reconstruction error after every epoch [True]

    Returns the object built by m.rbmModel from host copies of weight, biasV,
    biasH, weightLabel and biasLabel plus the unique labels of y.
    """
    arg = util.processOptions(kwargs,
                              nClass=np.unique(y).size,
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    nClass = arg["nClass"]
    eta = arg["eta"]
    momentum = arg["momentum"]
    maxEpoch = arg["maxEpoch"]
    batchSize = arg["batchSize"]
    verbose = arg["verbose"]

    if verbose:
        print("Processing data ...")

    # numCases : number of examples
    # numDims  : the length of each example (each row is an example)
    [numCases, numDims] = list(X.shape)
    numVis = numDims
    uniqueLabel = np.unique(y)
    # presumably a ceiling division, so the last batch may be short
    numBatch = util.ceil(numCases, batchSize)

    # presumably converts the labels to a one-row-per-example matrix with
    # nClass columns -- confirm against util.matrixLabel
    y = util.matrixLabel(y)

    # shuffle a copy so the caller's X is left untouched
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1 * np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init temporary storage; bufRows tracks the row count the buffers
    # currently have so they can follow a short final batch
    bufRows = batchSize
    visActP = cm.empty((bufRows, numVis))
    hidActP = cm.empty((bufRows, numHid))
    hidState = cm.empty((bufRows, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # batchSize itself is never modified: overwriting it with the
            # size of a short final batch made every later epoch walk over
            # only the first part of the data
            start = batchSize * batch
            end = min(start + batchSize, numCases)
            curSize = end - start

            visTrue = deviceData.get_row_slice(start, end)
            labelTrue = deviceLabel.get_row_slice(start, end)

            if curSize != bufRows:
                # release the old buffers before resizing (they used to leak)
                visActP.free_device_memory()
                hidActP.free_device_memory()
                hidState.free_device_memory()
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidState = cm.empty((curSize, numHid))
                bufRows = curSize

            visActP.assign(visTrue)

            # apply momentum to the increments. The label increments were
            # previously skipped and weightLabel/biasLabel themselves were
            # scaled instead, shrinking the learned label parameters.
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            # sample binary hidden states; the sample replaces hidActP
            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            # negative phase
            cm.dot(hidActP, weight.T, target=visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target=labelTrue)
            labelTrue.add_row_vec(biasLabel)
            # NOTE(review): if util.softmax returns a new matrix, the row
            # slice it was given is not freed below -- confirm
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and bias, scaled by the size of this batch
            step = float(eta) / curSize
            weight.add_mult(weightInc, step)
            biasV.add_mult(biasVInc, step)
            biasH.add_mult(biasHInc, step)
            weightLabel.add_mult(weightLabelInc, step)
            biasLabel.add_mult(biasLabelInc, step)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print("Epoch %d/%d, reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error)))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array,
                        biasH.numpy_array,
                        weightLabel=weightLabel.numpy_array,
                        biasLabel=biasLabel.numpy_array,
                        labels=uniqueLabel)

    # free device memory
    for mat in (deviceData, deviceLabel,
                weight, biasV, biasH, weightLabel, biasLabel,
                weightInc, biasVInc, biasHInc, weightLabelInc, biasLabelInc,
                hidActP, visActP, hidState):
        mat.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        np.save(name, np.array([model_]))

    return model_
def rbm(data, numHid, modelType="BB", **kwargs):
    """Train an RBM on the GPU, with optional weight decay and averaging.

    data      : one example per row. When modelType is "BB" it should be
                binary, or in [0, 1] to be interpreted as probabilities;
                when modelType is "GB" it should be continuous real values.
    numHid    : number of hidden units
    modelType : "BB" or "GB"

    additional inputs (name/value pairs, defaults in brackets):
    method    "CD" or "SML" ["CD"]; accepted, but both values currently run
              the same one-step update
    eta       learning rate [0.1]
    momentum  momentum for smoothness [0.9]
    maxEpoch  # of epochs: each is a full pass through the data [50]
    avgLast   how many epochs before maxEpoch to start averaging the
              parameters [0]
    penalty   weight decay factor [0]
    batchSize number of training instances per batch [50]
    verbose   print the reconstruction error after every epoch [True]
    anneal    if true, the penalty is annealed linearly through the epochs
              down to 10% of its original value [False]

    Returns the object built by m.rbmModel from host (numpy) copies of the
    averaged weight, biasV and biasH, with type=modelType and top= the hidden
    activation probabilities of `data` in the caller's row order.

    Changes with respect to the previous implementation:
      * the model is built from host arrays; it used to hold CUDAMatrix
        objects and was returned right after cm.shutdown()
      * `data` is no longer shuffled in place; a copy is shuffled
      * weight decay no longer scales `weight` in place
      * the averaged parameters live in their own matrices instead of being
        aliases of the live parameters
    """
    arg = util.processOptions(kwargs,
                              method="CD",
                              eta=0.1,
                              momentum=0.9,
                              maxEpoch=50,
                              avgLast=0,
                              penalty=0,
                              batchSize=50,
                              verbose=True,
                              anneal=False)
    eta = arg["eta"]
    momentum = arg["momentum"]
    maxEpoch = arg["maxEpoch"]
    avgLast = arg["avgLast"]
    penalty = arg["penalty"]
    batchSize = arg["batchSize"]
    verbose = arg["verbose"]
    anneal = arg["anneal"]

    # from which epoch on we start to compute the average
    avgStart = maxEpoch - avgLast

    # for weight decay use
    oldPenalty = penalty

    # numCases : number of examples
    # numDims  : the length of each example (each row is an example)
    [numCases, numDims] = list(data.shape)

    if verbose:
        print("processing data")

    numVis = numDims
    # presumably a ceiling division, so the last batch may be short
    numBatch = util.ceil(numCases, batchSize)

    # shuffle a copy so the caller's array keeps its row order
    shuffled = np.array(data, copy=True)
    np.random.shuffle(shuffled)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(shuffled))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # averaged parameters; start as copies of the initial parameters so they
    # are defined even if no batch is processed
    weightAvg = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVAvg = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHAvg = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightAvg.assign(weight)
    biasVAvg.assign(biasV)
    biasHAvg.assign(biasH)

    # gradient scratch space, reused for every batch
    dWeight = cm.empty((numVis, numHid))
    dBiasV = cm.empty((1, numVis))
    dBiasH = cm.empty((1, numHid))

    # init temporary storage; bufRows tracks the row count of these buffers
    bufRows = batchSize
    visActP = cm.empty((bufRows, numVis))
    hidActP = cm.empty((bufRows, numHid))
    hidActP2 = cm.empty((bufRows, numHid))
    visState = cm.empty((bufRows, numVis))
    hidState = cm.empty((bufRows, numHid))

    t = 1
    for epoch in range(maxEpoch):
        error = []

        if anneal:
            # apply linear weight decay
            penalty = oldPenalty - 0.9 * epoch / maxEpoch * oldPenalty

        for batch in range(numBatch):
            # batchSize is left untouched so every epoch uses the same
            # batch boundaries
            start = batchSize * batch
            end = min(start + batchSize, numCases)
            curSize = end - start
            visTrue = deviceData.get_row_slice(start, end)

            if curSize != bufRows:
                # the buffers must match the batch; they used to keep the
                # full batch size for a short final batch
                for buf in (visActP, hidActP, hidActP2, visState, hidState):
                    buf.free_device_memory()
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidActP2 = cm.empty((curSize, numHid))
                visState = cm.empty((curSize, numVis))
                hidState = cm.empty((curSize, numHid))
                bufRows = curSize

            visActP.assign(visTrue)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidState)

            # negative phase
            if modelType == "BB":
                cm.dot(hidState, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

                visState.fill_with_rand()
                visState.less_than(visActP, target=visState)
            elif modelType == "GB":
                cm.dot(hidState, weight.T, target=visActP)
                visActP.add_row_vec(biasV)

                # unit-variance Gaussian noise drawn on the device;
                # CUDAMatrix.add takes a CUDAMatrix or a scalar, so the
                # previous numpy noise array could not be added
                visState.fill_with_randn()
                visState.add(visActP)

            # another positive phase
            cm.dot(visState, weight, target=hidActP2)
            hidActP2.add_row_vec(biasH)
            hidActP2.apply_sigmoid()

            # gradient estimate, written into the preallocated scratch
            cm.dot(visTrue.T, hidActP, target=dWeight)
            dWeight.subtract_dot(visState.T, hidActP2)
            dWeight.divide(curSize)
            # weight decay: dWeight -= penalty * weight. add_mult leaves
            # `weight` untouched; weight.mult(penalty) scaled the weights
            # themselves and zeroed them when penalty was 0.
            dWeight.add_mult(weight, -penalty)

            dBiasV.assign(0)
            dBiasV.add_sums(visTrue, axis=0)
            dBiasV.add_sums(visState, axis=0, mult=-1)
            dBiasV.divide(curSize)

            dBiasH.assign(0)
            dBiasH.add_sums(hidActP, axis=0)
            dBiasH.add_sums(hidActP2, axis=0, mult=-1)
            dBiasH.divide(curSize)

            # update weight and bias
            weightInc.mult(momentum)
            weightInc.add_mult(dWeight, eta)
            biasVInc.mult(momentum)
            biasVInc.add_mult(dBiasV, eta)
            biasHInc.mult(momentum)
            biasHInc.add_mult(dBiasH, eta)

            weight.add(weightInc)
            biasV.add(biasVInc)
            biasH.add(biasHInc)

            if epoch > avgStart:
                # running mean: avg <- avg - (avg - w) / t
                keep = 1.0 - 1.0 / t
                weightAvg.mult(keep)
                weightAvg.add_mult(weight, 1.0 / t)
                biasVAvg.mult(keep)
                biasVAvg.add_mult(biasV, 1.0 / t)
                biasHAvg.mult(keep)
                biasHAvg.add_mult(biasH, 1.0 / t)
                t = t + 1
            else:
                weightAvg.assign(weight)
                biasVAvg.assign(biasV)
                biasHAvg.assign(biasH)

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free the row slice of this batch
            visTrue.free_device_memory()

        if verbose:
            print("epoch %d/%d. Reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error)))

    # hidden activations of the data in the caller's (unshuffled) row order
    deviceX = cm.CUDAMatrix(cm.reformat(data))
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceX, weightAvg, target=top)
    top.add_row_vec(biasHAvg)
    top.apply_sigmoid()

    # save rbm model from host copies so it stays usable after shutdown
    weightAvg.copy_to_host()
    biasVAvg.copy_to_host()
    biasHAvg.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weightAvg.numpy_array, biasVAvg.numpy_array,
                        biasHAvg.numpy_array, type=modelType,
                        top=top.numpy_array)

    # free device memory
    for mat in (deviceData, deviceX, top,
                weight, biasV, biasH,
                weightInc, biasVInc, biasHInc,
                weightAvg, biasVAvg, biasHAvg,
                dWeight, dBiasV, dBiasH,
                visActP, hidActP, hidActP2, visState, hidState):
        mat.free_device_memory()

    cm.shutdown()

    return model_
def shutdown():
    """Release GPU resources by delegating to cm.shutdown()."""
    cm.shutdown()
def rbmPredict(m, X):
    """Predict a label for every row of X with a trained label RBM.

    m : trained model; this function reads m.labels, m.weight, m.biasH,
        m.weightLabel and m.biasLabel
    X : data, one example per row

    For every class c the score of a row x is
        F[:, c] = biasLabel[c] + sum_j log(1 + exp(x.W + weightLabel[c] + biasH)_j)
    and the predicted label is the class with the largest score.

    Returns [result, F]: result holds one predicted label per row of X (as a
    float array) and F is the (numCase, nClass) score matrix on the host.

    The class is chosen with argmax, so result always has exactly one entry
    per row. The previous tolerance-based search returned several entries for
    a row whose best scores were within 1e-5 of each other.
    """
    nClass = m.labels.size
    numCase = X.shape[0]

    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
    weightLabel = cm.CUDAMatrix(cm.reformat(m.weightLabel))
    biasLabel = cm.CUDAMatrix(cm.reformat(m.biasLabel))

    numHid = weight.shape[1]

    F = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    temp = cm.CUDAMatrix(np.zeros((numCase, nClass)))

    # scratch space allocated once; it used to be re-allocated (and leaked)
    # in every iteration of the class loop
    visInput = cm.empty((numCase, numHid))
    tt = cm.empty((numCase, numHid))
    Fcol = cm.empty((numCase, 1))

    # data . weight does not depend on the class, so compute it only once
    cm.dot(data, weight, target=visInput)

    for index in range(nClass):
        # one-hot label matrix selecting class `index` for every row
        temp.assign(0)
        temp.set_col_slice(index, index + 1, 1)

        tt.assign(visInput)
        tt.add_dot(temp, weightLabel)
        tt.add_row_vec(biasH)

        cm.log_1_plus_exp(tt, target=tt, exact=True)

        cm.sum(tt, axis=1, target=Fcol)
        # the selected column of temp is all ones, so this adds the label
        # bias of the class to every row
        Fcol.add_mult(temp.get_col_slice(index, index + 1),
                      biasLabel.numpy_array[0, index])

        F.set_col_slice(index, index + 1, Fcol)

    F.copy_to_host()
    scores = F.numpy_array

    # free device memory
    for mat in (data, weight, biasH, biasLabel, weightLabel,
                F, Fcol, temp, tt, visInput):
        mat.free_device_memory()

    cm.shutdown()

    best = np.argmax(scores, axis=1)
    result = np.asarray(m.labels[best], dtype=float)

    return [result, scores]
def rbm(X, numHid, **kwargs):
    """Train an RBM on the GPU and return the model.

    X      : one example per row. When modelType is "BB" it should be binary,
             or in [0, 1] to be interpreted as probabilities; when modelType
             is "GB" it should be continuous real values.
    numHid : number of hidden units

    additional inputs (name/value pairs, defaults in brackets):
    modelType "BB" or "GB" ["BB"]
    method    "CD" or "SML" ["CD"]; accepted, but both values currently run
              the same one-step update
    eta       learning rate [0.1]
    momentum  momentum for smoothness [0.5]
    maxEpoch  # of epochs: each is a full pass through the data [500]
    avgLast   accepted, currently unused [0]
    penalty   accepted, currently unused [0]
    batchSize number of training instances per batch [100]
    verbose   print the reconstruction error after every epoch [True]

    Returns the object built by m.rbmModel from host copies of weight, biasV
    and biasH, with type=modelType and top= the hidden activation
    probabilities of X (in the original row order of X).
    """
    # Transposes use the .T attribute; the original author noted that the
    # .transpose() method consumes much more space.
    arg = util.processOptions(kwargs,
                              modelType="BB",
                              method="CD",
                              eta=0.1,
                              momentum=0.5,
                              maxEpoch=500,
                              avgLast=0,
                              penalty=0,
                              batchSize=100,
                              verbose=True)
    modelType = arg["modelType"]
    eta = arg["eta"]
    momentum = arg["momentum"]
    maxEpoch = arg["maxEpoch"]
    batchSize = arg["batchSize"]
    verbose = arg["verbose"]

    # numCases : number of examples
    # numDims  : the length of each example (each row is an example)
    [numCases, numDims] = list(X.shape)

    if verbose:
        print("processing data")

    numVis = numDims
    # presumably a ceiling division, so the last batch may be short
    numBatch = util.ceil(numCases, batchSize)

    # shuffle a copy of the data
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init temporary storage; bufRows tracks the row count of these buffers.
    # visNoise is only needed for Gaussian visible units.
    useGaussian = (modelType == "GB")
    bufRows = batchSize
    visActP = cm.empty((bufRows, numVis))
    hidActP = cm.empty((bufRows, numHid))
    hidState = cm.empty((bufRows, numHid))
    visNoise = cm.empty((bufRows, numVis)) if useGaussian else None

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # batchSize itself is never modified: overwriting it with the
            # size of a short final batch made every later epoch walk over
            # only the first part of the data
            start = batchSize * batch
            end = min(start + batchSize, numCases)
            curSize = end - start
            visTrue = deviceData.get_row_slice(start, end)

            if curSize != bufRows:
                # release the old buffers before resizing (they used to leak)
                visActP.free_device_memory()
                hidActP.free_device_memory()
                hidState.free_device_memory()
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidState = cm.empty((curSize, numHid))
                if useGaussian:
                    visNoise.free_device_memory()
                    visNoise = cm.empty((curSize, numVis))
                bufRows = curSize

            visActP.assign(visTrue)

            # apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            # sample binary hidden states; the sample replaces hidActP
            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            # negative phase
            if modelType == "BB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()
            elif modelType == "GB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                # unit-variance Gaussian noise drawn on the device;
                # CUDAMatrix.add takes a CUDAMatrix or a scalar, so the
                # previous numpy noise array could not be added
                visNoise.fill_with_randn()
                visActP.add(visNoise)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            # update weight and bias, scaled by the size of this batch
            step = float(eta) / curSize
            weight.add_mult(weightInc, step)
            biasV.add_mult(biasVInc, step)
            biasH.add_mult(biasHInc, step)

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free device memory
            visTrue.free_device_memory()

        if verbose:
            print("epoch %d/%d. Reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error)))

    # hidden activations of X in its original (unshuffled) row order
    deviceX = cm.CUDAMatrix(cm.reformat(X))
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceX, weight, target=top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array,
                        biasH.numpy_array, type=modelType,
                        top=top.numpy_array)

    # free device memory (deviceX and top used to be left allocated)
    for mat in (deviceData, deviceX, top,
                weight, biasV, biasH,
                weightInc, biasVInc, biasHInc,
                hidActP, visActP, hidState):
        mat.free_device_memory()
    if useGaussian:
        visNoise.free_device_memory()

    cm.shutdown()

    return model_
open( outputPath + "/lambdas_{}_{}_{}_{}_{}.p".format( eval(args.sim).__name__, len(reprVocab), kernel, hyperparam, n_components), "wb")) ''' Projection to KPCA embeddings of the vocabulary ''' with codecs.open(vocabPath, "r") as fIn: vocab = [normalize_word(w[:-1]) for w in fIn if len(w[:-1].split()) == 1] termcolor.cprint("Projecting known vocabulary to KPCA embeddings\n", "blue") alphas_lambdas_div = alphas / lambdas if useGPU: X_train = projectWordsGPU(vocab, reprVocab, hyperparam, alphas_lambdas_div, kernel) cm.shutdown() else: X_train = pool.map( projectWordTup, [(word, reprVocab, hyperparam, alphas_lambdas_div, kernel) for word in vocab]) pickle.dump( X_train, open( outputPath + "/KPCA_{}_{}_{}_{}_{}.p".format( eval(args.sim).__name__, len(reprVocab), kernel, hyperparam, n_components), "wb"))
def matrix_factorization_clustering(X_aux, k, l, norm=False, num_iters=100):
    """Co-cluster a matrix by a tri-factorization X ~ U S V^T on the GPU.

    U (m, k), S (k, l) and V (n, l) start from uniform random values and are
    refined with multiplicative updates for at most `num_iters` iterations.
    The loop stops early once the squared reconstruction error changes by no
    more than 1e-6 between two iterations.

    X_aux     : (m, n) data matrix
    k         : number of row clusters (columns of U)
    l         : number of column clusters (columns of V)
    norm      : accepted for compatibility; currently unused
    num_iters : maximum number of update iterations

    Returns (U_norm, S_best, V_norm, rows_ind, cols_ind, error_best):
    the rescaled row/column factors, the core matrix, the argmax cluster
    index of every row and column, and the smallest squared reconstruction
    error observed (np.inf when no iteration ran).

    Changes with respect to the previous implementation:
      * the scratch matrix for X^T U S is (n, l); it was allocated as (m, l)
      * the error is the scalar ||X - U S V^T||^2 rather than a per-column
        device matrix
      * the best factors are host snapshots instead of aliases of the
        matrices that keep being updated
      * element-wise products/quotients use CUDAMatrix methods
    """
    cm.cublas_init()

    m, n = X_aux.shape
    U = cm.CUDAMatrix(np.random.rand(m, k))
    S = cm.CUDAMatrix(np.random.rand(k, l))
    V = cm.CUDAMatrix(np.random.rand(n, l))

    X = cm.CUDAMatrix(X_aux)

    # scratch space; every matrix is fully overwritten before it is read, so
    # it does not need to be initialised with random numbers
    XV = cm.empty((m, l))
    XVSt = cm.empty((m, k))
    US = cm.empty((m, l))
    USVt = cm.empty((m, n))
    USVtXt = cm.empty((m, m))
    USVtXtU = cm.empty((m, k))
    U_aux = cm.empty((m, k))

    XtUS = cm.empty((n, l))
    VSt = cm.empty((n, k))
    VStUt = cm.empty((n, m))
    UtX = cm.empty((k, n))
    VStUtXV = cm.empty((n, l))
    V_aux = cm.empty((n, l))

    UtXV = cm.empty((k, l))
    UtUS = cm.empty((k, l))
    UtUSVt = cm.empty((k, n))
    UtUSVtV = cm.empty((k, l))
    S_aux = cm.empty((k, l))

    residual = cm.empty((m, n))

    # host snapshots of the best factors seen so far; seeded with the
    # initial factors so they are defined even if no iteration improves
    U_best = U.asarray().copy()
    S_best = S.asarray().copy()
    V_best = V.asarray().copy()

    error_best = np.inf
    error = np.inf

    for i in range(num_iters):
        # compute U
        cm.dot(X, V, target=XV)
        cm.dot(XV, S.T, target=XVSt)

        if i == 0:
            # on later iterations US and USVt still hold the products
            # computed for the error at the end of the previous iteration
            cm.dot(U, S, target=US)
            cm.dot(US, V.T, target=USVt)
        cm.dot(USVt, X.T, target=USVtXt)
        cm.dot(USVtXt, U, target=USVtXtU)

        XVSt.divide(USVtXtU, target=U_aux)
        U.mult(U_aux)

        # compute V
        cm.dot(U, S, target=US)
        cm.dot(X.T, US, target=XtUS)
        cm.dot(V, S.T, target=VSt)
        cm.dot(VSt, U.T, target=VStUt)
        cm.dot(VStUt, XV, target=VStUtXV)

        XtUS.divide(VStUtXV, target=V_aux)
        V.mult(V_aux)

        # compute S
        cm.dot(U.T, X, target=UtX)
        cm.dot(UtX, V, target=UtXV)

        cm.dot(U.T, US, target=UtUS)
        cm.dot(UtUS, V.T, target=UtUSVt)
        cm.dot(UtUSVt, V, target=UtUSVtV)

        UtXV.divide(UtUSVtV, target=S_aux)
        S.mult(S_aux)

        # squared reconstruction error of the updated factors
        error_ant = error

        cm.dot(U, S, target=US)
        cm.dot(US, V.T, target=USVt)

        X.subtract(USVt, target=residual)
        error = residual.euclid_norm() ** 2

        if error < error_best:
            # asarray() reuses the matrix's host buffer, hence the copies
            U_best = U.asarray().copy()
            S_best = S.asarray().copy()
            V_best = V.asarray().copy()
            error_best = error

        if np.abs(error - error_ant) <= 0.000001:
            break

    # free device memory
    for mat in (U, S, V, X,
                XV, XVSt, US, USVt, USVtXt, USVtXtU, U_aux,
                XtUS, VSt, VStUt, UtX, VStUtXV, V_aux,
                UtXV, UtUS, UtUSVt, UtUSVtV, S_aux, residual):
        mat.free_device_memory()

    cm.shutdown()

    Du = np.diag(np.ones(m).dot(U_best))
    Dv = np.diag(np.ones(n).dot(V_best))

    U_norm = U_best.dot(np.diag(S_best.dot(Dv).dot(np.ones(l))))
    V_norm = V_best.dot(np.diag(np.ones(k).dot(Du).dot(S_best)))

    rows_ind = np.argmax(U_best, axis=1)
    cols_ind = np.argmax(V_best, axis=1)

    return U_norm, S_best, V_norm, rows_ind, cols_ind, error_best