def rbm(threadName, data):
    n_visible = data.shape[1]
    n_hidden = 4

    # the RBM type
    rbm = rbms.BinaryBinaryRBM(n_visible, n_hidden)
    initial_vmap = { rbm.v: T.matrix('v') }

    # We use contrastive divergence to train the RBM. For this, we can use
    # the CDUpdater. This requires symbolic CD statistics (k=2 here, i.e. CD-2):
    s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=2)

    # We create an updater for each parameter variable
    umap = {}
    for var in rbm.variables:
        # scale the CD update by the module-level learning rate (e.g. 0.001)
        pu = var + learning_rate * updaters.CDUpdater(rbm, var, s)
        umap[var] = pu

    # training
    t = trainers.MinibatchTrainer(rbm, umap)
    mse = monitors.reconstruction_mse(s, rbm.v)
    train = t.compile_function(initial_vmap, mb_size=1, monitors=[mse], name='train', mode=mode)

    # run for every sample point
    h, w = data.shape

    # costs for each thread
    costs_1 = []
    costs_2 = []
    costs_3 = []
    costs_4 = []

    # train the model
    for epoch in xrange(epochs):
        for i in xrange(h):
            # get the cost for training on this data point
            costs = [m for m in train({ rbm.v: data[i, :].reshape(1, n_visible) })]
            # print "MSE = %.4f, thread = %s" % (np.mean(costs), threadName)
            if threadName == "Thread-1":
                costs_1.append(np.mean(costs))
            elif threadName == "Thread-2":
                costs_2.append(np.mean(costs))
            elif threadName == "Thread-3":
                costs_3.append(np.mean(costs))
            else:
                costs_4.append(np.mean(costs))

    # all_costs = []
    # for i in xrange(h):
    #     # get the cost for training the data point across all epochs
    #     cost_point = []
    #     for epoch in xrange(epochs):
    #         cost = [m for m in train({ rbm.v: data[i, :].reshape(1, n_visible) })]
    #         print epoch, cost, threadName, i
    #     all_costs.append(cost_point)

    return costs_1, costs_2, costs_3, costs_4
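
# --- Hypothetical driver for the rbm() function above. The function reads
# learning_rate, epochs and mode from module scope and relies on the Morb
# imports, none of which appear in the snippet; the values and the random
# data below are illustrative assumptions, not part of the original code.
import threading

import numpy as np
import theano
import theano.tensor as T
from morb import rbms, stats, updaters, trainers, monitors

learning_rate = 0.001
epochs = 10
mode = None  # or a theano compile mode for debugging

data = np.random.randint(2, size=(100, 40)).astype(theano.config.floatX)

# one thread per named worker; note that the per-thread cost lists returned
# by rbm() are discarded here, since Thread.run() ignores the return value
threads = [threading.Thread(target=rbm, name="Thread-%d" % (i + 1),
                            args=("Thread-%d" % (i + 1), data))
           for i in range(4)]
for th in threads:
    th.start()
for th in threads:
    th.join()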
rbm.F = factors.Factor(rbm, name='F')
# IMPORTANT: the following parameter instances are associated with the FACTOR
# rbm.F, and not with the RBM itself.
rbm.Wv = parameters.ProdParameters(rbm.F, [rbm.v, rbm.F], Wv, name='Wv')
rbm.Wh = parameters.ProdParameters(rbm.F, [rbm.h, rbm.F], Wh, name='Wh')
rbm.Wx = parameters.ProdParameters(rbm.F, [rbm.x, rbm.F], Wx, name='Wx')
rbm.F.initialize()  # done adding parameters to rbm.F

rbm.bv = parameters.BiasParameters(rbm, rbm.v, theano.shared(value=initial_bv, name='bv'), name='bv')  # visible bias
rbm.bh = parameters.BiasParameters(rbm, rbm.h, theano.shared(value=initial_bh, name='bh'), name='bh')  # hidden bias

initial_vmap = { rbm.v: T.matrix('v'), rbm.x: T.matrix('x') }

# calculate weight updates using CD-1 stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], context_units=[rbm.x], k=1,
                   mean_field_for_gibbs=[rbm.v], mean_field_for_stats=[rbm.v])

umap = {}
for var in rbm.variables:
    # learning rate 0.0005
    pu = var + 0.0005 * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
mce = monitors.reconstruction_crossentropy(s, rbm.v)

# train = t.compile_function(initial_vmap, mb_size=32, monitors=[m], name='train', mode=mode)
train = t.compile_function(initial_vmap, mb_size=32, monitors=[m, mce], name='train', mode=mode)
evaluate = t.compile_function(initial_vmap, mb_size=32, monitors=[m, mce], train=False, name='evaluate', mode=mode)
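
# --- Hypothetical epoch loop over the compiled functions above. train_v/train_x
# and val_v/val_x stand in for the visible and context data arrays, and the
# epoch count is an assumption; neither is defined in the snippet.
for epoch in xrange(50):
    train_costs = [(mse, ce) for mse, ce in train({ rbm.v: train_v, rbm.x: train_x })]
    val_costs = [(mse, ce) for mse, ce in evaluate({ rbm.v: val_v, rbm.x: val_x })]
    print "epoch %d: train MSE %.6f, validation MSE %.6f" % \
        (epoch, np.mean([c[0] for c in train_costs]), np.mean([c[0] for c in val_costs]))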
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
data = generate_data(200)  # np.random.randint(2, size=(10000, n_visible))

n_visible = data.shape[1]
n_hidden = 100

rbm = rbms.BinaryBinaryRBM(n_visible, n_hidden)
initial_vmap = { rbm.v: T.matrix('v') }

# try to calculate weight updates using CD-1 stats
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=1)

umap = {}
for var, shape in zip([rbm.W.var, rbm.bv.var, rbm.bh.var],
                      [(rbm.n_visible, rbm.n_hidden), (rbm.n_visible,), (rbm.n_hidden,)]):
    # pu = 0.001 * (param_updaters.CDParamUpdater(params, sc) + 0.02 * param_updaters.DecayParamUpdater(params))
    pu = updaters.CDUpdater(rbm, var, s)
    pu = var + 0.0001 * updaters.MomentumUpdater(pu, 0.9, shape)  # momentum 0.9, learning rate 0.0001
    umap[var] = pu

t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
train = t.compile_function(initial_vmap, mb_size=32, monitors=[m], name='train', mode=mode)
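
# --- Illustrative training run for the compiled function above; the epoch
# count is an assumed hyperparameter. train() iterates over mb_size-sized
# minibatches drawn from data and yields the monitor values per minibatch.
epochs = 15
for epoch in xrange(epochs):
    costs = [c for c in train({ rbm.v: data })]
    print "epoch %d, mean MSE = %.6f" % (epoch, np.mean(costs))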
def morbrun1(f1=1, f2=1, v1=1, v2=1, kern=1):
    test_set_x = np.array(eval_print1).flatten(order='C')
    valid_set_x = np.array(eval_print3).flatten(order='C')
    train_set_x = np.array(eval_print2).flatten(order='C')
    train_set_x = train_set_x.reshape(np.array(eval_print2).shape[0] * batchm, kern, v1, v2)
    valid_set_x = valid_set_x.reshape(np.array(eval_print3).shape[0] * batchm, kern, v1, v2)
    test_set_x = test_set_x.reshape(np.array(eval_print1).shape[0] * batchm, kern, v1, v2)

    visible_maps = kern
    hidden_maps = neuron
    filter_height = f1
    filter_width = f2
    mb_size = batchm  # minibatch size

    print(">> Constructing RBM...")
    fan_in = visible_maps * filter_height * filter_width

    """
    initial_W = numpy.asarray(
        self.numpy_rng.uniform(
            low = -numpy.sqrt(3. / fan_in),
            high = numpy.sqrt(3. / fan_in),
            size = self.filter_shape
        ), dtype=theano.config.floatX)
    """
    numpy_rng = np.random.RandomState(123)
    initial_W = np.asarray(
        numpy_rng.normal(
            0, 0.5 / np.sqrt(fan_in),
            size=(hidden_maps, visible_maps, filter_height, filter_width)
        ), dtype=theano.config.floatX)
    initial_bv = np.zeros(visible_maps, dtype=theano.config.floatX)
    initial_bh = np.zeros(hidden_maps, dtype=theano.config.floatX)

    shape_info = {
        'hidden_maps': hidden_maps,
        'visible_maps': visible_maps,
        'filter_height': filter_height,
        'filter_width': filter_width,
        'visible_height': v1,  # 45+8
        'visible_width': v2,   # 30
        'mb_size': mb_size
    }

    # rbms.SigmoidBinaryRBM(n_visible, n_hidden)
    rbm = base.RBM()
    rbm.v = units.BinaryUnits(rbm, name='v')  # visibles
    rbm.h = units.BinaryUnits(rbm, name='h')  # hiddens
    rbm.W = parameters.Convolutional2DParameters(rbm, [rbm.v, rbm.h], theano.shared(value=initial_W, name='W'), name='W', shape_info=shape_info)
    # one bias per map (so shared across width and height):
    rbm.bv = parameters.SharedBiasParameters(rbm, rbm.v, 3, 2, theano.shared(value=initial_bv, name='bv'), name='bv')
    rbm.bh = parameters.SharedBiasParameters(rbm, rbm.h, 3, 2, theano.shared(value=initial_bh, name='bh'), name='bh')

    initial_vmap = { rbm.v: T.tensor4('v') }

    # calculate weight updates using CD-k stats (k=5)
    print(">> Constructing contrastive divergence updaters...")
    s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=5,
                       mean_field_for_stats=[rbm.v], mean_field_for_gibbs=[rbm.v])

    lr_cd = 0.001
    if indk == -1:
        lr_cd = 0

    umap = {}
    for var in rbm.variables:
        pu = var + lr_cd * updaters.CDUpdater(rbm, var, s)
        umap[var] = pu

    print(">> Compiling functions...")
    t = trainers.MinibatchTrainer(rbm, umap)
    m = monitors.reconstruction_mse(s, rbm.v)
    e_data = rbm.energy(s['data']).mean()
    e_model = rbm.energy(s['model']).mean()

    # train = t.compile_function(initial_vmap, mb_size=32, monitors=[m], name='train', mode=mode)
    train = t.compile_function(initial_vmap, mb_size=mb_size, monitors=[m, e_data, e_model], name='train', mode=mode)

    # TRAINING
    epochs = epoch_cd
    print(">> Training for %d epochs..." % epochs)

    for epoch in range(epochs):
        monitoring_data_train = [(cost, energy_data, energy_model)
                                 for cost, energy_data, energy_model in train({ rbm.v: train_set_x })]
        mses_train, edata_train_list, emodel_train_list = zip(*monitoring_data_train)

    lay1w = rbm.W.var.get_value()
    Wl = theano.shared(lay1w)
    lay1bh = rbm.bh.var.get_value()
    bhl = theano.shared(lay1bh)

    return [Wl, bhl]
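
# --- Hypothetical invocation of morbrun1(). The globals it reads (eval_print1,
# eval_print2, eval_print3, batchm, neuron, indk, epoch_cd) must already be
# defined in the module; the filter and map sizes below are illustrative.
Wl, bhl = morbrun1(f1=5, f2=5, v1=28, v2=28, kern=1)

# the returned Theano shared variables can seed the next layer of a stack
print(Wl.get_value().shape)   # (hidden_maps, visible_maps, filter_height, filter_width)
print(bhl.get_value().shape)  # (hidden_maps,)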
n_hidden = 500
mb_size = 20
k = 15
learning_rate = 0.1
epochs = 15

print ">> Constructing RBM..."
rbm = rbms.BinaryBinaryRBM(n_visible, n_hidden)
initial_vmap = { rbm.v: T.matrix('v') }
persistent_vmap = { rbm.h: theano.shared(np.zeros((mb_size, n_hidden), dtype=theano.config.floatX)) }

# calculate weight updates using persistent CD (PCD) stats
print ">> Constructing contrastive divergence updaters..."
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=k,
                   persistent_vmap=persistent_vmap, mean_field_for_stats=[rbm.v], mean_field_for_gibbs=[rbm.v])

umap = {}
for var in rbm.variables:
    # scale the learning rate by the minibatch size
    pu = var + (learning_rate / float(mb_size)) * updaters.CDUpdater(rbm, var, s)
    umap[var] = pu

print ">> Compiling functions..."
t = trainers.MinibatchTrainer(rbm, umap)
m = monitors.reconstruction_mse(s, rbm.v)
m_data = s['data'][rbm.v]
m_model = s['model'][rbm.v]
e_data = rbm.energy(s['data']).mean()
e_model = rbm.energy(s['model']).mean()

# train = t.compile_function(initial_vmap, mb_size=32, monitors=[m], name='train', mode=mode)
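
# --- The snippet stops at a commented-out compile call; a sketch of the
# missing compile and PCD training loop, following the pattern of the other
# examples. data, n_visible and mode are assumed to be defined elsewhere.
train = t.compile_function(initial_vmap, mb_size=mb_size,
                           monitors=[m, e_data, e_model], name='train', mode=mode)

for epoch in xrange(epochs):
    results = [(cost, ed, em) for cost, ed, em in train({ rbm.v: data })]
    mses, edata, emodel = zip(*results)
    print "epoch %d: MSE %.4f, data energy %.2f, model energy %.2f" % \
        (epoch, np.mean(mses), np.mean(edata), np.mean(emodel))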