def __init__(self): """ gets a small batch of data sets up an S3C model and learns on the data creates an expression for the log likelihood of the data """ self.tol = 1e-5 #dataset = serial.load('${GOODFELI_TMP}/cifar10_preprocessed_train_1K.pkl') X = np.random.RandomState([1, 2, 3]).randn(1000, 108) #dataset.get_batch_design(1000) #X = X[:,0:2] #warnings.warn('hack') #X[0,0] = 1. #X[0,1] = -1. m, D = X.shape N = 300 self.model = S3C( nvis=D, #disable_W_update = 1, nhid=N, irange=.5, init_bias_hid=-.1, init_B=1., min_B=1e-8, max_B=1e8, tied_B=1, e_step=E_Step_Scan( #h_new_coeff_schedule = [ ], h_new_coeff_schedule=[.01]), init_alpha=1., min_alpha=1e-8, max_alpha=1e8, init_mu=1., m_step=Grad_M_Step(learning_rate=1.0), ) #warnings.warn('hack') #W = self.model.W.get_value() #W[0,0] = 1. #W[1,0] = 1. #self.model.W.set_value(W) self.orig_params = self.model.get_param_values() model = self.model self.mf_obs = model.e_step.infer(X) self.stats = SufficientStatistics.from_observations( needed_stats=model.m_step.needed_stats(), V=X, **self.mf_obs) self.prob = self.model.expected_log_prob_vhs( self.stats, H_hat=self.mf_obs['H_hat'], S_hat=self.mf_obs['S_hat']) self.X = X self.m = m self.D = D self.N = N
def __init__(self): """ gets a small batch of data sets up an S3C model and learns on the data creates an expression for the log likelihood of the data """ self.tol = 1e-5 #dataset = serial.load('${GOODFELI_TMP}/cifar10_preprocessed_train_1K.pkl') X = np.random.RandomState([1,2,3]).randn(1000,108) #dataset.get_batch_design(1000) #X = X[:,0:2] #warnings.warn('hack') #X[0,0] = 1. #X[0,1] = -1. m, D = X.shape N = 300 self.model = S3C(nvis = D, #disable_W_update = 1, nhid = N, irange = .5, init_bias_hid = -.1, init_B = 1., min_B = 1e-8, max_B = 1e8, tied_B = 1, e_step = E_Step_Scan( #h_new_coeff_schedule = [ ], h_new_coeff_schedule = [ .01 ] ), init_alpha = 1., min_alpha = 1e-8, max_alpha = 1e8, init_mu = 1., m_step = Grad_M_Step( learning_rate = 1.0 ), ) #warnings.warn('hack') #W = self.model.W.get_value() #W[0,0] = 1. #W[1,0] = 1. #self.model.W.set_value(W) self.orig_params = self.model.get_param_values() model = self.model self.mf_obs = model.e_step.infer(X) self.stats = SufficientStatistics.from_observations(needed_stats = model.m_step.needed_stats(), V =X, ** self.mf_obs) self.prob = self.model.expected_log_prob_vhs( self.stats , H_hat = self.mf_obs['H_hat'], S_hat = self.mf_obs['S_hat']) self.X = X self.m = m self.D = D self.N = N
def __init__(self): """ gets a small batch of data sets up an S3C model and learns on the data creates an expression for the log likelihood of the data """ # We also have to change the value of config.floatX in __init__. self.prev_floatX = config.floatX config.floatX = 'float64' try: self.tol = 1e-5 if config.mode in ["DebugMode", "DEBUG_MODE"]: X = np.random.RandomState([1, 2, 3]).randn(30, 108) m, D = X.shape N = 10 else: X = np.random.RandomState([1, 2, 3]).randn(1000, 108) m, D = X.shape N = 300 self.model = S3C(nvis = D, nhid = N, irange = .5, init_bias_hid = -.1, init_B = 1., min_B = 1e-8, max_B = 1e8, tied_B = 1, e_step = E_Step_Scan( h_new_coeff_schedule = [ .01 ] ), init_alpha = 1., min_alpha = 1e-8, max_alpha = 1e8, init_mu = 1., m_step = Grad_M_Step( learning_rate = 1.0 ), ) self.orig_params = self.model.get_param_values() model = self.model self.mf_obs = model.e_step.infer(X) self.stats = SufficientStatistics.from_observations(needed_stats = model.m_step.needed_stats(), V =X, ** self.mf_obs) self.prob = self.model.expected_log_prob_vhs( self.stats , H_hat = self.mf_obs['H_hat'], S_hat = self.mf_obs['S_hat']) self.X = X self.m = m self.D = D self.N = N finally: config.floatX = self.prev_floatX
e_step.register_model(model)

print 'loading data'
data = np.load(data_path)
m, n = data.shape

print 'batch_size: ', batch_size_str
batch_size = int(batch_size_str)
assert m % batch_size == 0

print 'building energy functional expression'
V = T.matrix()
obs = model.get_hidden_obs(V)

needed_stats = S3C.energy_functional_needed_stats()
stats = SufficientStatistics.from_observations(needed_stats=needed_stats,
                                               V=V, **obs)

energy_functional = model.energy_functional_batch(V=V, **obs)
# the energy functional should be a vector with one entry per example
assert len(energy_functional.type.broadcastable) == 1

print 'compiling energy functional theano function'
f = function([V], energy_functional)

print 'computing energy functional values'
out = np.zeros((m,), dtype='float32')

times = []
for i in xrange(0, m, batch_size):
    print '\t', i
    t1 = time.time()
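    # The fragment ends here, mid-loop; the following body is an assumed
    # completion (not the original script): evaluate the compiled energy
    # functional on one batch and record the wall-clock time.
    out[i:i + batch_size] = f(data[i:i + batch_size, :])
    times.append(time.time() - t1)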
# (Loop header restored by symmetry with the s-schedule loop below; the
# original fragment begins inside the loop over the h damping schedule.)
for i in xrange(len(model.e_step.h_new_coeff_schedule)):
    param = sharedX(model.e_step.h_new_coeff_schedule[i], name='h' + str(i))
    model.e_step.h_new_coeff_schedule[i] = param
    params.append(param)

for i in xrange(len(model.e_step.s_new_coeff_schedule)):
    param = sharedX(model.e_step.s_new_coeff_schedule[i], name='s' + str(i))
    model.e_step.s_new_coeff_schedule[i] = param
    params.append(param)

param = sharedX(model.e_step.rho, name='rho')
model.e_step.rho = param
#params.append(param)

obs = model.e_step.variational_inference(V)

stats = SufficientStatistics.from_observations(needed_stats=needed_stats,
                                               V=V, **obs)

obj = model.em_functional(stats=stats,
                          H_hat=obs['H_hat'],
                          S_hat=obs['S_hat'],
                          var_s0_hat=obs['var_s0_hat'],
                          var_s1_hat=obs['var_s1_hat'])

grads = T.grad(obj, params)

updates = {}

# take a gradient-ascent step on each coefficient while keeping it
# strictly inside (0, 1)
for param, grad in zip(params, grads):
    updates[param] = T.clip(param + learning_rate * grad, 1e-7, 1. - 1e-7)

print 'compiling function...'
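# A minimal sketch of the step the print above announces (assumed, not
# the original script): compile a function that applies one
# gradient-ascent step to the schedule coefficients per call.
from theano import function
update_coeffs = function([V], obj, updates=updates)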
assert len(trunc_kl.type.broadcastable) == 0

print 'compiling function...'

from theano import function

G = [sharedX(np.zeros((batch_size, rbm.nhid), dtype='float32'))
     for rbm in model.dbm.rbms]
H = sharedX(np.zeros((batch_size, model.s3c.nhid), dtype='float32'))
S = sharedX(np.zeros((batch_size, model.s3c.nhid), dtype='float32'))

new_stats = SufficientStatistics.from_observations(
    needed_stats=needed_stats,
    V=V, H_hat=H, S_hat=S,
    var_s0_hat=obs['var_s0_hat'],
    var_s1_hat=obs['var_s1_hat'])

obj = model.inference_procedure.truncated_KL(V,
        {"H_hat": H,
         "S_hat": S,
         "var_s0_hat": obs['var_s0_hat'],
         "var_s1_hat": obs['var_s1_hat'],
         "G_hat": G})

grad_G = [T.grad(obj, G_elem) for G_elem in G]
grad_H = T.grad(obj, H)
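# Assumed continuation, by symmetry with grad_H above (not shown in the
# fragment): the gradient of the truncated KL with respect to the slab
# means.
grad_S = T.grad(obj, S)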
learning_rate = float(sys.argv[5])

print 'defining em functional...'
import theano.tensor as T

V = T.matrix("V")

model.make_pseudoparams()

obs = model.e_step.variational_inference(V)

from pylearn2.models.s3c import S3C
needed_stats = S3C.expected_log_prob_vhs_needed_stats()

from pylearn2.models.s3c import SufficientStatistics
stats = SufficientStatistics.from_observations(needed_stats=needed_stats,
                                               V=V, **obs)

em_functional = model.em_functional(stats=stats,
                                    H_hat=obs['H_hat'],
                                    S_hat=obs['S_hat'],
                                    var_s0_hat=obs['var_s0_hat'],
                                    var_s1_hat=obs['var_s1_hat'])
# the EM functional should be a scalar
assert len(em_functional.type.broadcastable) == 0

print 'compiling function...'

from theano import function

H = sharedX(np.zeros((batch_size, model.nhid), dtype='float32'))
S = sharedX(np.zeros((batch_size, model.nhid), dtype='float32'))

new_stats = SufficientStatistics.from_observations(
    needed_stats=needed_stats,
    V=V, H_hat=H, S_hat=S,
    var_s0_hat=obs['var_s0_hat'],
    var_s1_hat=obs['var_s1_hat'])

obj = model.em_functional(stats=new_stats,
                          H_hat=H,
                          S_hat=S,
                          var_s0_hat=obs['var_s0_hat'],
                          var_s1_hat=obs['var_s1_hat'])
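# Assumed continuation (not shown in the fragment): compile the EM
# functional so it can be evaluated at the values currently stored in
# the shared variables H and S.
f = function([V], obj)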
def __init__(self, model):
    """ model must be a PDDBM or S3C model """

    self.verbose = False

    batch_size = 87
    model._test_batch_size = batch_size

    self.model = model

    pddbm = hasattr(model, 'dbm')

    if not pddbm:
        # hack: make the s3c model follow the pddbm interface
        model.inference_procedure = model.e_step
        has_labels = False
    else:
        has_labels = model.dbm.num_classes > 0

    V = T.matrix("V")
    if has_labels:
        Y = T.matrix("Y")
    else:
        Y = None

    if config.compute_test_value != 'off':
        V.tag.test_value = np.cast[V.type.dtype](
            model.get_input_space().get_origin_batch(batch_size))

    self.model.make_pseudoparams()

    obs = model.inference_procedure.infer(V, Y)

    # clip the mean-field parameters away from 0 and 1 for stability
    obs['H_hat'] = T.clip(obs['H_hat'], 1e-7, 1. - 1e-7)
    if pddbm:
        obs['G_hat'] = tuple([T.clip(elem, 1e-7, 1. - 1e-7)
                              for elem in obs['G_hat']])

    needed_stats = S3C.expected_log_prob_vhs_needed_stats()

    trunc_kl = model.inference_procedure.truncated_KL(V, obs=obs, Y=Y).mean()
    assert len(trunc_kl.type.broadcastable) == 0

    if pddbm:
        G = [sharedX(np.zeros((batch_size, rbm.nhid), dtype='float32'))
             for rbm in model.dbm.rbms]
        h_dim = model.s3c.nhid
    else:
        h_dim = model.nhid

    H = sharedX(np.zeros((batch_size, h_dim), dtype='float32'))
    S = sharedX(np.zeros((batch_size, h_dim), dtype='float32'))

    updates = {H: obs['H_hat'], S: obs['S_hat']}

    if pddbm:
        for G_elem, G_hat_elem in zip(G, obs['G_hat']):
            updates[G_elem] = G_hat_elem

    inputs = [V]
    if has_labels:
        inputs.append(Y)

    if self.verbose:
        print 'batch gradient class compiling init function'
    self.init = function(inputs, trunc_kl, updates=updates)
    if self.verbose:
        print 'done'

    new_stats = SufficientStatistics.from_observations(
        needed_stats=needed_stats,
        V=V, H_hat=H, S_hat=S,
        var_s0_hat=obs['var_s0_hat'],
        var_s1_hat=obs['var_s1_hat'])

    obs = {
        "H_hat": H,
        "S_hat": S,
        "var_s0_hat": obs['var_s0_hat'],
        "var_s1_hat": obs['var_s1_hat'],
    }
    if pddbm:
        obs['G_hat'] = G

    obj = self.model.inference_procedure.truncated_KL(V, obs=obs, Y=Y).mean()

    if pddbm:
        grad_G_sym = [T.grad(obj, G_elem) for G_elem in G]
    grad_H_sym = T.grad(obj, H)
    grad_S_sym = T.grad(obj, S)

    grad_H = sharedX(H.get_value())
    grad_S = sharedX(S.get_value())

    updates = {grad_H: grad_H_sym, grad_S: grad_S_sym}

    if pddbm:
        grad_G = [sharedX(G_elem.get_value()) for G_elem in G]
        for grad_G_elem, grad_G_sym_elem in zip(grad_G, grad_G_sym):
            updates[grad_G_elem] = grad_G_sym_elem

    if self.verbose:
        print 'batch gradient class compiling gradient function'
    self.compute_grad = function(inputs, updates=updates)
    if self.verbose:
        print 'done'

    if self.verbose:
        print 'batch gradient class compiling objective function'
    self.obj = function(inputs, obj)
    if self.verbose:
        print 'done'

    self.S = S
    self.H = H
    self.grad_S = grad_S
    self.grad_H = grad_H
    if pddbm:
        self.G = G
        self.grad_G = grad_G
    self.pddbm = pddbm
    self.has_labels = has_labels
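# A minimal usage sketch (an assumption; the enclosing class name is not
# shown in the source): run mean-field inference once to seed the shared
# variables, then evaluate the truncated-KL objective and its gradients
# at those stored values.
#
#   bg = BatchGradient(model)   # hypothetical class name
#   kl0 = bg.init(X_batch)      # seeds H, S (and G) from inference
#   bg.compute_grad(X_batch)    # fills bg.grad_H, bg.grad_S
#   kl1 = bg.obj(X_batch)       # objective at the stored values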
def __init__(self, model):
    """ model must be a PDDBM or S3C model """

    self.verbose = False

    batch_size = 87
    model._test_batch_size = batch_size

    self.model = model

    pddbm = hasattr(model, 'dbm')

    if not pddbm:
        # hack: make the s3c model follow the pddbm interface
        model.inference_procedure = model.e_step
        has_labels = False
    else:
        has_labels = model.dbm.num_classes > 0

    V = T.matrix("V")
    if has_labels:
        Y = T.matrix("Y")
    else:
        Y = None

    if config.compute_test_value != 'off':
        V.tag.test_value = np.cast[V.type.dtype](
            model.get_input_space().get_origin_batch(batch_size))

    self.model.make_pseudoparams()

    # copy the cached inference observations so the clipping below does
    # not modify the inference procedure's own dictionary
    obs = {}
    for key in model.inference_procedure.hidden_obs:
        obs[key] = model.inference_procedure.hidden_obs[key]

    obs['H_hat'] = T.clip(obs['H_hat'], 1e-7, 1. - 1e-7)
    if pddbm:
        obs['G_hat'] = tuple([T.clip(elem, 1e-7, 1. - 1e-7)
                              for elem in obs['G_hat']])

    needed_stats = S3C.expected_log_prob_vhs_needed_stats()

    trunc_kl = model.inference_procedure.truncated_KL(V, obs, Y).mean()
    assert len(trunc_kl.type.broadcastable) == 0

    if pddbm:
        G = model.inference_procedure.hidden_obs['G_hat']
        h_dim = model.s3c.nhid
    else:
        h_dim = model.nhid

    H = model.inference_procedure.hidden_obs['H_hat']
    # the original indexed 'H_hat' here as well, which looks like a
    # copy-paste bug; S should track the slab means
    S = model.inference_procedure.hidden_obs['S_hat']

    inputs = [V]
    if has_labels:
        inputs.append(Y)

    if self.verbose:
        print 'batch gradient class compiling init function'
    self.init_kl = function(inputs, trunc_kl)
    if self.verbose:
        print 'done'

    new_stats = SufficientStatistics.from_observations(
        needed_stats=needed_stats,
        V=V, H_hat=H, S_hat=S,
        var_s0_hat=obs['var_s0_hat'],
        var_s1_hat=obs['var_s1_hat'])

    #obs = {
    #    "H_hat" : H,
    #    "S_hat" : S,
    #    "var_s0_hat" : obs['var_s0_hat'],
    #    "var_s1_hat" : obs['var_s1_hat'],
    #    }

    if pddbm:
        obs['G_hat'] = G

    obj = self.model.inference_procedure.truncated_KL(V, obs, Y).mean()

    if pddbm:
        grad_G_sym = [T.grad(obj, G_elem) for G_elem in G]
    grad_H_sym = T.grad(obj, H)
    grad_S_sym = T.grad(obj, S)

    grad_H = sharedX(H.get_value())
    grad_S = sharedX(S.get_value())

    updates = {grad_H: grad_H_sym, grad_S: grad_S_sym}

    if pddbm:
        grad_G = [sharedX(G_elem.get_value()) for G_elem in G]
        for grad_G_elem, grad_G_sym_elem in zip(grad_G, grad_G_sym):
            updates[grad_G_elem] = grad_G_sym_elem

    if self.verbose:
        print 'batch gradient class compiling gradient function'
    self.compute_grad = function(inputs, updates=updates)
    if self.verbose:
        print 'done'

    if self.verbose:
        print 'batch gradient class compiling objective function'
    self.obj = function(inputs, obj)
    if self.verbose:
        print 'done'

    self.S = S
    self.H = H
    self.grad_S = grad_S
    self.grad_H = grad_H
    if pddbm:
        self.G = G
        self.grad_G = grad_G
    self.pddbm = pddbm
    self.has_labels = has_labels