def get_sparsity_cost(self):
    # mean activation, computed from the moving-average positive statistics
    hack_h = self.h_given_v(self.sp_pos_v)

    # define loss based on value of sp_type
    if self.sp_type == 'kl':
        eps = npy_floatX(1. / self.batch_size)
        loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                 - npy_floatX(1 - targ) * T.log(1 - val + eps)
    else:
        raise NotImplementedError('Sparsity type %s is not implemented' % self.sp_type)

    cost = T.zeros((), dtype=floatX)
    params = []
    if self.sp_weight['h']:
        cost += self.sp_weight['h'] * T.sum(
                loss(self.sp_targ['h'], hack_h.mean(axis=0)))
        params += [self.hbias]
    if self.sp_type in ['kl'] and self.sp_weight['h']:
        params += [self.Wv, self.alpha, self.mu]
        if self.flags.get('split_norm', False):
            params += [self.wv_norms]

    return utils_cost.Cost(cost, params)
def get_sparsity_cost(self):
    # mean activation, computed from the moving-average positive statistics
    hack_h = self.h_given_v(self.sp_pos_v)

    # define loss based on value of sp_type
    if self.sp_type == 'kl':
        eps = npy_floatX(1. / self.batch_size)
        loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                 - npy_floatX(1 - targ) * T.log(1 - val + eps)
    else:
        raise NotImplementedError('Sparsity type %s is not implemented' % self.sp_type)

    cost = T.zeros((), dtype=floatX)
    params = []
    if self.sp_weight['h']:
        cost += self.sp_weight['h'] * T.sum(loss(self.sp_targ['h'], hack_h.mean(axis=0)))
        params += [self.hbias]
    if self.sp_type in ['kl'] and self.sp_weight['h']:
        params += [self.Wv, self.alpha, self.mu]
        if self.flags['split_norm']:
            params += [self.scalar_norms]

    return costmod.Cost(cost, params)
def get_sparsity_cost(self, pos_g, pos_h):
    # mean activations, computed from the positive-phase statistics
    hack_g = self.g_given_hv(pos_h, self.input)
    hack_h = self.h_given_gv(pos_g, self.input)

    # define sparsity loss
    eps = npy_floatX(1. / self.batch_size)
    loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                             - npy_floatX(1 - targ) * T.log(1 - val + eps)

    params = []
    cost = T.zeros((), dtype=floatX)
    if self.sp_weight['g'] or self.sp_weight['h']:
        params += [self.Wv, self.alpha, self.mu]
    if self.sp_weight['g']:
        cost += self.sp_weight['g'] * T.sum(loss(self.sp_targ['g'], hack_g.mean(axis=0)))
        params += [self.gbias]
    if self.sp_weight['h']:
        cost += self.sp_weight['h'] * T.sum(loss(self.sp_targ['h'], hack_h.mean(axis=0)))
        params += [self.hbias]
    if self.flags['split_norm']:
        params += [self.scalar_norms]

    # positive-phase samples are passed as constants (cte) of the cost
    cte = [pos_g, pos_h]
    return costmod.Cost(cost, params, cte)
def get_sparsity_cost(self):
    hack_h = self.h_given_v(self.input)

    # define loss based on value of sp_type
    eps = npy_floatX(1e-5)
    loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                             - npy_floatX(1 - targ) * T.log(1 - val + eps)

    params = []
    cost = T.zeros((), dtype=floatX)
    if self.sp_weight['h']:
        params += [self.Wv, self.hbias]
        cost += self.sp_weight['h'] * T.sum(loss(self.sp_targ['h'], hack_h).mean(axis=0))

    return costmod.Cost(cost, params)
def get_sparsity_cost(self):
    hack_h = self.h_given_v(self.input)

    # define loss based on value of sp_type
    eps = npy_floatX(1. / self.batch_size)
    loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                             - npy_floatX(1 - targ) * T.log(1 - val + eps)

    params = []
    cost = T.zeros((), dtype=floatX)
    if self.sp_weight['h']:
        params += [self.Wv, self.hbias]
        cost += self.sp_weight['h'] * T.sum(
                loss(self.sp_targ['h'], hack_h.mean(axis=0)))

    return costmod.Cost(cost, params, [hack_h])
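# All five variants above use the same KL penalty: for target activation
# rate t and (mean) activation q, the per-unit loss
#     -t*log(q + eps) - (1 - t)*log(1 - q + eps)
# is the cross-entropy between Bernoulli(t) and Bernoulli(q), minimized
# (up to eps) when q == t; eps (1./batch_size or 1e-5, depending on the
# version) only keeps the logs finite. A minimal numpy sketch of the
# penalty outside the Theano graph (hypothetical helper, for illustration):
import numpy as np

def kl_sparsity_penalty(targ, mean_act, eps=1e-5):
    # cross-entropy between the target rate and each hidden unit's
    # mean activation, summed over units
    loss = - targ * np.log(eps + mean_act) \
           - (1. - targ) * np.log(1. - mean_act + eps)
    return loss.sum()

# a unit firing at the 10% target contributes the minimum per-unit cost;
# a unit saturated near 1 is penalized hardest
print(kl_sparsity_penalty(0.10, np.array([0.10, 0.50, 0.99])))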
def __init__(self, numpy_rng=None, theano_rng=None,
             n_h=99, n_v=100, init_from=None,
             min_beta=0.9, num_beta=20, gamma=10, cratio=1, cdelay=0,
             neg_sample_steps=1,
             lr_spec=None, lr_mults={},
             iscales={}, clip_min={}, clip_max={},
             l1={}, l2={},
             sp_weight={}, sp_targ={},
             batch_size=13,
             compile=True, debug=False, seed=1241234,
             flags={}, max_updates=5e5, **kwargs):
    """
    :param n_h: number of h-hidden units
    :param n_v: number of visible units
    :param iscales: optional dictionary containing initialization scale for each parameter
    :param neg_sample_steps: number of sampling updates to perform in negative phase.
    :param l1: hyper-parameter controlling amount of L1 regularization
    :param l2: hyper-parameter controlling amount of L2 regularization
    :param batch_size: size of positive and negative phase minibatch
    :param compile: compile sampling and learning functions
    :param seed: seed used to initialize numpy and theano RNGs.
    """
    Model.__init__(self)
    Block.__init__(self)
    assert lr_spec is not None
    for k in ['h']: assert k in sp_weight.keys()
    for k in ['h']: assert k in sp_targ.keys()
    self.validate_flags(flags)

    self.jobman_channel = None
    self.jobman_state = {}
    self.register_names_to_del(['jobman_channel'])

    ### make sure all parameters are floatX ###
    for (k, v) in l1.iteritems(): l1[k] = npy_floatX(v)
    for (k, v) in l2.iteritems(): l2[k] = npy_floatX(v)
    for (k, v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
    for (k, v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
    for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
    for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

    # dump initialization parameters to object
    for (k, v) in locals().iteritems():
        if k != 'self':
            setattr(self, k, v)

    # allocate random number generators
    self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
    self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

    ############### ALLOCATE PARAMETERS #################
    # allocate symbolic variable for input
    self.input = T.matrix('input')
    self.init_parameters()
    self.init_chains()

    # learning rate, with deferred 1./t annealing
    self.iter = sharedX(0.0, name='iter')
    if lr_spec['type'] == 'anneal':
        num = lr_spec['init'] * lr_spec['start']
        denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
        self.lr = T.maximum(lr_spec['floor'], num / denum)
    elif lr_spec['type'] == '1_t':
        self.lr = npy_floatX(lr_spec['num']) / (self.iter + npy_floatX(lr_spec['denum']))
    elif lr_spec['type'] == 'linear':
        lr_start = npy_floatX(lr_spec['start'])
        lr_end = npy_floatX(lr_spec['end'])
        self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
    elif lr_spec['type'] == 'constant':
        self.lr = sharedX(lr_spec['value'], name='lr')
    else:
        raise ValueError('Incorrect value for lr_spec[type]')

    # configure input-space (new pylearn2 feature?)
    self.input_space = VectorSpace(n_v)
    self.output_space = VectorSpace(n_h)
    self.batches_seen = 0       # incremented on every batch
    self.examples_seen = 0      # incremented on every training example
    self.logz = sharedX(0.0, name='logz')
    self.cpu_time = 0
    self.error_record = []

    if compile: self.do_theano()

    if init_from:
        raise NotImplementedError()
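# The lr_spec branch above builds the learning-rate schedule symbolically
# from self.iter. The same arithmetic in plain Python (hypothetical helper,
# for illustration only): 'anneal' holds the rate at `init` until
# slope * iter exceeds `start`, then decays it as 1/iter toward `floor`;
# '1_t' is a pure num / (iter + denum) decay; 'linear' interpolates from
# `start` to `end` over max_updates; 'constant' is fixed.
def lr_at(spec, it, max_updates=5e5):
    if spec['type'] == 'anneal':
        num = spec['init'] * spec['start']
        denum = max(spec['start'], spec['slope'] * it)
        return max(spec['floor'], num / denum)
    elif spec['type'] == '1_t':
        return spec['num'] / (it + spec['denum'])
    elif spec['type'] == 'linear':
        return spec['start'] + it * (spec['end'] - spec['start']) / max_updates
    elif spec['type'] == 'constant':
        return spec['value']
    raise ValueError('Incorrect value for lr_spec[type]')

# e.g. constant 1e-3 for the first 1e4 updates, then ~1/t decay:
spec = dict(type='anneal', init=1e-3, start=1e4, slope=1., floor=1e-6)
print(lr_at(spec, 0), lr_at(spec, 1e5))    # 0.001, then 0.0001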
def __init__(self, input=None, Wv=None, vbias=None, hbias=None,
             numpy_rng=None, theano_rng=None,
             n_h=100, bw_s=1, n_v=100, init_from=None,
             neg_sample_steps=1,
             lr=None, lr_timestamp=None, lr_mults={},
             iscales={}, clip_min={}, clip_max={}, vbound=5.,
             l1={}, l2={}, orth_lambda=0.,
             var_param_alpha='exp', var_param_beta='linear',
             sp_type='kl', sp_weight={}, sp_targ={},
             batch_size=13, scalar_b=False,
             compile=True, debug=False, seed=1241234,
             my_save_path=None, save_at=None, save_every=None,
             flags={}, max_updates=5e5):
    """
    :param n_h: number of h-hidden units
    :param n_v: number of visible units
    :param iscales: optional dictionary containing initialization scale for each parameter
    :param neg_sample_steps: number of sampling updates to perform in negative phase.
    :param l1: hyper-parameter controlling amount of L1 regularization
    :param l2: hyper-parameter controlling amount of L2 regularization
    :param batch_size: size of positive and negative phase minibatch
    :param compile: compile sampling and learning functions
    :param seed: seed used to initialize numpy and theano RNGs.
    """
    Model.__init__(self)
    Block.__init__(self)
    assert lr is not None
    for k in ['Wv', 'vbias', 'hbias']: assert k in iscales.keys()
    iscales.setdefault('mu', 1.)
    iscales.setdefault('alpha', 0.)
    iscales.setdefault('beta', 0.)
    for k in ['h']: assert k in sp_weight.keys()
    for k in ['h']: assert k in sp_targ.keys()

    self.jobman_channel = None
    self.jobman_state = {}
    self.register_names_to_del(['jobman_channel'])

    ### make sure all parameters are floatX ###
    for (k, v) in l1.iteritems(): l1[k] = npy_floatX(v)
    for (k, v) in l2.iteritems(): l2[k] = npy_floatX(v)
    for (k, v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
    for (k, v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
    for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
    for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

    # dump initialization parameters to object
    for (k, v) in locals().iteritems():
        if k != 'self':
            setattr(self, k, v)

    # allocate random number generators
    self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
    self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

    ############### ALLOCATE PARAMETERS #################
    self.n_s = self.n_h * self.bw_s
    self.wv_norms = sharedX(1.0 * numpy.ones(self.n_s), name='wv_norms')
    if Wv is None:
        wv_val = self.rng.randn(n_v, self.n_s) * iscales['Wv']
        self.Wv = sharedX(wv_val, name='Wv')
    else:
        self.Wv = Wv
    self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX)
    for i in xrange(self.n_h):
        self.Wh[i * bw_s:(i + 1) * bw_s, i] = 1.

    # allocate shared variables for bias parameters
    if hbias is None:
        self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h), name='hbias')
    else:
        self.hbias = hbias

    # mean (mu) and precision (alpha) parameters on s
    self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
    self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
    var_param_func = {'exp': T.exp, 'softplus': T.nnet.softplus, 'linear': lambda x: x}
    self.alpha_prec = var_param_func[self.var_param_alpha](self.alpha)

    # diagonal of precision matrix of visible units
    self.vbound = sharedX(vbound, name='vbound')
    self.beta = sharedX(iscales['beta'] * numpy.ones(n_v), name='beta')
    self.beta_prec = var_param_func[self.var_param_beta](self.beta)

    # allocate shared variables for persistent chain
    self.neg_v = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
    self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
    self.neg_s = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
    self.neg_h = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

    # moving average values for sparsity
    self.sp_pos_v = sharedX(self.rng.rand(1, self.n_v), name='sp_pos_v')
    self.sp_pos_h = sharedX(self.rng.rand(1, self.n_h), name='sp_pos_h')

    # learning rate, with deferred 1./t annealing
    self.iter = sharedX(0.0, name='iter')
    if lr['type'] == 'anneal':
        num = lr['init'] * lr['start']
        denum = T.maximum(lr['start'], lr['slope'] * self.iter)
        self.lr = T.maximum(lr['floor'], num / denum)
    elif lr['type'] == 'linear':
        lr_start = npy_floatX(lr['start'])
        lr_end = npy_floatX(lr['end'])
        self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
    else:
        raise ValueError('Incorrect value for lr[type]')

    # learning rate multipliers - implemented as shared parameters for GPU
    self.lr_mults_it = {}
    self.lr_mults_shrd = {}
    for (k, v) in lr_mults.iteritems():
        # make sure all learning rate multipliers are float64
        self.lr_mults_it[k] = tools.HyperParamIterator(lr_timestamp, lr_mults[k])
        self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value, name='lr_mults_shrd' + k)

    # allocate symbolic variable for input
    self.input = T.matrix('input') if input is None else input

    # configure input-space (new pylearn2 feature?)
    self.input_space = VectorSpace(n_v)
    self.output_space = VectorSpace(n_h)
    self.batches_seen = 0                   # incremented on every batch
    self.examples_seen = 0                  # incremented on every training example
    self.force_batch_size = batch_size      # force minibatch size
    self.error_record = []

    if compile: self.do_theano()

    #### load layer 1 parameters from file ####
    if init_from:
        self.load_params(init_from)
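# In this variant the precision parameters are stored unconstrained and
# mapped through a fixed transform (var_param_func) before entering the
# energy: 'exp' and 'softplus' guarantee a strictly positive precision for
# any real-valued alpha/beta, while 'linear' uses the raw parameter
# (presumably kept valid by the clip_min/clip_max machinery). The ssRBM
# variant at the end of this section gets the same effect from alpha**2.
# A quick numpy illustration:
import numpy as np
raw = np.array([-3., 0., 3.])        # unconstrained shared values
print(np.exp(raw))                   # 'exp':      always > 0
print(np.log1p(np.exp(raw)))         # 'softplus': always > 0
print(raw)                           # 'linear':   may go negative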
def __init__(self, numpy_rng=None, theano_rng=None,
             n_h=100, bw_s=1, n_v=100, init_from=None,
             neg_sample_steps=1,
             lr_spec=None, lr_timestamp=None, lr_mults={},
             iscales={}, clip_min={}, clip_max={}, truncation_bound={},
             l1={}, l2={}, orth_lambda=0.,
             var_param_alpha='exp', var_param_lambd='linear',
             sp_type='kl', sp_weight={}, sp_targ={},
             batch_size=13,
             compile=True, debug=False, seed=1241234,
             my_save_path=None, save_at=None, save_every=None,
             flags={}, max_updates=5e5):
    """
    :param n_h: number of h-hidden units
    :param n_v: number of visible units
    :param iscales: optional dictionary containing initialization scale for each parameter
    :param neg_sample_steps: number of sampling updates to perform in negative phase.
    :param l1: hyper-parameter controlling amount of L1 regularization
    :param l2: hyper-parameter controlling amount of L2 regularization
    :param batch_size: size of positive and negative phase minibatch
    :param compile: compile sampling and learning functions
    :param seed: seed used to initialize numpy and theano RNGs.
    """
    Model.__init__(self)
    Block.__init__(self)
    assert lr_spec is not None
    for k in ['Wv', 'hbias']: assert k in iscales.keys()
    iscales.setdefault('mu', 1.)
    iscales.setdefault('alpha', 0.)
    iscales.setdefault('lambd', 0.)
    for k in ['h']: assert k in sp_weight.keys()
    for k in ['h']: assert k in sp_targ.keys()
    self.validate_flags(flags)

    self.jobman_channel = None
    self.jobman_state = {}
    self.register_names_to_del(['jobman_channel'])

    ### make sure all parameters are floatX ###
    for (k, v) in l1.iteritems(): l1[k] = npy_floatX(v)
    for (k, v) in l2.iteritems(): l2[k] = npy_floatX(v)
    for (k, v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
    for (k, v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
    for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
    for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

    # dump initialization parameters to object
    for (k, v) in locals().iteritems():
        if k != 'self':
            setattr(self, k, v)

    # allocate random number generators
    self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
    self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

    # allocate symbolic variable for input
    self.input = T.matrix('input')
    self.init_parameters()
    self.init_chains()

    # learning rate, with deferred 1./t annealing
    self.iter = sharedX(0.0, name='iter')
    if lr_spec['type'] == 'anneal':
        num = lr_spec['init'] * lr_spec['start']
        denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
        self.lr = T.maximum(lr_spec['floor'], num / denum)
    elif lr_spec['type'] == 'linear':
        lr_start = npy_floatX(lr_spec['start'])
        lr_end = npy_floatX(lr_spec['end'])
        self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
    else:
        raise ValueError('Incorrect value for lr_spec[type]')

    # configure input-space (new pylearn2 feature?)
    self.input_space = VectorSpace(n_v)
    self.output_space = VectorSpace(n_h)
    self.batches_seen = 0                   # incremented on every batch
    self.examples_seen = 0                  # incremented on every training example
    self.force_batch_size = batch_size      # force minibatch size
    self.error_record = []

    if compile: self.do_theano()

    #### load layer 1 parameters from file ####
    if init_from:
        self.load_params(init_from)
def __init__(self, input=None, Wv=None, vbias=None, hbias=None,
             numpy_rng=None, theano_rng=None,
             n_h=100, bw_s=1, n_v=100, init_from=None,
             neg_sample_steps=1,
             lr=None, lr_timestamp=None, lr_mults={},
             iscales={}, clip_min={}, clip_max={},
             l1={}, l2={},
             sp_type='kl', sp_weight={}, sp_targ={},
             batch_size=13,
             compile=True, debug=False, seed=1241234,
             my_save_path=None, save_at=None, save_every=None,
             flags={}, max_updates=5e5):
    """
    :param n_h: number of h-hidden units
    :param n_v: number of visible units
    :param iscales: optional dictionary containing initialization scale for each parameter
    :param neg_sample_steps: number of sampling updates to perform in negative phase.
    :param l1: hyper-parameter controlling amount of L1 regularization
    :param l2: hyper-parameter controlling amount of L2 regularization
    :param batch_size: size of positive and negative phase minibatch
    :param compile: compile sampling and learning functions
    :param seed: seed used to initialize numpy and theano RNGs.
    """
    Model.__init__(self)
    Block.__init__(self)
    assert lr is not None
    for k in ['Wv', 'vbias', 'hbias']: assert k in iscales.keys()
    iscales.setdefault('mu', 1.)
    iscales.setdefault('alpha', 0.)
    for k in ['h']: assert k in sp_weight.keys()
    for k in ['h']: assert k in sp_targ.keys()

    self.jobman_channel = None
    self.jobman_state = {}
    self.register_names_to_del(['jobman_channel'])

    ### make sure all parameters are floatX ###
    for (k, v) in l1.iteritems(): l1[k] = npy_floatX(v)
    for (k, v) in l2.iteritems(): l2[k] = npy_floatX(v)
    for (k, v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
    for (k, v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
    for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
    for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

    # dump initialization parameters to object
    for (k, v) in locals().iteritems():
        if k != 'self':
            setattr(self, k, v)

    # allocate random number generators
    self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
    self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

    ############### ALLOCATE PARAMETERS #################
    self.n_s = self.n_h * self.bw_s
    if Wv is None:
        wv_val = self.rng.randn(n_v, self.n_s) * iscales['Wv']
        self.Wv = sharedX(wv_val, name='Wv')
    else:
        self.Wv = Wv
    self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX)
    for i in xrange(self.n_h):
        self.Wh[i * bw_s:(i + 1) * bw_s, i] = 1.

    # allocate shared variables for bias parameters
    if vbias is None:
        self.vbias = sharedX(iscales['vbias'] * numpy.ones(n_v), name='vbias')
    else:
        self.vbias = vbias
    if hbias is None:
        self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h), name='hbias')
    else:
        self.hbias = hbias

    # mean (mu) and precision (alpha) parameters on s
    self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
    self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
    self.alpha_prec = T.exp(self.alpha)

    #### load layer 1 parameters from file ####
    if init_from:
        self.load_params(init_from)

    # allocate shared variables for persistent chain
    self.neg_v = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
    self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
    self.neg_s = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
    self.neg_h = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

    # moving average values for sparsity
    self.sp_pos_v = sharedX(self.rng.rand(1, self.n_v), name='sp_pos_v')
    self.sp_pos_h = sharedX(self.rng.rand(1, self.n_h), name='sp_pos_h')

    # learning rate, with deferred 1./t annealing
    self.iter = sharedX(0.0, name='iter')
    if lr['type'] == 'anneal':
        num = lr['init'] * lr['start']
        denum = T.maximum(lr['start'], lr['slope'] * self.iter)
        self.lr = T.maximum(lr['floor'], num / denum)
    elif lr['type'] == 'linear':
        lr_start = npy_floatX(lr['start'])
        lr_end = npy_floatX(lr['end'])
        self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
    else:
        raise ValueError('Incorrect value for lr[type]')

    # learning rate multipliers - implemented as shared parameters for GPU
    self.lr_mults_it = {}
    self.lr_mults_shrd = {}
    for (k, v) in lr_mults.iteritems():
        # make sure all learning rate multipliers are float64
        self.lr_mults_it[k] = tools.HyperParamIterator(lr_timestamp, lr_mults[k])
        self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value, name='lr_mults_shrd' + k)

    # allocate symbolic variable for input
    self.input = T.matrix('input') if input is None else input

    # configure input-space (new pylearn2 feature?)
    self.input_space = VectorSpace(n_v)
    self.output_space = VectorSpace(n_h)
    self.batches_seen = 0                   # incremented on every batch
    self.examples_seen = 0                  # incremented on every training example
    self.force_batch_size = batch_size      # force minibatch size
    self.error_record = []

    if compile: self.do_theano()
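# The Wh loop above (same construction as in the earlier variant) builds a
# fixed block-diagonal pooling matrix: each of the n_h hidden units gates
# its own contiguous block of bw_s slab variables. For n_h=3, bw_s=2:
import numpy as np
n_h, bw_s = 3, 2
Wh = np.zeros((n_h * bw_s, n_h))
for i in range(n_h):
    Wh[i * bw_s:(i + 1) * bw_s, i] = 1.
print(Wh)
# [[1. 0. 0.]
#  [1. 0. 0.]
#  [0. 1. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [0. 0. 1.]]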
def __init__(self, numpy_rng=None, theano_rng=None,
             n_h=99, n_v=100, init_from=None,
             neg_sample_steps=1,
             lr_spec=None, lr_mults={},
             iscales={}, clip_min={}, clip_max={},
             l1={}, l2={},
             sp_weight={}, sp_targ={},
             batch_size=13,
             compile=True, debug=False, seed=1241234,
             my_save_path=None, save_at=None, save_every=None,
             flags={}, max_updates=5e5):
    """
    :param n_h: number of h-hidden units
    :param n_v: number of visible units
    :param iscales: optional dictionary containing initialization scale for each parameter
    :param neg_sample_steps: number of sampling updates to perform in negative phase.
    :param l1: hyper-parameter controlling amount of L1 regularization
    :param l2: hyper-parameter controlling amount of L2 regularization
    :param batch_size: size of positive and negative phase minibatch
    :param compile: compile sampling and learning functions
    :param seed: seed used to initialize numpy and theano RNGs.
    """
    Model.__init__(self)
    Block.__init__(self)
    assert lr_spec is not None
    for k in ['h']: assert k in sp_weight.keys()
    for k in ['h']: assert k in sp_targ.keys()
    self.validate_flags(flags)

    self.jobman_channel = None
    self.jobman_state = {}
    self.register_names_to_del(['jobman_channel'])

    ### make sure all parameters are floatX ###
    for (k, v) in l1.iteritems(): l1[k] = npy_floatX(v)
    for (k, v) in l2.iteritems(): l2[k] = npy_floatX(v)
    for (k, v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
    for (k, v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
    for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
    for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

    # dump initialization parameters to object
    for (k, v) in locals().iteritems():
        if k != 'self':
            setattr(self, k, v)

    # allocate random number generators
    self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
    self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

    ############### ALLOCATE PARAMETERS #################
    # allocate symbolic variable for input
    self.input = T.matrix('input')
    self.init_parameters()
    self.init_chains()

    # learning rate, with deferred 1./t annealing
    self.iter = sharedX(0.0, name='iter')
    if lr_spec['type'] == 'anneal':
        num = lr_spec['init'] * lr_spec['start']
        denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
        self.lr = T.maximum(lr_spec['floor'], num / denum)
    elif lr_spec['type'] == 'linear':
        lr_start = npy_floatX(lr_spec['start'])
        lr_end = npy_floatX(lr_spec['end'])
        self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
    else:
        raise ValueError('Incorrect value for lr_spec[type]')

    # configure input-space (new pylearn2 feature?)
    self.input_space = VectorSpace(n_v)
    self.output_space = VectorSpace(n_h)
    self.batches_seen = 0                   # incremented on every batch
    self.examples_seen = 0                  # incremented on every training example
    self.force_batch_size = batch_size      # force minibatch size
    self.error_record = []

    if compile: self.do_theano()

    if init_from:
        raise NotImplementedError()
def __init__(self, input=None, Wv=None, hbias=None,
             numpy_rng=None, theano_rng=None,
             n_h=100, n_v=100, bw_h=10, init_from=None,
             neg_sample_steps=1,
             lr=1e-3, lr_anneal_coeff=0, lr_timestamp=None, lr_mults={},
             iscales={}, clip_min={}, clip_max={},
             l1={}, l2={},
             sp_moving_avg=0.98, sp_type='KL', sp_weight={}, sp_targ={},
             batch_size=13, scalar_b=False,
             sparse_hmask=None, learn_h_weights=False, unit_norm_filters=True,
             compile=True, parametrize_sqrt_precision=True,
             debug=False, seed=1241234, my_save_path=None):
    """
    :param n_h: number of h-hidden units
    :param n_v: number of visible units
    :param iscales: optional dictionary containing initialization scale for each parameter
    :param neg_sample_steps: number of sampling updates to perform in negative phase.
    :param l1: hyper-parameter controlling amount of L1 regularization
    :param l2: hyper-parameter controlling amount of L2 regularization
    :param batch_size: size of positive and negative phase minibatch
    :param compile: compile sampling and learning functions
    :param seed: seed used to initialize numpy and theano RNGs.
    """
    super(ssRBM, self).__init__()
    for k in ['mu', 'alpha', 'beta', 'Wv', 'hbias']: assert k in iscales.keys()
    for k in ['h']: assert k in sp_weight.keys()
    for k in ['h']: assert k in sp_targ.keys()

    ### make sure all parameters are floatX ###
    for (k, v) in l1.iteritems(): l1[k] = npy_floatX(v)
    for (k, v) in l2.iteritems(): l2[k] = npy_floatX(v)
    for (k, v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
    for (k, v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
    for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
    for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

    # dump initialization parameters to object
    for (k, v) in locals().iteritems():
        if k != 'self':
            setattr(self, k, v)

    # allocate random number generators
    self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
    self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

    ############### ALLOCATE PARAMETERS #################
    self.n_s = self.n_h * bw_h

    # allocate bilinear-weight matrices
    self.Wh = sharedX(sparse_hmask.mask, name='Wh')
    if Wv is None:
        wv_val = self.rng.randn(n_v, self.n_s) * iscales['Wv']
        self.Wv = sharedX(wv_val, name='Wv')
    else:
        self.Wv = Wv

    # allocate shared variables for bias parameters
    if hbias is None:
        self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h), name='hbias')
    else:
        self.hbias = hbias

    # mean (mu) and precision (alpha) parameters on s
    self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
    self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
    self.alpha_prec = self.alpha**2 if parametrize_sqrt_precision else self.alpha

    # diagonal of precision matrix of visible units
    self.beta = sharedX(iscales['beta'] * numpy.ones(n_v), name='beta')
    self.beta_prec = self.beta**2 if parametrize_sqrt_precision else self.beta

    #### load layer 1 parameters from file ####
    if init_from:
        self.load_params(init_from)

    # allocate shared variables for persistent chain
    self.neg_v = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
    self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
    self.neg_s = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
    self.neg_h = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

    # moving average values for sparsity
    self.sp_pos_v = sharedX(self.rng.rand(1, self.n_v), name='sp_pos_v')
    self.sp_pos_h = sharedX(self.rng.rand(1, self.n_h), name='sp_pos_h')

    # learning rate - implemented as shared parameter for GPU
    self.lr_shrd = sharedX(lr, name='lr_shrd')
    self.lr_mults_it = {}
    self.lr_mults_shrd = {}
    for (k, v) in lr_mults.iteritems():
        # make sure all learning rate multipliers are float64
        self.lr_mults_it[k] = tools.HyperParamIterator(lr_timestamp, lr_mults[k])
        self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value, name='lr_mults_shrd' + k)

    # allocate symbolic variable for input
    self.input = T.matrix('input') if input is None else input

    # configure input-space (new pylearn2 feature?)
    self.input_space = VectorSpace(n_v)

    # counters used by pylearn2 trainers
    self.batches_seen = 0                   # incremented on every batch
    self.examples_seen = 0                  # incremented on every training example
    self.force_batch_size = batch_size      # force minibatch size
    self.error_record = []

    ## ESTABLISH LIST OF LEARNT MODEL PARAMETERS ##
    self.params = [self.Wv, self.hbias, self.mu, self.alpha, self.beta]
    if self.learn_h_weights:
        self.params += [self.Wh]

    if compile: self.do_theano()