def __init__(self, k, nvis, convergence_th=1e-6, max_iter=None, verbose=False):
    """
    Parameters:
    :type k: int
    :param k: number of clusters.
    :type nvis: int
    :param nvis: dimensionality of the input vectors.
    :type convergence_th: float
    :param convergence_th: threshold of distance to clusters under which
        kmeans stops iterating.
    :type max_iter: int
    :param max_iter: maximum number of iterations. Defaults to infinity.
    :type verbose: bool
    :param verbose: if True, print progress information while training.
    """
    Block.__init__(self)
    Model.__init__(self)

    self.input_space = VectorSpace(nvis)
    self.k = k
    self.convergence_th = convergence_th
    if max_iter:
        if max_iter < 0:
            raise Exception('KMeans init: max_iter should be positive.')
        self.max_iter = max_iter
    else:
        self.max_iter = float('inf')
    self.verbose = verbose
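# Hedged usage sketch (not part of the original source): it assumes this
# constructor belongs to a pylearn2-style KMeans block whose train() method
# accepts a design matrix of shape (n_examples, nvis). The class name
# `KMeans` and the train()/perform() calls are assumptions here.
import numpy

km = KMeans(k=10, nvis=784, convergence_th=1e-6, max_iter=100, verbose=True)
X = numpy.random.rand(1000, 784).astype('float32')
km.train(X)               # iterate until convergence_th or max_iter is reached
codes = km.perform(X)     # map each example to its cluster representation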
def __init__(self, rbms=None, max_updates=1e6, flags={}): Model.__init__(self) Block.__init__(self) self.jobman_channel = None self.jobman_state = {} self.validate_flags(flags) self.register_names_to_del(['jobman_channel']) # dump initialization parameters to object for (k,v) in locals().iteritems(): if k!='self': setattr(self,k,v) # validate that RBMs have the same number of units. for (rbm1, rbm2) in zip(rbms[:-1], rbms[1:]): assert rbm1.n_h == rbm2.n_v assert rbm1.batch_size == rbm2.batch_size #assert rbm1.flags['enable_centering'] #assert rbm2.flags['enable_centering'] self.rbms = rbms self.depth = len(rbms) self.rng = self.rbms[0].rng # configure input-space (necessary evil) self.input_space = VectorSpace(self.rbms[0].n_v) self.output_space = VectorSpace(self.rbms[-1].n_h) self.batches_seen = 0 # incremented on every batch self.examples_seen = 0 # incremented on every training example self.batch_size = self.rbms[0].batch_size self.cpu_time = 0 self.init_train_sequence() self.do_theano()
def __init__(self, n_vis, n_hid, sigma=0.4, W=None, h_bias=None, v_bias=None,
             numpy_rng=None, theano_rng=None):
    """
    """
    Model.__init__(self)  # self.names_to_del = set(); self._test_batch_size = 2
    Block.__init__(self)  # self.fn = None; self.cpu_only = False

    self.n_vis = n_vis
    self.n_hid = n_hid
    self.sigma = sigma
    self.coeff = 1. / (self.sigma ** 2)  # coefficient
    self.input_space = VectorSpace(dim=self.n_vis)    # add input_space
    self.output_space = VectorSpace(dim=self.n_hid)   # add output_space

    if numpy_rng is None:
        # create a number generator
        numpy_rng = numpy.random.RandomState(seed=19920130)
    self.numpy_rng = numpy_rng

    if theano_rng is None:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.theano_rng = theano_rng

    if W is None:
        init_W = numpy.asarray(numpy_rng.uniform(
            low=-4 * numpy.sqrt(6. / (n_hid + n_vis)),
            high=4 * numpy.sqrt(6. / (n_hid + n_vis)),
            size=(n_vis, n_hid)), dtype=theano.config.floatX)
        # theano shared variables for weights and biases
        W = theano.shared(value=init_W, name='W', borrow=True)
    else:
        assert isinstance(W, theano.tensor.sharedvar.TensorSharedVariable)
        assert W.get_value().ndim == 2

    if h_bias is None:
        # create shared variable for hidden units bias
        h_bias = theano.shared(value=numpy.zeros(n_hid, dtype=theano.config.floatX),
                               name='h_bias', borrow=True)
    else:
        assert isinstance(h_bias, theano.tensor.sharedvar.TensorSharedVariable)
        assert h_bias.get_value().ndim == 1

    if v_bias is None:
        # create shared variable for visible units bias
        v_bias = theano.shared(value=numpy.zeros(n_vis, dtype=theano.config.floatX),
                               name='v_bias', borrow=True)
    else:
        assert isinstance(v_bias, theano.tensor.sharedvar.TensorSharedVariable)
        assert v_bias.get_value().ndim == 1

    self.W = W
    self.h_bias = h_bias
    self.v_bias = v_bias
    self._params = [self.W, self.h_bias, self.v_bias]
def __init__(self, n_vis, n_hid, corruptor=None, W=None, b_enc=None, b_dec=None,
             numpy_rng=None, dec_f=True, extra_cost=None, theano_rng=None):
    """
    Constructor.

    dec_f: whether the decoder units apply a nonlinearity.
    extra_cost: penalty terms added on top of the basic MSE and CE costs,
        e.g. a sparsity penalty or weight decay. It is used by
        self.get_default_cost(), so any desired penalty terms only need to
        be supplied before the model is initialized.
    """
    Model.__init__(self)  # self.names_to_del = set(); self._test_batch_size = 2
    Block.__init__(self)  # self.fn = None; self.cpu_only = False

    self.n_vis = n_vis
    self.n_hid = n_hid
    self.extra_cost = extra_cost
    self.dec_f = dec_f
    if corruptor is not None:
        self.corruptor = corruptor
    self.input_space = VectorSpace(dim=self.n_vis)    # add input_space
    self.output_space = VectorSpace(dim=self.n_hid)   # add output_space

    if numpy_rng is None:
        # create a number generator
        numpy_rng = numpy.random.RandomState(seed=19900418)
    self.numpy_rng = numpy_rng

    if theano_rng is None:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.theano_rng = theano_rng

    if W is None:
        init_W = numpy.asarray(numpy_rng.uniform(
            low=-4 * numpy.sqrt(6. / (n_hid + n_vis + 1.)),
            high=4 * numpy.sqrt(6. / (n_hid + n_vis + 1.)),
            size=(n_vis, n_hid)), dtype=theano.config.floatX)
        # theano shared variables for weights and biases
        W = theano.shared(value=init_W, name='W', borrow=True)

    if b_enc is None:
        # create shared variable for hidden units bias
        b_enc = theano.shared(value=numpy.zeros(n_hid, dtype=theano.config.floatX),
                              name='b_enc', borrow=True)

    if b_dec is None:
        # create shared variable for visible units bias
        b_dec = theano.shared(value=numpy.zeros(n_vis, dtype=theano.config.floatX),
                              name='b_dec', borrow=True)

    self.W = W
    self.b_enc = b_enc
    self.b_dec = b_dec
    self._params = [self.W, self.b_enc, self.b_dec]
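# Hedged usage sketch (not from the original source): builds a denoising
# variant of MyAutoEncoder with a pylearn2 corruptor. The GaussianCorruptor
# import path and its stdev argument follow pylearn2's corruption module but
# are assumptions here; extra_cost is left at its default since its expected
# type is not documented above.
import numpy
from pylearn2.corruption import GaussianCorruptor

corruptor = GaussianCorruptor(stdev=0.3)
ae = MyAutoEncoder(n_vis=784, n_hid=256, corruptor=corruptor,
                   dec_f=True,            # keep the decoder nonlinearity
                   numpy_rng=numpy.random.RandomState(123))
print ae.W.get_value().shape              # (784, 256), Glorot-style init above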
def __init__(self, n_vis, n_hid, W=None, h_bias=None, v_bias=None, numpy_rng=None,theano_rng=None): Model.__init__(self) # self.names_to_del = set(); self._test_batch_size = 2 Block.__init__(self) # self.fn = None; self.cpu_only = False self.n_vis = n_vis self.n_hid = n_hid self.input_space = VectorSpace(dim=self.n_vis) # add input_space self.output_space = VectorSpace(dim=self.n_hid) # add output_space if numpy_rng is None: # create a number generator numpy_rng = numpy.random.RandomState(seed=19900418) self.numpy_rng = numpy_rng if theano_rng is None: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) self.theano_rng = theano_rng if W is None: init_W = numpy.asarray(numpy_rng.uniform( low=-4 * numpy.sqrt(6. / (n_hid + n_vis)), high=4 * numpy.sqrt(6. / (n_hid + n_vis)), size=(n_vis, n_hid)), dtype=theano.config.floatX) # theano shared variables for weights and biases W = theano.shared(value=init_W, name='W', borrow=True) if h_bias is None: # create shared variable for hidden units bias h_bias = theano.shared(value=numpy.zeros(n_hid, dtype=theano.config.floatX), name='h_bias', borrow=True) if v_bias is None: # create shared variable for visible units bias v_bias = theano.shared(value=numpy.zeros(n_vis, dtype=theano.config.floatX), name='v_bias', borrow=True) self.W = W self.h_bias = h_bias self.v_bias = v_bias self._params = [self.W, self.h_bias, self.v_bias]
def __init__(self, input=None, Wv=None, vbias=None, hbias=None, numpy_rng = None, theano_rng = None, n_h=100, bw_s=1, n_v=100, init_from=None, neg_sample_steps=1, lr=None, lr_timestamp=None, lr_mults = {}, iscales={}, clip_min={}, clip_max={}, l1 = {}, l2 = {}, sp_type='kl', sp_weight={}, sp_targ={}, batch_size = 13, compile=True, debug=False, seed=1241234, my_save_path=None, save_at=None, save_every=None, flags = {}, max_updates = 5e5): """ :param n_h: number of h-hidden units :param n_v: number of visible units :param iscales: optional dictionary containing initialization scale for each parameter :param neg_sample_steps: number of sampling updates to perform in negative phase. :param l1: hyper-parameter controlling amount of L1 regularization :param l2: hyper-parameter controlling amount of L2 regularization :param batch_size: size of positive and negative phase minibatch :param compile: compile sampling and learning functions :param seed: seed used to initialize numpy and theano RNGs. """ Model.__init__(self) Block.__init__(self) assert lr is not None for k in ['Wv', 'vbias', 'hbias']: assert k in iscales.keys() iscales.setdefault('mu', 1.) iscales.setdefault('alpha', 0.) for k in ['h']: assert k in sp_weight.keys() for k in ['h']: assert k in sp_targ.keys() self.jobman_channel = None self.jobman_state = {} self.register_names_to_del(['jobman_channel']) ### make sure all parameters are floatX ### for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v) for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v) for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v) for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v) for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v) for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v) # dump initialization parameters to object for (k,v) in locals().iteritems(): if k!='self': setattr(self,k,v) # allocate random number generators self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng ############### ALLOCATE PARAMETERS ################# self.n_s = self.n_h * self.bw_s if Wv is None: wv_val = self.rng.randn(n_v, self.n_s) * iscales['Wv'] self.Wv = sharedX(wv_val, name='Wv') else: self.Wv = Wv self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX) for i in xrange(self.n_h): self.Wh[i*bw_s:(i+1)*bw_s, i] = 1. 
    # allocate shared variables for bias parameters
    if vbias is None:
        self.vbias = sharedX(iscales['vbias'] * numpy.ones(n_v), name='vbias')
    else:
        self.vbias = vbias

    # allocate shared variables for bias parameters
    if hbias is None:
        self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h), name='hbias')
    else:
        self.hbias = hbias

    # mean (mu) and precision (alpha) parameters on s
    self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
    self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
    self.alpha_prec = T.exp(self.alpha)

    #### load layer 1 parameters from file ####
    if init_from:
        self.load_params(init_from)

    # allocate shared variable for persistent chain
    self.neg_v = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
    self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
    self.neg_s = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
    self.neg_h = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

    # moving average values for sparsity
    self.sp_pos_v = sharedX(self.rng.rand(1, self.n_v), name='sp_pos_v')
    self.sp_pos_h = sharedX(self.rng.rand(1, self.n_h), name='sp_pos_h')

    # learning rate, with deferred 1./t annealing
    self.iter = sharedX(0.0, name='iter')

    if lr['type'] == 'anneal':
        num = lr['init'] * lr['start']
        denum = T.maximum(lr['start'], lr['slope'] * self.iter)
        self.lr = T.maximum(lr['floor'], num / denum)
    elif lr['type'] == 'linear':
        lr_start = npy_floatX(lr['start'])
        lr_end = npy_floatX(lr['end'])
        self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
    else:
        raise ValueError('Incorrect value for lr[type]')

    # learning rate multipliers - implemented as shared parameters for GPU
    self.lr_mults_it = {}
    self.lr_mults_shrd = {}
    for (k, v) in lr_mults.iteritems():
        # make sure all learning rate multipliers are float64
        self.lr_mults_it[k] = tools.HyperParamIterator(lr_timestamp, lr_mults[k])
        self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value,
                                        name='lr_mults_shrd' + k)

    # allocate symbolic variable for input
    self.input = T.matrix('input') if input is None else input

    # configure input-space (new pylearn2 feature?)
    self.input_space = VectorSpace(n_v)
    self.output_space = VectorSpace(n_h)

    self.batches_seen = 0                # incremented on every batch
    self.examples_seen = 0               # incremented on every training example
    self.force_batch_size = batch_size   # force minibatch size
    self.error_record = []

    if compile:
        self.do_theano()
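# Hedged numeric sketch (not from the original source) of the two learning
# rate schedules built symbolically above, evaluated with plain numpy-free
# Python so the shapes of the curves are easy to check. The dictionary keys
# mirror the ones read from `lr`; the concrete values are made up for
# illustration.
def anneal_lr(it, init=1e-2, start=1000., slope=1., floor=1e-5):
    # constant at `init` until iteration `start`, then decays as 1/t
    return max(floor, init * start / max(start, slope * it))

def linear_lr(it, start=1e-2, end=1e-4, max_updates=5e5):
    # linear interpolation from `start` to `end` over `max_updates` iterations
    return start + it * (end - start) / max_updates

for it in (0, 1000, 10000, 100000):
    print it, anneal_lr(it), linear_lr(it)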
def __init__(self, input=None, n_u=[100, 100], enable={}, load_from=None, iscales=None, clip_min={}, clip_max={}, pos_mf_steps=1, pos_sample_steps=0, neg_sample_steps=1, lr_spec={}, lr_mults={}, l1={}, l2={}, l1_inf={}, flags={}, momentum_lambda=0, cg_params={}, batch_size=13, computational_bs=0, compile=True, seed=1241234, sp_targ_h=None, sp_weight_h=None, sp_pos_k=5, my_save_path=None, save_at=None, save_every=None, max_updates=1e6): """ :param n_u: list, containing number of units per layer. n_u[0] contains number of visible units, while n_u[i] (with i > 0) contains number of hid. units at layer i. :param enable: dictionary of flags with on/off behavior :param iscales: optional dictionary containing initialization scale for each parameter. Key of dictionary should match the name of the associated shared variable. :param pos_mf_steps: number of mean-field iterations to perform in positive phase :param neg_sample_steps: number of sampling updates to perform in negative phase. :param lr: base learning rate :param lr_timestamp: list containing update indices at which to change the lr multiplier :param lr_mults: dictionary, optionally containing a list of learning rate multipliers for parameters of the model. Length of this list should match length of lr_timestamp (the lr mult will transition whenever we reach the associated timestamp). Keys should match the name of the shared variable, whose learning rate is to be adjusted. :param l1: dictionary, whose keys are model parameter names, and values are hyper-parameters controlling degree of L1-regularization. :param l2: same as l1, but for L2 regularization. :param l1_inf: same as l1, but the L1 penalty is centered as -\infty instead of 0. :param cg_params: dictionary with keys ['rtol','damp','maxiter'] :param batch_size: size of positive and negative phase minibatch :param computational_bs: batch size used internaly by natural gradient to reduce memory consumption :param seed: seed used to initialize numpy and theano RNGs. :param my_save_path: if None, do not save model. Otherwise, contains stem of filename to which we will save the model (everything but the extension). :param save_at: list containing iteration counts at which to save model :param save_every: scalar value. Save model every `save_every` iterations. """ Model.__init__(self) Block.__init__(self) ### VALIDATE PARAMETERS AND SET DEFAULT VALUES ### assert lr_spec is not None for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v) for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v) [iscales.setdefault('bias%i' % i, 0.) 
for i in xrange(len(n_u))] [iscales.setdefault('W%i' % i, 0.1) for i in xrange(len(n_u))] flags.setdefault('enable_centering', False) flags.setdefault('enable_natural', False) flags.setdefault('enable_warm_start', False) flags.setdefault('mlbiases', False) flags.setdefault('precondition', None) flags.setdefault('minres', False) flags.setdefault('minresQLP', False) if flags['precondition'] == 'None': flags['precondition'] = None self.jobman_channel = None self.jobman_state = {} self.register_names_to_del(['jobman_channel']) ### DUMP INITIALIZATION PARAMETERS TO OBJECT ### for (k, v) in locals().iteritems(): if k != 'self': setattr(self, k, v) assert len(n_u) > 1 self.n_v = n_u[0] self.depth = len(n_u) # allocate random number generators self.rng = numpy.random.RandomState(seed) self.theano_rng = RandomStreams(self.rng.randint(2**30)) # allocate bilinear-weight matrices self.input = T.matrix() self.init_parameters() self.init_dparameters() self.init_centering() self.init_samples() # learning rate, with deferred 1./t annealing self.iter = sharedX(0.0, name='iter') if lr_spec['type'] == 'anneal': num = lr_spec['init'] * lr_spec['start'] denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter) self.lr = T.maximum(lr_spec['floor'], num / denum) elif lr_spec['type'] == 'linear': lr_start = npy_floatX(lr_spec['start']) lr_end = npy_floatX(lr_spec['end']) self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX( self.max_updates) else: raise ValueError('Incorrect value for lr_spec[type]') # counter for CPU-time self.cpu_time = 0. if load_from: self.load_parameters(fname=load_from) # configure input-space (?new pylearn2 feature?) self.input_space = VectorSpace(n_u[0]) self.output_space = VectorSpace(n_u[-1]) self.batches_seen = 0 # incremented on every batch self.examples_seen = 0 # incremented on every training example self.force_batch_size = batch_size # force minibatch size self.error_record = [] if compile: self.do_theano()
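# Hedged sketch (not from the original source) of the lr_spec and iscales
# dictionaries this constructor accepts; the keys are exactly the ones read
# above, while the numeric values are made up for illustration.
anneal_spec = {'type': 'anneal', 'init': 1e-2, 'start': 1000., 'slope': 1., 'floor': 1e-5}
linear_spec = {'type': 'linear', 'start': 1e-2, 'end': 1e-4}

# iscales keys follow the 'W%i' / 'bias%i' naming used by setdefault above,
# one entry per layer of n_u; anything omitted falls back to the defaults.
iscales = {'W1': 0.01, 'W2': 0.01, 'bias0': 0., 'bias1': 0., 'bias2': 0.}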
def __init__(self, model_type=None, alpha=0.2,
             n_vis_img=None, n_vis_txt=None, n_hid_img=None, n_hid_txt=None,
             corruptor_img=None, corruptor_txt=None,
             W_img=None, W_txt=None, b_enc_img=None, b_enc_txt=None,
             b_dec_img=None, b_dec_txt=None, dec_f_img=True, dec_f_txt=True,
             img_AE=None, txt_AE=None, numpy_rng=None, theano_rng=None):
    """
    model_type: String, selects the model variant so that get_default_cost()
        picks the desired training cost.
        Allowed values: 'Combine', 'CrossModal', 'FullModal'
    param: alpha, weighting between the standard cost and the association
        (cross-modal) cost; the larger alpha is, the larger the share of the
        standard cost in the total cost function.
    param: img_AE, autoencoder used on the image side
    param: txt_AE, autoencoder used on the text side
    """
    Model.__init__(self)  # self.names_to_del = set(); self._test_batch_size = 2
    Block.__init__(self)  # self.fn = None; self.cpu_only = False

    assert model_type in ['Combine', 'CrossModal', 'FullModal']
    self.model_type = model_type
    self.alpha = alpha

    if numpy_rng is None:
        # create a number generator
        numpy_rng = numpy.random.RandomState(seed=19900418)
    self.numpy_rng = numpy_rng

    if theano_rng is None:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.theano_rng = theano_rng

    # the only components shared by the two AEs are the random number generators
    if img_AE is None:
        assert n_vis_img is not None
        assert n_hid_img is not None
        img_AE = MyAutoEncoder(n_vis=n_vis_img, n_hid=n_hid_img,
                               corruptor=corruptor_img, W=W_img,
                               b_enc=b_enc_img, b_dec=b_dec_img, dec_f=dec_f_img,
                               numpy_rng=self.numpy_rng, theano_rng=self.theano_rng)
    if txt_AE is None:
        assert n_vis_txt is not None
        assert n_hid_txt is not None
        txt_AE = MyAutoEncoder(n_vis=n_vis_txt, n_hid=n_hid_txt,
                               corruptor=corruptor_txt, W=W_txt,
                               b_enc=b_enc_txt, b_dec=b_dec_txt, dec_f=dec_f_txt,
                               numpy_rng=self.numpy_rng, theano_rng=self.theano_rng)

    # the current model only supports code spaces of the same dimensionality
    # on both sides
    assert img_AE.n_hid == txt_AE.n_hid

    self.img_AE = img_AE
    self.txt_AE = txt_AE

    self.W_img = img_AE.W            # not used
    self.W_txt = txt_AE.W            # not used
    self.b_enc_img = img_AE.b_enc    # not used
    self.b_dec_img = img_AE.b_dec    # not used
    self.b_enc_txt = txt_AE.b_enc    # not used
    self.b_dec_txt = txt_AE.b_dec    # not used

    self.n_vis_img = self.img_AE.n_vis
    self.n_vis_txt = self.txt_AE.n_vis
    self.n_hid_img = self.img_AE.n_hid
    self.n_hid_txt = self.txt_AE.n_hid
    self.n_vis = self.img_AE.n_vis + self.txt_AE.n_vis
    self.n_hid = self.img_AE.n_hid + self.txt_AE.n_hid

    self.input_space = VectorSpace(dim=self.n_vis)    # add input_space
    self.output_space = VectorSpace(dim=self.n_hid)   # add output_space

    # init_W = numpy.concatenate([self.img_AE.W, self.txt_AE_W], axis=1)
    # self.W = theano.shared(value=init_W, name='W', borrow=True)

    # parameter order: image weight matrix, image encoder bias, image decoder
    # bias, text weight matrix, text encoder bias, text decoder bias
    self._params = [self.img_AE.W, self.img_AE.b_enc, self.img_AE.b_dec,
                    self.txt_AE.W, self.txt_AE.b_enc, self.txt_AE.b_dec]
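# Hedged construction sketch (not from the original source). The wrapper class
# name `MyMultimodalAE` is hypothetical; MyAutoEncoder is the class used above.
# Both pre-built autoencoders must share the same code dimensionality (n_hid).
import numpy

rng = numpy.random.RandomState(0)
img_ae = MyAutoEncoder(n_vis=1024, n_hid=128, numpy_rng=rng)
txt_ae = MyAutoEncoder(n_vis=2000, n_hid=128, numpy_rng=rng)
mm_ae = MyMultimodalAE(model_type='CrossModal', alpha=0.2,
                       img_AE=img_ae, txt_AE=txt_ae,
                       numpy_rng=rng)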
# Set PCA subclass from argument. if args.algorithm == 'cov_eig': PCAImpl = CovEigPCA elif args.algorithm == 'svd': PCAImpl = SVDPCA elif args.algorithm == 'online': PCAImpl = OnlinePCA conf['minibatch_size'] = args.minibatch_size else: # This should never happen. raise NotImplementedError(args.algorithm) # Load precomputed PCA transformation if requested; otherwise compute it. if args.load_file: pca = Block.load(args.load_file) else: print "... computing PCA" pca = PCAImpl(**conf) pca.train(train_data) # Save the computed transformation. pca.save(args.save_file) # Apply the transformation to test and valid subsets. inputs = tensor.matrix() pca_transform = theano.function([inputs], pca(inputs)) valid_pca = pca_transform(valid_data) test_pca = pca_transform(test_data) print >> sys.stderr, "New shapes:", map(numpy.shape, [valid_pca, test_pca]) # TODO: Compute ALC here when the code using the labels is ready.
def __init__(self, numpy_rng = None, theano_rng = None, n_h=100, bw_s=1, n_v=100, init_from=None, neg_sample_steps=1, lr_spec=None, lr_timestamp=None, lr_mults = {}, iscales={}, clip_min={}, clip_max={}, truncation_bound={}, l1 = {}, l2 = {}, orth_lambda=0., var_param_alpha='exp', var_param_lambd='linear', sp_type='kl', sp_weight={}, sp_targ={}, batch_size = 13, compile=True, debug=False, seed=1241234, my_save_path=None, save_at=None, save_every=None, flags = {}, max_updates = 5e5): """ :param n_h: number of h-hidden units :param n_v: number of visible units :param iscales: optional dictionary containing initialization scale for each parameter :param neg_sample_steps: number of sampling updates to perform in negative phase. :param l1: hyper-parameter controlling amount of L1 regularization :param l2: hyper-parameter controlling amount of L2 regularization :param batch_size: size of positive and negative phase minibatch :param compile: compile sampling and learning functions :param seed: seed used to initialize numpy and theano RNGs. """ Model.__init__(self) Block.__init__(self) assert lr_spec is not None for k in ['Wv', 'hbias']: assert k in iscales.keys() iscales.setdefault('mu', 1.) iscales.setdefault('alpha', 0.) iscales.setdefault('lambd', 0.) for k in ['h']: assert k in sp_weight.keys() for k in ['h']: assert k in sp_targ.keys() self.validate_flags(flags) self.jobman_channel = None self.jobman_state = {} self.register_names_to_del(['jobman_channel']) ### make sure all parameters are floatX ### for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v) for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v) for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v) for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v) for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v) for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v) # dump initialization parameters to object for (k,v) in locals().iteritems(): if k!='self': setattr(self,k,v) # allocate random number generators self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng # allocate symbolic variable for input self.input = T.matrix('input') self.init_parameters() self.init_chains() # learning rate, with deferred 1./t annealing self.iter = sharedX(0.0, name='iter') if lr_spec['type'] == 'anneal': num = lr_spec['init'] * lr_spec['start'] denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter) self.lr = T.maximum(lr_spec['floor'], num/denum) elif lr_spec['type'] == 'linear': lr_start = npy_floatX(lr_spec['start']) lr_end = npy_floatX(lr_spec['end']) self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates) else: raise ValueError('Incorrect value for lr_spec[type]') # configure input-space (new pylearn2 feature?) self.input_space = VectorSpace(n_v) self.output_space = VectorSpace(n_h) self.batches_seen = 0 # incremented on every batch self.examples_seen = 0 # incremented on every training example self.force_batch_size = batch_size # force minibatch size self.error_record = [] if compile: self.do_theano() #### load layer 1 parameters from file #### if init_from: self.load_params(init_from)
def __init__(self, nvis, nhid, irange=0.5, rng=None, init_bias_vis = 0.0, init_bias_hid=0.0, base_lr = 1e-3, anneal_start = None, nchains = 100, sml_gibbs_steps = 1, random_patches_src = None, monitor_reconstruction = False): """ Construct an RBM object. Parameters ---------- nvis : int Number of visible units in the model. nhid : int Number of hidden units in the model. irange : float, optional The size of the initial interval around 0 for weights. rng : RandomState object or seed NumPy RandomState object to use when initializing parameters of the model, or (integer) seed to use to create one. init_bias_vis : array_like, optional Initial value of the visible biases, broadcasted as necessary. init_bias_hid : array_like, optional initial value of the hidden biases, broadcasted as necessary. monitor_reconstruction : if True, will request a monitoring channel to monitor reconstruction error random_patches_src: Either None, or a Dataset from which to draw random patches in order to initialize the weights. Patches will be multiplied by irange Parameters for default SML learning rule: base_lr : the base learning rate anneal_start : number of steps after which to start annealing on a 1/t schedule nchains: number of negative chains sml_gibbs_steps: number of gibbs steps to take per update """ Model.__init__(self) Block.__init__(self) if rng is None: # TODO: global rng configuration stuff. rng = numpy.random.RandomState(1001) self.rng = rng try: b_vis = numpy.zeros(nvis) b_vis += init_bias_vis except ValueError: raise ValueError("bad shape or value for init_bias_vis") self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True) try: b_hid = numpy.zeros(nhid) b_hid += init_bias_hid except ValueError: raise ValueError('bad shape or value for init_bias_hid') self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True) self.random_patches_src = random_patches_src self.register_names_to_del(['random_patches_src']) if random_patches_src is None: W = rng.uniform(-irange, irange, (nvis, nhid)) else: if hasattr(random_patches_src, '__array__'): W = irange * random_patches_src.T assert W.shape == (nvis, nhid) else: #assert type(irange) == type(0.01) #assert irange == 0.01 W = irange * random_patches_src.get_batch_design(nhid).T self.weights = sharedX( W, name='W', borrow=True ) self.__dict__.update(nhid=nhid, nvis=nvis) self._params = [self.bias_vis, self.bias_hid, self.weights] self.base_lr = base_lr self.anneal_start = anneal_start self.nchains = nchains self.sml_gibbs_steps = sml_gibbs_steps
def __init__(self, input_space, output_channels, pool_shape, batch_size=None,
             detector_axes=('b', 'c', 0, 1), kernel_shape=(2, 2),
             kernel_stride=(1, 1), border_mode='valid', transformer=None,
             h_bias=None, v_bias=None, numpy_rng=None, theano_rng=None):
    """
    input_space: Conv2DSpace
    transformer: pylearn2.linear.Conv2D instance
    h_bias: vector whose length equals the number of output feature maps;
        each component corresponds to one feature map.
    v_bias: vector whose length equals the number of input feature maps;
        each component corresponds to one feature map.
    pool_shape / pool_stride: following Honglak Lee's original paper, pooling
        regions do not overlap, which requires pool_stride == pool_shape, so
        for now there is no separate pool_stride argument.

    Note that for a convolutional RBM the hidden layer corresponds to the
    detector layer obtained after convolution, while the output corresponds
    to the pooling layer. So unlike an ordinary RBM, which only has an input
    and an output space, a CRBM has three spaces.
    """
    Model.__init__(self)  # self.names_to_del = set(); self._test_batch_size = 2
    Block.__init__(self)  # self.fn = None; self.cpu_only = False

    self.kernel_shape = kernel_shape
    self.kernel_stride = kernel_stride
    self.pool_shape = pool_shape
    self.pool_stride = pool_shape
    self.border_mode = border_mode
    self.batch_size = batch_size
    self.force_batch_size = batch_size

    input_shape = input_space.shape
    input_channels = input_space.num_channels

    if self.border_mode == 'valid':
        detector_shape = [(input_shape[0] - kernel_shape[0]) / int(kernel_stride[0]) + 1,
                          (input_shape[1] - kernel_shape[1]) / int(kernel_stride[1]) + 1]
    elif self.border_mode == 'full':
        detector_shape = [(input_shape[0] + kernel_shape[0]) / int(kernel_stride[0]) - 1,
                          (input_shape[1] + kernel_shape[1]) / int(kernel_stride[1]) - 1]

    assert isinstance(input_space, Conv2DSpace)
    self.input_space = input_space  # add input_space
    self.detector_space = Conv2DSpace(shape=detector_shape,
                                      num_channels=output_channels,
                                      axes=detector_axes)  # add detector_space

    # currently we only handle the case where the detector-layer feature maps
    # can be tiled exactly by pool_shape without overlap
    # TODO: handle the case where the borders need padding
    output_shape = (detector_shape[0] / pool_shape[0],
                    detector_shape[1] / pool_shape[1])
    self.output_space = Conv2DSpace(shape=output_shape,
                                    num_channels=output_channels,
                                    axes=detector_axes)  # add output_space

    self.n_vis = numpy.prod(input_space.shape) * input_space.num_channels
    self.n_hid = detector_shape[0] * detector_shape[1] * output_channels

    if numpy_rng is None:
        # create a number generator
        numpy_rng = numpy.random.RandomState(seed=19900418)
    self.numpy_rng = numpy_rng

    if theano_rng is None:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.theano_rng = theano_rng

    if transformer is None:
        irange = 4 * numpy.sqrt(6. / (self.n_hid + self.n_vis))
        transformer = make_random_conv2D(irange=irange,
                                         input_space=self.input_space,
                                         output_space=self.detector_space,
                                         kernel_shape=self.kernel_shape,
                                         batch_size=self.batch_size,
                                         subsample=kernel_stride,
                                         border_mode=self.border_mode,
                                         rng=self.numpy_rng)
    else:
        assert isinstance(transformer, Conv2D)

    if h_bias is None:
        # create shared variable for hidden units bias
        h_bias = theano.shared(value=numpy.zeros(self.detector_space.num_channels,
                                                 dtype=theano.config.floatX),
                               name='h_bias', borrow=True)
    if v_bias is None:
        # create shared variable for visible units bias
        v_bias = theano.shared(value=numpy.zeros(self.input_space.num_channels,
                                                 dtype=theano.config.floatX),
                               name='v_bias', borrow=True)

    self.transformer = transformer
    self.h_bias = h_bias
    self.v_bias = v_bias
    self._params = safe_union(self.transformer.get_params(),
                              [self.h_bias, self.v_bias])
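# Hedged numeric sketch (not from the original source) of the shape arithmetic
# performed above for border_mode='valid': the detector maps shrink by the
# kernel size, then non-overlapping pooling divides each dimension by
# pool_shape. The concrete sizes are made up for illustration.
input_shape = (32, 32)
kernel_shape = (5, 5)
kernel_stride = (1, 1)
pool_shape = (2, 2)

detector_shape = [(input_shape[0] - kernel_shape[0]) / kernel_stride[0] + 1,
                  (input_shape[1] - kernel_shape[1]) / kernel_stride[1] + 1]
output_shape = (detector_shape[0] / pool_shape[0],
                detector_shape[1] / pool_shape[1])
print detector_shape, output_shape   # [28, 28] (14, 14)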
def __init__(self, input=None, Wv=None, vbias=None, hbias=None, numpy_rng=None, theano_rng=None, n_h=100, bw_s=1, n_v=100, init_from=None, neg_sample_steps=1, lr=None, lr_timestamp=None, lr_mults={}, iscales={}, clip_min={}, clip_max={}, vbound=5., l1={}, l2={}, orth_lambda=0., var_param_alpha='exp', var_param_beta='linear', sp_type='kl', sp_weight={}, sp_targ={}, batch_size=13, scalar_b=False, compile=True, debug=False, seed=1241234, my_save_path=None, save_at=None, save_every=None, flags={}, max_updates=5e5): """ :param n_h: number of h-hidden units :param n_v: number of visible units :param iscales: optional dictionary containing initialization scale for each parameter :param neg_sample_steps: number of sampling updates to perform in negative phase. :param l1: hyper-parameter controlling amount of L1 regularization :param l2: hyper-parameter controlling amount of L2 regularization :param batch_size: size of positive and negative phase minibatch :param compile: compile sampling and learning functions :param seed: seed used to initialize numpy and theano RNGs. """ Model.__init__(self) Block.__init__(self) assert lr is not None for k in ['Wv', 'vbias', 'hbias']: assert k in iscales.keys() iscales.setdefault('mu', 1.) iscales.setdefault('alpha', 0.) iscales.setdefault('beta', 0.) for k in ['h']: assert k in sp_weight.keys() for k in ['h']: assert k in sp_targ.keys() self.jobman_channel = None self.jobman_state = {} self.register_names_to_del(['jobman_channel']) ### make sure all parameters are floatX ### for (k, v) in l1.iteritems(): l1[k] = npy_floatX(v) for (k, v) in l2.iteritems(): l2[k] = npy_floatX(v) for (k, v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v) for (k, v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v) for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v) for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v) # dump initialization parameters to object for (k, v) in locals().iteritems(): if k != 'self': setattr(self, k, v) # allocate random number generators self.rng = numpy.random.RandomState( seed) if numpy_rng is None else numpy_rng self.theano_rng = RandomStreams(self.rng.randint( 2**30)) if theano_rng is None else theano_rng ############### ALLOCATE PARAMETERS ################# self.n_s = self.n_h * self.bw_s self.wv_norms = sharedX(1.0 * numpy.ones(self.n_s), name='wv_norms') if Wv is None: wv_val = self.rng.randn(n_v, self.n_s) * iscales['Wv'] self.Wv = sharedX(wv_val, name='Wv') else: self.Wv = Wv self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX) for i in xrange(self.n_h): self.Wh[i * bw_s:(i + 1) * bw_s, i] = 1. 
    # allocate shared variables for bias parameters
    if hbias is None:
        self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h), name='hbias')
    else:
        self.hbias = hbias

    # mean (mu) and precision (alpha) parameters on s
    self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
    self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
    var_param_func = {
        'exp': T.exp,
        'softplus': T.nnet.softplus,
        'linear': lambda x: x
    }
    self.alpha_prec = var_param_func[self.var_param_alpha](self.alpha)

    # diagonal of precision matrix of visible units
    self.vbound = sharedX(vbound, name='vbound')
    self.beta = sharedX(iscales['beta'] * numpy.ones(n_v), name='beta')
    self.beta_prec = var_param_func[self.var_param_beta](self.beta)

    # allocate shared variable for persistent chain
    self.neg_v = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
    self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
    self.neg_s = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
    self.neg_h = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

    # moving average values for sparsity
    self.sp_pos_v = sharedX(self.rng.rand(1, self.n_v), name='sp_pos_v')
    self.sp_pos_h = sharedX(self.rng.rand(1, self.n_h), name='sp_pos_h')

    # learning rate, with deferred 1./t annealing
    self.iter = sharedX(0.0, name='iter')

    if lr['type'] == 'anneal':
        num = lr['init'] * lr['start']
        denum = T.maximum(lr['start'], lr['slope'] * self.iter)
        self.lr = T.maximum(lr['floor'], num / denum)
    elif lr['type'] == 'linear':
        lr_start = npy_floatX(lr['start'])
        lr_end = npy_floatX(lr['end'])
        self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
    else:
        raise ValueError('Incorrect value for lr[type]')

    # learning rate multipliers - implemented as shared parameters for GPU
    self.lr_mults_it = {}
    self.lr_mults_shrd = {}
    for (k, v) in lr_mults.iteritems():
        # make sure all learning rate multipliers are float64
        self.lr_mults_it[k] = tools.HyperParamIterator(lr_timestamp, lr_mults[k])
        self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value,
                                        name='lr_mults_shrd' + k)

    # allocate symbolic variable for input
    self.input = T.matrix('input') if input is None else input

    # configure input-space (new pylearn2 feature?)
    self.input_space = VectorSpace(n_v)
    self.output_space = VectorSpace(n_h)

    self.batches_seen = 0                # incremented on every batch
    self.examples_seen = 0               # incremented on every training example
    self.force_batch_size = batch_size   # force minibatch size
    self.error_record = []

    if compile:
        self.do_theano()

    #### load layer 1 parameters from file ####
    if init_from:
        self.load_params(init_from)
def __init__(self, numpy_rng=None, theano_rng=None, n_h=99, n_v=100, init_from=None, neg_sample_steps=1, lr_spec=None, lr_mults={}, iscales={}, clip_min={}, clip_max={}, l1={}, l2={}, sp_weight={}, sp_targ={}, batch_size=13, compile=True, debug=False, seed=1241234, my_save_path=None, save_at=None, save_every=None, flags={}, max_updates=5e5): """ :param n_h: number of h-hidden units :param n_v: number of visible units :param iscales: optional dictionary containing initialization scale for each parameter :param neg_sample_steps: number of sampling updates to perform in negative phase. :param l1: hyper-parameter controlling amount of L1 regularization :param l2: hyper-parameter controlling amount of L2 regularization :param batch_size: size of positive and negative phase minibatch :param compile: compile sampling and learning functions :param seed: seed used to initialize numpy and theano RNGs. """ Model.__init__(self) Block.__init__(self) assert lr_spec is not None for k in ['h']: assert k in sp_weight.keys() for k in ['h']: assert k in sp_targ.keys() self.validate_flags(flags) self.jobman_channel = None self.jobman_state = {} self.register_names_to_del(['jobman_channel']) ### make sure all parameters are floatX ### for (k, v) in l1.iteritems(): l1[k] = npy_floatX(v) for (k, v) in l2.iteritems(): l2[k] = npy_floatX(v) for (k, v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v) for (k, v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v) for (k, v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v) for (k, v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v) # dump initialization parameters to object for (k, v) in locals().iteritems(): if k != 'self': setattr(self, k, v) # allocate random number generators self.rng = numpy.random.RandomState( seed) if numpy_rng is None else numpy_rng self.theano_rng = RandomStreams(self.rng.randint( 2**30)) if theano_rng is None else theano_rng ############### ALLOCATE PARAMETERS ################# # allocate symbolic variable for input self.input = T.matrix('input') self.init_parameters() self.init_chains() # learning rate, with deferred 1./t annealing self.iter = sharedX(0.0, name='iter') if lr_spec['type'] == 'anneal': num = lr_spec['init'] * lr_spec['start'] denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter) self.lr = T.maximum(lr_spec['floor'], num / denum) elif lr_spec['type'] == 'linear': lr_start = npy_floatX(lr_spec['start']) lr_end = npy_floatX(lr_spec['end']) self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX( self.max_updates) else: raise ValueError('Incorrect value for lr_spec[type]') # configure input-space (new pylearn2 feature?) self.input_space = VectorSpace(n_v) self.output_space = VectorSpace(n_h) self.batches_seen = 0 # incremented on every batch self.examples_seen = 0 # incremented on every training example self.force_batch_size = batch_size # force minibatch size self.error_record = [] if compile: self.do_theano() if init_from: raise NotImplementedError()
def perform(self, X):
    # work around some awkwardness with blocks
    rval = Block.perform(self, X)
    if isinstance(rval, list):
        rval = tuple(rval)
    return rval
def __init__(self, nvis = None, nhid = None, vis_space = None, hid_space = None, transformer = None, irange=0.5, rng=None, init_bias_vis = None, init_bias_vis_marginals = None, init_bias_hid=0.0, base_lr = 1e-3, anneal_start = None, nchains = 100, sml_gibbs_steps = 1, random_patches_src = None, monitor_reconstruction = False): """ Construct an RBM object. Parameters ---------- nvis : int Number of visible units in the model. (Specifying this implies that the model acts on a vector, i.e. it sets vis_space = pylearn2.space.VectorSpace(nvis) ) nhid : int Number of hidden units in the model. (Specifying this implies that the model acts on a vector) vis_space: A pylearn2.space.Space object describing what kind of vector space the RBM acts on. Don't specify if you used nvis / hid hid_space: A pylearn2.space.Space object describing what kind of vector space the RBM's hidden units live in. Don't specify if you used nvis / nhid init_bias_vis_marginals: either None, or a Dataset to use to initialize the visible biases to the inverse sigmoid of the data marginals irange : float, optional The size of the initial interval around 0 for weights. rng : RandomState object or seed NumPy RandomState object to use when initializing parameters of the model, or (integer) seed to use to create one. init_bias_vis : array_like, optional Initial value of the visible biases, broadcasted as necessary. init_bias_hid : array_like, optional initial value of the hidden biases, broadcasted as necessary. monitor_reconstruction : if True, will request a monitoring channel to monitor reconstruction error random_patches_src: Either None, or a Dataset from which to draw random patches in order to initialize the weights. Patches will be multiplied by irange Parameters for default SML learning rule: base_lr : the base learning rate anneal_start : number of steps after which to start annealing on a 1/t schedule nchains: number of negative chains sml_gibbs_steps: number of gibbs steps to take per update """ Model.__init__(self) Block.__init__(self) if init_bias_vis_marginals is not None: assert init_bias_vis is None X = init_bias_vis_marginals.X assert X.min() >= 0.0 assert X.max() <= 1.0 marginals = X.mean(axis=0) #rescale the marginals a bit to avoid NaNs init_bias_vis = inverse_sigmoid_numpy(.01 + .98 * marginals) if init_bias_vis is None: init_bias_vis = 0.0 if rng is None: # TODO: global rng configuration stuff. 
rng = numpy.random.RandomState(1001) self.rng = rng if vis_space is None: #if we don't specify things in terms of spaces and a transformer, #assume dense matrix multiplication and work off of nvis, nhid assert hid_space is None assert transformer is None or isinstance(transformer,MatrixMul) assert nvis is not None assert nhid is not None if transformer is None: if random_patches_src is None: W = rng.uniform(-irange, irange, (nvis, nhid)) else: if hasattr(random_patches_src, '__array__'): W = irange * random_patches_src.T assert W.shape == (nvis, nhid) else: #assert type(irange) == type(0.01) #assert irange == 0.01 W = irange * random_patches_src.get_batch_design(nhid).T self.transformer = MatrixMul( sharedX( W, name='W', borrow=True ) ) else: self.transformer = transformer self.vis_space = VectorSpace(nvis) self.hid_space = VectorSpace(nhid) else: assert hid_space is not None assert transformer is not None assert nvis is None assert nhid is None self.vis_space = vis_space self.hid_space = hid_space self.transformer = transformer try: b_vis = self.vis_space.get_origin() b_vis += init_bias_vis except ValueError: raise ValueError("bad shape or value for init_bias_vis") self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True) try: b_hid = self.hid_space.get_origin() b_hid += init_bias_hid except ValueError: raise ValueError('bad shape or value for init_bias_hid') self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True) self.random_patches_src = random_patches_src self.register_names_to_del(['random_patches_src']) self.__dict__.update(nhid=nhid, nvis=nvis) self._params = safe_union(self.transformer.get_params(), [self.bias_vis, self.bias_hid]) self.base_lr = base_lr self.anneal_start = anneal_start self.nchains = nchains self.sml_gibbs_steps = sml_gibbs_steps
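# Hedged usage sketch (not from the original source): the dense construction
# path above, driven purely by nvis/nhid. The class name `RBM` matches
# pylearn2.models.rbm but is otherwise an assumption here.
rbm = RBM(nvis=784, nhid=500, irange=0.05,
          init_bias_vis=0.0, init_bias_hid=0.0,
          base_lr=1e-3, nchains=100, sml_gibbs_steps=1)
# vis_space / hid_space / transformer are left unset, so the constructor
# builds a MatrixMul transformer over VectorSpace(784) -> VectorSpace(500).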
def __init__(self, numpy_rng = None, theano_rng = None, n_h=99, n_v=100, init_from=None, min_beta=0.9, num_beta=20, gamma=10, cratio=1, cdelay=0, neg_sample_steps=1, lr_spec=None, lr_mults = {}, iscales={}, clip_min={}, clip_max={}, l1 = {}, l2 = {}, sp_weight={}, sp_targ={}, batch_size = 13, compile=True, debug=False, seed=1241234, flags = {}, max_updates = 5e5, **kwargs): """ :param n_h: number of h-hidden units :param n_v: number of visible units :param iscales: optional dictionary containing initialization scale for each parameter :param neg_sample_steps: number of sampling updates to perform in negative phase. :param l1: hyper-parameter controlling amount of L1 regularization :param l2: hyper-parameter controlling amount of L2 regularization :param batch_size: size of positive and negative phase minibatch :param compile: compile sampling and learning functions :param seed: seed used to initialize numpy and theano RNGs. """ Model.__init__(self) Block.__init__(self) assert lr_spec is not None for k in ['h']: assert k in sp_weight.keys() for k in ['h']: assert k in sp_targ.keys() self.validate_flags(flags) self.jobman_channel = None self.jobman_state = {} self.register_names_to_del(['jobman_channel']) ### make sure all parameters are floatX ### for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v) for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v) for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v) for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v) for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v) for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v) # dump initialization parameters to object for (k,v) in locals().iteritems(): if k!='self': setattr(self,k,v) # allocate random number generators self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng ############### ALLOCATE PARAMETERS ################# # allocate symbolic variable for input self.input = T.matrix('input') self.init_parameters() self.init_chains() # learning rate, with deferred 1./t annealing self.iter = sharedX(0.0, name='iter') if lr_spec['type'] == 'anneal': num = lr_spec['init'] * lr_spec['start'] denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter) self.lr = T.maximum(lr_spec['floor'], num/denum) elif lr_spec['type'] == '1_t': self.lr = npy_floatX(lr_spec['num']) / (self.iter + npy_floatX(lr_spec['denum'])) elif lr_spec['type'] == 'linear': lr_start = npy_floatX(lr_spec['start']) lr_end = npy_floatX(lr_spec['end']) self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates) elif lr_spec['type'] == 'constant': self.lr = sharedX(lr_spec['value'], name='lr') else: raise ValueError('Incorrect value for lr_spec[type]') # configure input-space (new pylearn2 feature?) self.input_space = VectorSpace(n_v) self.output_space = VectorSpace(n_h) self.batches_seen = 0 # incremented on every batch self.examples_seen = 0 # incremented on every training example self.logz = sharedX(0.0, name='logz') self.cpu_time = 0 self.error_record = [] if compile: self.do_theano() if init_from: raise NotImplementedError()
def __init__(self, input = None, n_u=[100,100], enable={}, load_from=None, iscales=None, clip_min={}, clip_max={}, pos_mf_steps=1, pos_sample_steps=0, neg_sample_steps=1, lr_spec={}, lr_mults = {}, l1 = {}, l2 = {}, l1_inf={}, flags={}, momentum_lambda=0, cg_params = {}, batch_size = 13, computational_bs = 0, compile=True, seed=1241234, sp_targ_h = None, sp_weight_h=None, sp_pos_k = 5, my_save_path=None, save_at=None, save_every=None, max_updates=1e6): """ :param n_u: list, containing number of units per layer. n_u[0] contains number of visible units, while n_u[i] (with i > 0) contains number of hid. units at layer i. :param enable: dictionary of flags with on/off behavior :param iscales: optional dictionary containing initialization scale for each parameter. Key of dictionary should match the name of the associated shared variable. :param pos_mf_steps: number of mean-field iterations to perform in positive phase :param neg_sample_steps: number of sampling updates to perform in negative phase. :param lr: base learning rate :param lr_timestamp: list containing update indices at which to change the lr multiplier :param lr_mults: dictionary, optionally containing a list of learning rate multipliers for parameters of the model. Length of this list should match length of lr_timestamp (the lr mult will transition whenever we reach the associated timestamp). Keys should match the name of the shared variable, whose learning rate is to be adjusted. :param l1: dictionary, whose keys are model parameter names, and values are hyper-parameters controlling degree of L1-regularization. :param l2: same as l1, but for L2 regularization. :param l1_inf: same as l1, but the L1 penalty is centered as -\infty instead of 0. :param cg_params: dictionary with keys ['rtol','damp','maxiter'] :param batch_size: size of positive and negative phase minibatch :param computational_bs: batch size used internaly by natural gradient to reduce memory consumption :param seed: seed used to initialize numpy and theano RNGs. :param my_save_path: if None, do not save model. Otherwise, contains stem of filename to which we will save the model (everything but the extension). :param save_at: list containing iteration counts at which to save model :param save_every: scalar value. Save model every `save_every` iterations. """ Model.__init__(self) Block.__init__(self) ### VALIDATE PARAMETERS AND SET DEFAULT VALUES ### assert lr_spec is not None for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v) for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v) [iscales.setdefault('bias%i' % i, 0.) 
for i in xrange(len(n_u))] [iscales.setdefault('W%i' % i, 0.1) for i in xrange(len(n_u))] flags.setdefault('enable_centering', False) flags.setdefault('enable_natural', False) flags.setdefault('enable_warm_start', False) flags.setdefault('mlbiases', False) flags.setdefault('precondition', None) flags.setdefault('minres', False) flags.setdefault('minresQLP', False) if flags['precondition'] == 'None': flags['precondition'] = None self.jobman_channel = None self.jobman_state = {} self.register_names_to_del(['jobman_channel']) ### DUMP INITIALIZATION PARAMETERS TO OBJECT ### for (k,v) in locals().iteritems(): if k!='self': setattr(self,k,v) assert len(n_u) > 1 self.n_v = n_u[0] self.depth = len(n_u) # allocate random number generators self.rng = numpy.random.RandomState(seed) self.theano_rng = RandomStreams(self.rng.randint(2**30)) # allocate bilinear-weight matrices self.input = T.matrix() self.init_parameters() self.init_dparameters() self.init_centering() self.init_samples() # learning rate, with deferred 1./t annealing self.iter = sharedX(0.0, name='iter') if lr_spec['type'] == 'anneal': num = lr_spec['init'] * lr_spec['start'] denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter) self.lr = T.maximum(lr_spec['floor'], num/denum) elif lr_spec['type'] == 'linear': lr_start = npy_floatX(lr_spec['start']) lr_end = npy_floatX(lr_spec['end']) self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates) else: raise ValueError('Incorrect value for lr_spec[type]') # counter for CPU-time self.cpu_time = 0. if load_from: self.load_parameters(fname=load_from) # configure input-space (?new pylearn2 feature?) self.input_space = VectorSpace(n_u[0]) self.output_space = VectorSpace(n_u[-1]) self.batches_seen = 0 # incremented on every batch self.examples_seen = 0 # incremented on every training example self.force_batch_size = batch_size # force minibatch size self.error_record = [] if compile: self.do_theano()