Example #1
    def get_sparsity_cost(self):

        # update mean activation using exponential moving average
        hack_h = self.h_given_v(self.sp_pos_v)

        # define loss based on value of sp_type
        if self.sp_type == 'kl':
            eps = npy_floatX(1. / self.batch_size)
            loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                     - npy_floatX(1-targ) * T.log(1 - val + eps)
        else:
            raise NotImplementedError('Sparsity type %s is not implemented' %
                                      self.sp_type)

        cost = T.zeros((), dtype=floatX)

        params = []
        if self.sp_weight['h']:
            cost += self.sp_weight['h'] * T.sum(
                loss(self.sp_targ['h'], hack_h.mean(axis=0)))
            params += [self.hbias]

        if self.sp_type in ['kl'] and self.sp_weight['h']:
            params += [self.Wv, self.alpha, self.mu]
            if self.flags.get('split_norm', False):
                params += [self.wv_norms]

        return utils_cost.Cost(cost, params)
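
For reference, the 'kl' sparsity cost above is a cross-entropy between the Bernoulli target activation sp_targ['h'] and the per-unit mean activation of the hidden units, smoothed by eps = 1/batch_size so the logarithms stay finite. A minimal self-contained NumPy sketch of that penalty (my own, not part of the repository):

import numpy as np

def kl_sparsity_penalty(mean_h, targ=0.05, batch_size=13):
    # cross-entropy between Bernoulli(targ) and the observed mean activations,
    # with eps keeping the logs finite when an activation hits 0 or 1
    eps = 1.0 / batch_size
    val = np.asarray(mean_h, dtype='float64')
    loss = -targ * np.log(eps + val) - (1.0 - targ) * np.log(1.0 - val + eps)
    return loss.sum()

# units far from the 5% target contribute much more than units near it
print(kl_sparsity_penalty([0.05, 0.5, 0.9]))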
Example #2
    def get_sparsity_cost(self):

        # update mean activation using exponential moving average
        hack_h = self.h_given_v(self.sp_pos_v)

        # define loss based on value of sp_type
        if self.sp_type == 'kl':
            eps = npy_floatX(1./self.batch_size)
            loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                     - npy_floatX(1-targ) * T.log(1 - val + eps)
        else:
            raise NotImplementedError('Sparsity type %s is not implemented' % self.sp_type)

        cost = T.zeros((), dtype=floatX)

        params = []
        if self.sp_weight['h']: 
            cost += self.sp_weight['h']  * T.sum(loss(self.sp_targ['h'], hack_h.mean(axis=0)))
            params += [self.hbias]

        if self.sp_type in ['kl'] and self.sp_weight['h']:
            params += [self.Wv, self.alpha, self.mu]
            if self.flags['split_norm']:
                params += [self.scalar_norms]

        return costmod.Cost(cost, params)
Example #3
    def get_sparsity_cost(self, pos_g, pos_h):

        # update mean activation using exponential moving average
        hack_g = self.g_given_hv(pos_h, self.input)
        hack_h = self.h_given_gv(pos_g, self.input)

        # define loss based on value of sp_type
        eps = npy_floatX(1./self.batch_size)
        loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                 - npy_floatX(1-targ) * T.log(1 - val + eps)

        params = []
        cost = T.zeros((), dtype=floatX)
        if self.sp_weight['g'] or self.sp_weight['h']:
            params += [self.Wv, self.alpha, self.mu]
            if self.sp_weight['g']:
                cost += self.sp_weight['g']  * T.sum(loss(self.sp_targ['g'], hack_g.mean(axis=0)))
                params += [self.gbias]
            if self.sp_weight['h']:
                cost += self.sp_weight['h']  * T.sum(loss(self.sp_targ['h'], hack_h.mean(axis=0)))
                params += [self.hbias]
            if self.flags['split_norm']:
                params += [self.scalar_norms]
        cte = [pos_g, pos_h]
        return costmod.Cost(cost, params, cte)
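
The extra cte argument passed to Cost here lists symbolic variables to be treated as constants. A plausible reading (my assumption; these snippets do not show the Cost class) is that they are forwarded to T.grad(..., consider_constant=...), so the sparsity gradient does not flow back through the positive-phase samples pos_g and pos_h. A minimal Theano sketch of that mechanism:

import theano
import theano.tensor as T

W = theano.shared(0.5, name='W')
x = T.scalar('x')
pos_h = T.nnet.sigmoid(W * x)           # stand-in for a positive-phase activation
cost = (pos_h - 0.05) ** 2 + 0.1 * W    # toy cost with a second path to W

g_full = T.grad(cost, W)                              # gradient flows through pos_h
g_cte = T.grad(cost, W, consider_constant=[pos_h])    # pos_h held constant
f = theano.function([x], [g_full, g_cte])
print(f(1.0))   # the second gradient reduces to 0.1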
Example #4
    def get_sparsity_cost(self):
        hack_h = self.h_given_v(self.input)
        # define loss based on value of sp_type
        eps = npy_floatX(1e-5)
        loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                 - npy_floatX(1-targ) * T.log(1 - val + eps)

        params = []
        cost = T.zeros((), dtype=floatX)
        if self.sp_weight['h']:
            params += [self.Wv, self.hbias]
            cost += self.sp_weight['h']  * T.sum(loss(self.sp_targ['h'], hack_h).mean(axis=0))

        return costmod.Cost(cost, params)
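
Unlike examples #1, #2 and #5, which penalize each unit's batch-averaged activation, this variant applies the cross-entropy to every individual activation and then averages over the batch. Because the loss is nonlinear the two aggregations differ, as this small NumPy illustration (my own) shows:

import numpy as np

h = np.array([[0.0, 1.0],
              [1.0, 0.0]])          # two units, each with mean activation 0.5
targ, eps = 0.05, 1e-5
loss = lambda t, v: -t * np.log(eps + v) - (1 - t) * np.log(1 - v + eps)

per_unit_mean = loss(targ, h.mean(axis=0)).sum()   # examples #1, #2, #5
per_example = loss(targ, h).mean(axis=0).sum()     # this example
print(per_unit_mean, per_example)                  # noticeably different values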
Example #5
    def get_sparsity_cost(self):
        hack_h = self.h_given_v(self.input)
        # define loss based on value of sp_type
        eps = npy_floatX(1. / self.batch_size)
        loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                 - npy_floatX(1-targ) * T.log(1 - val + eps)

        params = []
        cost = T.zeros((), dtype=floatX)
        if self.sp_weight['h']:
            params += [self.Wv, self.hbias]
            cost += self.sp_weight['h'] * T.sum(
                loss(self.sp_targ['h'], hack_h.mean(axis=0)))

        return costmod.Cost(cost, params, [hack_h])
Example #6
    def __init__(self, 
            numpy_rng = None, theano_rng = None,
            n_h=99, n_v=100, init_from=None,
            min_beta=0.9, num_beta=20, gamma=10, cratio=1, cdelay=0,
            neg_sample_steps=1,
            lr_spec=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={},
            l1 = {}, l2 = {},
            sp_weight={}, sp_targ={},
            batch_size = 13,
            compile=True, debug=False, seed=1241234,
            flags = {},
            max_updates = 5e5, **kwargs):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr_spec is not None
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()
        self.validate_flags(flags)

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        # allocate symbolic variable for input
        self.input = T.matrix('input')
        self.init_parameters()
        self.init_chains()

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr_spec['type'] == 'anneal':
            num = lr_spec['init'] * lr_spec['start'] 
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num/denum) 
        elif lr_spec['type'] == '1_t':
            self.lr = npy_floatX(lr_spec['num']) / (self.iter + npy_floatX(lr_spec['denum']))
        elif lr_spec['type'] == 'linear':
            lr_start = npy_floatX(lr_spec['start'])
            lr_end   = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        elif lr_spec['type'] == 'constant':
            self.lr = sharedX(lr_spec['value'], name='lr')
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0               # incremented on every batch
        self.examples_seen = 0              # incremented on every training example
        self.logz = sharedX(0.0, name='logz')
        self.cpu_time = 0

        self.error_record = []
 
        if compile: self.do_theano()

        if init_from:
            raise NotImplementedError()
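
The four lr_spec schedules accepted by this constructor can be read as plain functions of the update counter. A pure-Python restatement (my own sketch, mirroring the symbolic expressions above):

def learning_rate(lr_spec, t, max_updates=5e5):
    if lr_spec['type'] == 'anneal':
        # constant at 'init' until t reaches 'start', then roughly 1/t decay
        num = lr_spec['init'] * lr_spec['start']
        denum = max(lr_spec['start'], lr_spec['slope'] * t)
        return max(lr_spec['floor'], num / denum)
    elif lr_spec['type'] == '1_t':
        return lr_spec['num'] / (t + lr_spec['denum'])
    elif lr_spec['type'] == 'linear':
        # interpolate from 'start' to 'end' over max_updates updates
        return lr_spec['start'] + t * (lr_spec['end'] - lr_spec['start']) / max_updates
    elif lr_spec['type'] == 'constant':
        return lr_spec['value']
    raise ValueError('Incorrect value for lr_spec[type]')

spec = {'type': 'anneal', 'init': 0.01, 'start': 1000, 'slope': 1., 'floor': 1e-5}
print([learning_rate(spec, t) for t in (0, 1000, 10000)])   # 0.01, 0.01, 0.001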
Example #7
    def __init__(self,
                 input=None,
                 Wv=None,
                 vbias=None,
                 hbias=None,
                 numpy_rng=None,
                 theano_rng=None,
                 n_h=100,
                 bw_s=1,
                 n_v=100,
                 init_from=None,
                 neg_sample_steps=1,
                 lr=None,
                 lr_timestamp=None,
                 lr_mults={},
                 iscales={},
                 clip_min={},
                 clip_max={},
                 vbound=5.,
                 l1={},
                 l2={},
                 orth_lambda=0.,
                 var_param_alpha='exp',
                 var_param_beta='linear',
                 sp_type='kl',
                 sp_weight={},
                 sp_targ={},
                 batch_size=13,
                 scalar_b=False,
                 compile=True,
                 debug=False,
                 seed=1241234,
                 my_save_path=None,
                 save_at=None,
                 save_every=None,
                 flags={},
                 max_updates=5e5):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr is not None
        for k in ['Wv', 'vbias', 'hbias']:
            assert k in iscales.keys()
        iscales.setdefault('mu', 1.)
        iscales.setdefault('alpha', 0.)
        iscales.setdefault('beta', 0.)
        for k in ['h']:
            assert k in sp_weight.keys()
        for k in ['h']:
            assert k in sp_targ.keys()

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        for (k, v) in l1.iteritems():
            l1[k] = npy_floatX(v)
        for (k, v) in l2.iteritems():
            l2[k] = npy_floatX(v)
        for (k, v) in sp_weight.iteritems():
            sp_weight[k] = npy_floatX(v)
        for (k, v) in sp_targ.iteritems():
            sp_targ[k] = npy_floatX(v)
        for (k, v) in clip_min.iteritems():
            clip_min[k] = npy_floatX(v)
        for (k, v) in clip_max.iteritems():
            clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k, v) in locals().iteritems():
            if k != 'self': setattr(self, k, v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(
            seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(
            2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        self.n_s = self.n_h * self.bw_s

        self.wv_norms = sharedX(1.0 * numpy.ones(self.n_s), name='wv_norms')
        if Wv is None:
            wv_val = self.rng.randn(n_v, self.n_s) * iscales['Wv']
            self.Wv = sharedX(wv_val, name='Wv')
        else:
            self.Wv = Wv

        self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX)
        for i in xrange(self.n_h):
            self.Wh[i * bw_s:(i + 1) * bw_s, i] = 1.

        # allocate shared variables for bias parameters
        if hbias is None:
            self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h),
                                 name='hbias')
        else:
            self.hbias = hbias

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s),
                             name='alpha')
        var_param_func = {
            'exp': T.exp,
            'softplus': T.nnet.softplus,
            'linear': lambda x: x
        }
        self.alpha_prec = var_param_func[self.var_param_alpha](self.alpha)

        # diagonal of precision matrix of visible units
        self.vbound = sharedX(vbound, name='vbound')
        self.beta = sharedX(iscales['beta'] * numpy.ones(n_v), name='beta')
        self.beta_prec = var_param_func[self.var_param_beta](self.beta)

        # allocate shared variable for persistent chain
        self.neg_v = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
        self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
        self.neg_s = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
        self.neg_h = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')

        # moving average values for sparsity
        self.sp_pos_v = sharedX(self.rng.rand(1, self.n_v), name='sp_pos_v')
        self.sp_pos_h = sharedX(self.rng.rand(1, self.n_h), name='sp_pos_h')

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr['type'] == 'anneal':
            num = lr['init'] * lr['start']
            denum = T.maximum(lr['start'], lr['slope'] * self.iter)
            self.lr = T.maximum(lr['floor'], num / denum)
        elif lr['type'] == 'linear':
            lr_start = npy_floatX(lr['start'])
            lr_end = npy_floatX(lr['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(
                self.max_updates)
        else:
            raise ValueError('Incorrect value for lr[type]')

        # learning rate - implemented as shared parameter for GPU
        self.lr_mults_it = {}
        self.lr_mults_shrd = {}
        for (k, v) in lr_mults.iteritems():
            # make sure all learning rate multipliers are float64
            self.lr_mults_it[k] = tools.HyperParamIterator(
                lr_timestamp, lr_mults[k])
            self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value,
                                            name='lr_mults_shrd' + k)

        # allocate symbolic variable for input
        self.input = T.matrix('input') if input is None else input

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0  # incremented on every batch
        self.examples_seen = 0  # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []

        if compile: self.do_theano()

        #### load layer 1 parameters from file ####
        if init_from:
            self.load_params(init_from)
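
The var_param_alpha / var_param_beta options above select how the unconstrained alpha and beta parameters are mapped to precisions: 'exp' and 'softplus' guarantee positivity, 'linear' does not. A NumPy sketch of the three mappings (my own):

import numpy as np

def precision(raw, how='exp'):
    funcs = {'exp': np.exp,
             'softplus': lambda x: np.log1p(np.exp(x)),
             'linear': lambda x: x}
    return funcs[how](raw)

raw = np.array([-2., 0., 2.])
print(precision(raw, 'exp'))       # always positive
print(precision(raw, 'softplus'))  # always positive, ~linear for large raw
print(precision(raw, 'linear'))    # unconstrained, can go negative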
Example #8
    def __init__(self, numpy_rng = None, theano_rng = None,
            n_h=100, bw_s=1, n_v=100, init_from=None,
            neg_sample_steps=1,
            lr_spec=None, lr_timestamp=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={}, truncation_bound={},
            l1 = {}, l2 = {}, orth_lambda=0.,
            var_param_alpha='exp', var_param_lambd='linear',
            sp_type='kl', sp_weight={}, sp_targ={},
            batch_size = 13,
            compile=True,
            debug=False,
            seed=1241234,
            my_save_path=None, save_at=None, save_every=None,
            flags = {},
            max_updates = 5e5):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr_spec is not None
        for k in ['Wv', 'hbias']: assert k in iscales.keys()
        iscales.setdefault('mu', 1.)
        iscales.setdefault('alpha', 0.)
        iscales.setdefault('lambd', 0.)
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()
        self.validate_flags(flags)

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        # allocate symbolic variable for input
        self.input = T.matrix('input')
        self.init_parameters()
        self.init_chains()

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr_spec['type'] == 'anneal':
            num = lr_spec['init'] * lr_spec['start'] 
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num/denum) 
        elif lr_spec['type'] == 'linear':
            lr_start = npy_floatX(lr_spec['start'])
            lr_end   = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0                    # incremented on every batch
        self.examples_seen = 0                   # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []
 
        if compile: self.do_theano()

        #### load layer 1 parameters from file ####
        if init_from:
            self.load_params(init_from)
Example #9
    def __init__(self, 
            input=None, Wv=None, vbias=None, hbias=None,
            numpy_rng = None, theano_rng = None,
            n_h=100, bw_s=1, n_v=100, init_from=None,
            neg_sample_steps=1,
            lr=None, lr_timestamp=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={}, l1 = {}, l2 = {},
            sp_type='kl', sp_weight={}, sp_targ={},
            batch_size = 13,
            compile=True,
            debug=False,
            seed=1241234,
            my_save_path=None, save_at=None, save_every=None,
            flags = {},
            max_updates = 5e5):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr is not None
        for k in ['Wv', 'vbias', 'hbias']: assert k in iscales.keys()
        iscales.setdefault('mu', 1.)
        iscales.setdefault('alpha', 0.)
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        self.n_s = self.n_h * self.bw_s

        if Wv is None:
            wv_val =  self.rng.randn(n_v, self.n_s) * iscales['Wv']
            self.Wv = sharedX(wv_val, name='Wv')
        else:
            self.Wv = Wv

        self.Wh = numpy.zeros((self.n_s, self.n_h), dtype=floatX)
        for i in xrange(self.n_h):
            self.Wh[i*bw_s:(i+1)*bw_s, i] = 1.

        # allocate shared variables for bias parameters
        if vbias is None:
            self.vbias = sharedX(iscales['vbias'] * numpy.ones(n_v), name='vbias') 
        else:
            self.vbias = vbias

        # allocate shared variables for bias parameters
        if hbias is None:
            self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h), name='hbias') 
        else:
            self.hbias = hbias

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = T.exp(self.alpha)

        #### load layer 1 parameters from file ####
        if init_from:
            self.load_params(init_from)

        # allocate shared variable for persistent chain
        self.neg_v  = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
        self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
        self.neg_s  = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
        self.neg_h  = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')
       
        # moving average values for sparsity
        self.sp_pos_v = sharedX(self.rng.rand(1,self.n_v), name='sp_pos_v')
        self.sp_pos_h = sharedX(self.rng.rand(1,self.n_h), name='sp_pos_h')

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr['type'] == 'anneal':
            num = lr['init'] * lr['start'] 
            denum = T.maximum(lr['start'], lr['slope'] * self.iter)
            self.lr = T.maximum(lr['floor'], num/denum) 
        elif lr['type'] == 'linear':
            lr_start = npy_floatX(lr['start'])
            lr_end   = npy_floatX(lr['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(self.max_updates)
        else:
            raise ValueError('Incorrect value for lr[type]')

        # learning rate - implemented as shared parameter for GPU
        self.lr_mults_it = {}
        self.lr_mults_shrd = {}
        for (k,v) in lr_mults.iteritems():
            # make sure all learning rate multipliers are float64
            self.lr_mults_it[k] = tools.HyperParamIterator(lr_timestamp, lr_mults[k])
            self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value, 
                                            name='lr_mults_shrd'+k)

        # allocate symbolic variable for input
        self.input = T.matrix('input') if input is None else input
        
        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0                    # incremented on every batch
        self.examples_seen = 0                   # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []
        
        if compile: self.do_theano()
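
The loop that fills self.Wh above builds a fixed block-indicator pooling matrix: each of the n_h hidden units owns bw_s consecutive slab variables, so n_s = n_h * bw_s. A small NumPy illustration (my own) for n_h = 3, bw_s = 2:

import numpy as np

n_h, bw_s = 3, 2
n_s = n_h * bw_s
Wh = np.zeros((n_s, n_h), dtype='float32')
for i in range(n_h):
    Wh[i * bw_s:(i + 1) * bw_s, i] = 1.
print(Wh)
# [[1. 0. 0.]
#  [1. 0. 0.]
#  [0. 1. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [0. 0. 1.]]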
Example #10
    def __init__(self,
                 numpy_rng=None,
                 theano_rng=None,
                 n_h=99,
                 n_v=100,
                 init_from=None,
                 neg_sample_steps=1,
                 lr_spec=None,
                 lr_mults={},
                 iscales={},
                 clip_min={},
                 clip_max={},
                 l1={},
                 l2={},
                 sp_weight={},
                 sp_targ={},
                 batch_size=13,
                 compile=True,
                 debug=False,
                 seed=1241234,
                 my_save_path=None,
                 save_at=None,
                 save_every=None,
                 flags={},
                 max_updates=5e5):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        Model.__init__(self)
        Block.__init__(self)
        assert lr_spec is not None
        for k in ['h']:
            assert k in sp_weight.keys()
        for k in ['h']:
            assert k in sp_targ.keys()
        self.validate_flags(flags)

        self.jobman_channel = None
        self.jobman_state = {}
        self.register_names_to_del(['jobman_channel'])

        ### make sure all parameters are floatX ###
        for (k, v) in l1.iteritems():
            l1[k] = npy_floatX(v)
        for (k, v) in l2.iteritems():
            l2[k] = npy_floatX(v)
        for (k, v) in sp_weight.iteritems():
            sp_weight[k] = npy_floatX(v)
        for (k, v) in sp_targ.iteritems():
            sp_targ[k] = npy_floatX(v)
        for (k, v) in clip_min.iteritems():
            clip_min[k] = npy_floatX(v)
        for (k, v) in clip_max.iteritems():
            clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k, v) in locals().iteritems():
            if k != 'self': setattr(self, k, v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(
            seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(
            2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        # allocate symbolic variable for input
        self.input = T.matrix('input')
        self.init_parameters()
        self.init_chains()

        # learning rate, with deferred 1./t annealing
        self.iter = sharedX(0.0, name='iter')

        if lr_spec['type'] == 'anneal':
            num = lr_spec['init'] * lr_spec['start']
            denum = T.maximum(lr_spec['start'], lr_spec['slope'] * self.iter)
            self.lr = T.maximum(lr_spec['floor'], num / denum)
        elif lr_spec['type'] == 'linear':
            lr_start = npy_floatX(lr_spec['start'])
            lr_end = npy_floatX(lr_spec['end'])
            self.lr = lr_start + self.iter * (lr_end - lr_start) / npy_floatX(
                self.max_updates)
        else:
            raise ValueError('Incorrect value for lr_spec[type]')

        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)
        self.output_space = VectorSpace(n_h)

        self.batches_seen = 0  # incremented on every batch
        self.examples_seen = 0  # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []

        if compile: self.do_theano()

        if init_from:
            raise NotImplementedError()
Example #11
File: ssrbm.py  Project: codeaudit/ssrbm
    def __init__(self, 
            input=None, Wv=None, hbias=None,
            numpy_rng = None, theano_rng = None,
            n_h=100, n_v=100, bw_h=10, init_from=None,
            neg_sample_steps=1,
            lr = 1e-3, lr_anneal_coeff=0, lr_timestamp=None, lr_mults = {},
            iscales={}, clip_min={}, clip_max={}, l1 = {}, l2 = {},
            sp_moving_avg=0.98, sp_type='KL', sp_weight={}, sp_targ={},
            batch_size = 13,
            scalar_b = False,
            sparse_hmask = None, 
            learn_h_weights = False,
            unit_norm_filters = True,
            compile=True,
            parametrize_sqrt_precision=True,
            debug=False,
            seed=1241234,
            my_save_path=None):
        """
        :param n_h: number of h-hidden units
        :param n_v: number of visible units
        :param iscales: optional dictionary containing initialization scale for each parameter
        :param neg_sample_steps: number of sampling updates to perform in negative phase.
        :param l1: hyper-parameter controlling amount of L1 regularization
        :param l2: hyper-parameter controlling amount of L2 regularization
        :param batch_size: size of positive and negative phase minibatch
        :param compile: compile sampling and learning functions
        :param seed: seed used to initialize numpy and theano RNGs.
        """
        super(ssRBM,self).__init__()
        for k in ['mu','alpha','beta', 'Wv', 'hbias']: assert k in iscales.keys()
        for k in ['h']: assert k in sp_weight.keys()
        for k in ['h']: assert k in sp_targ.keys()

        ### make sure all parameters are floatX ###
        for (k,v) in l1.iteritems(): l1[k] = npy_floatX(v)
        for (k,v) in l2.iteritems(): l2[k] = npy_floatX(v)
        for (k,v) in sp_weight.iteritems(): sp_weight[k] = npy_floatX(v)
        for (k,v) in sp_targ.iteritems(): sp_targ[k] = npy_floatX(v)
        for (k,v) in clip_min.iteritems(): clip_min[k] = npy_floatX(v)
        for (k,v) in clip_max.iteritems(): clip_max[k] = npy_floatX(v)

        # dump initialization parameters to object
        for (k,v) in locals().iteritems():
            if k!='self': setattr(self,k,v)

        # allocate random number generators
        self.rng = numpy.random.RandomState(seed) if numpy_rng is None else numpy_rng
        self.theano_rng = RandomStreams(self.rng.randint(2**30)) if theano_rng is None else theano_rng

        ############### ALLOCATE PARAMETERS #################
        self.n_s = self.n_h * bw_h

        # allocate bilinear-weight matrices
        self.Wh = sharedX(sparse_hmask.mask, name='Wh')

        if Wv is None:
            wv_val =  self.rng.randn(n_v, self.n_s) * iscales['Wv']
            self.Wv = sharedX(wv_val, name='Wv')
        else:
            self.Wv = Wv

        # allocate shared variables for bias parameters
        if hbias is None:
            self.hbias = sharedX(iscales['hbias'] * numpy.ones(n_h), name='hbias') 
        else:
            self.hbias = hbias

        # mean (mu) and precision (alpha) parameters on s
        self.mu = sharedX(iscales['mu'] * numpy.ones(self.n_s), name='mu')
        self.alpha = sharedX(iscales['alpha'] * numpy.ones(self.n_s), name='alpha')
        self.alpha_prec = self.alpha**2 if parametrize_sqrt_precision else self.alpha

        # diagonal of precision matrix of visible units
        self.beta = sharedX(iscales['beta'] * numpy.ones(n_v), name='beta')
        self.beta_prec = self.beta**2 if parametrize_sqrt_precision else self.beta

        #### load layer 1 parameters from file ####
        if init_from:
            self.load_params(init_from)

        # allocate shared variable for persistent chain
        self.neg_v  = sharedX(self.rng.rand(batch_size, n_v), name='neg_v')
        self.neg_ev = sharedX(self.rng.rand(batch_size, n_v), name='neg_ev')
        self.neg_s  = sharedX(self.rng.rand(batch_size, self.n_s), name='neg_s')
        self.neg_h  = sharedX(self.rng.rand(batch_size, n_h), name='neg_h')
       
        # moving average values for sparsity
        self.sp_pos_v = sharedX(self.rng.rand(1,self.n_v), name='sp_pos_v')
        self.sp_pos_h = sharedX(self.rng.rand(1,self.n_h), name='sp_pos_h')

        # learning rate - implemented as shared parameter for GPU
        self.lr_shrd = sharedX(lr, name='lr_shrd')
        self.lr_mults_it = {}
        self.lr_mults_shrd = {}
        for (k,v) in lr_mults.iteritems():
            # make sure all learning rate multipliers are float64
            self.lr_mults_it[k] = tools.HyperParamIterator(lr_timestamp, lr_mults[k])
            self.lr_mults_shrd[k] = sharedX(self.lr_mults_it[k].value, 
                                            name='lr_mults_shrd'+k)

        # allocate symbolic variable for input
        self.input = T.matrix('input') if input is None else input
        
        # configure input-space (new pylearn2 feature?)
        self.input_space = VectorSpace(n_v)

        # counters used by pylearn2 trainers
        self.batches_seen = 0                    # incremented on every batch
        self.examples_seen = 0                   # incremented on every training example
        self.force_batch_size = batch_size  # force minibatch size

        self.error_record = []

        ## ESTABLISH LIST OF LEARNT MODEL PARAMETERS ##
        self.params = [self.Wv, self.hbias, self.mu, self.alpha, self.beta]
        if self.learn_h_weights:
            self.params += [self.Wh]
        
        if compile: self.do_theano()
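
When parametrize_sqrt_precision is True, the raw alpha and beta parameters are interpreted as square roots of the precisions (alpha_prec = alpha**2, beta_prec = beta**2), which keeps both precisions non-negative without constraining the raw values. A one-line NumPy check (my own):

import numpy as np

alpha = np.array([-1.5, 0.0, 2.0])
print(alpha ** 2)   # precision used when parametrize_sqrt_precision=True
print(alpha)        # raw value used otherwise (may be negative)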