# Module-level context assumed by both variants below: the numpy/theano
# imports are standard, while GaussianMLP, BernoulliMLP, kld_unit_mvn, and
# kldu_unit_mvn come from the surrounding project and are not shown here.
import numpy as np
import theano
import theano.tensor as T
from theano import function, shared

floatX = theano.config.floatX
ADAG_EPS = ADAGRAD_EPS = 1e-10  # adagrad fudge factor (value assumed)


# Variant with a covariance flag (COV) on the encoder and a learning rate
# that decays as args.lr * (1 - args.lmbda)**train_i.
def __init__(self, xdim, args, dec_nonlin=None):
    self.xdim = xdim
    self.hdim = args.hdim
    self.zdim = args.zdim
    self.lmbda = args.lmbda  # weight decay coefficient * 2
    self.x = T.matrix('x', dtype=floatX)
    self.eps = T.matrix('eps', dtype=floatX)
    self.train_i = T.scalar('train_i', dtype=floatX)
    self.dec = args.decM
    self.COV = args.COV
    self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim,
                               nlayers=args.nlayers, eps=self.eps, COV=self.COV)
    if self.dec == 'bernoulli':
        # log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
        self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim,
                                    self.xdim, nlayers=args.nlayers, y=self.x)
    elif self.dec == 'gaussian':
        self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim,
                                   self.xdim, nlayers=args.nlayers, y=self.x,
                                   activation=dec_nonlin, COV=self.COV)
    else:
        raise RuntimeError('unrecognized decoder %s' % self.dec)
    # cost = encoder (KL) part + decoder (reconstruction) part
    if not self.COV:
        self.enc_cost = -T.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var))
    else:
        self.enc_cost = -T.sum(kldu_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var,
                                             self.enc_mlp.u))
    self.cost = (self.enc_cost + self.dec_mlp.cost) / args.batsize
    self.params = self.enc_mlp.params + self.dec_mlp.params
    # plain gradients; the weight-decay variant would be
    # [T.grad(self.cost, p) + self.lmbda * p for p in self.params]
    self.gparams = [T.grad(self.cost, p) for p in self.params]
    self.gaccums = [shared(value=np.zeros(p.get_value().shape, dtype=floatX))
                    for p in self.params]
    self.lr = args.lr * (1 - args.lmbda) ** self.train_i
    # update params and the accumulated squared gradients for adagrad
    self.updates = [
        (param, param - self.lr * gparam
            / T.sqrt(gaccum + T.square(gparam) + ADAG_EPS))
        for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
    ]
    self.updates += [
        (gaccum, gaccum + T.square(gparam))
        for gaccum, gparam in zip(self.gaccums, self.gparams)
    ]
    self.train = function(
        inputs=[self.x, self.eps, self.train_i],
        outputs=self.cost,
        updates=self.updates
    )
    self.test = function(
        inputs=[self.x, self.eps],
        outputs=self.cost,
        updates=None
    )
    # can be used for semi-supervised learning, for example
    self.encode = function(
        inputs=[self.x, self.eps],
        outputs=self.enc_mlp.out
    )
    # use this to sample; input is z, e.g. with shape (1, zdim)
    self.decode = function(
        inputs=[self.enc_mlp.out],
        outputs=self.dec_mlp.out  # out = mu + sigma * eps for the Gaussian MLP
    )
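
# Usage sketch for the variant above. Hedged: `VAE` (the enclosing class
# name), `args`, and `data` are assumptions for illustration, not names
# confirmed by this file; only the train/decode signatures come from the
# code itself.
def _train_vae_sketch(VAE, args, data, n_epochs=10):
    """Adagrad training loop: sample eps ~ N(0, I) per minibatch and pass
    the epoch index so the (1 - lmbda)**train_i learning-rate decay applies."""
    model = VAE(xdim=data.shape[1], args=args)
    for i in range(n_epochs):
        for start in range(0, data.shape[0], args.batsize):
            batch = data[start:start + args.batsize]
            eps = np.random.randn(batch.shape[0], args.zdim).astype(floatX)
            cost = model.train(batch, eps, float(i))
        print('epoch %d, last batch cost %f' % (i, cost))
    # sampling: decode a latent code z of shape (1, zdim)
    z = np.random.randn(1, args.zdim).astype(floatX)
    return model.decode(z)
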
# Baseline variant: unit-Gaussian prior with a diagonal-covariance encoder
# and a fixed adagrad learning rate.
def __init__(self, xdim, args, dec='bernoulli'):
    self.xdim = xdim
    self.hdim = args.hdim
    self.zdim = args.zdim
    self.lmbda = args.lmbda  # weight decay coefficient * 2
    self.x = T.matrix('x', dtype=floatX)
    self.eps = T.matrix('eps', dtype=floatX)
    # XXX make this more general
    self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim,
                               nlayers=args.nlayers, eps=self.eps)
    if dec == 'bernoulli':
        # log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
        self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim,
                                    self.xdim, nlayers=args.nlayers, y=self.x)
    elif dec == 'gaussian':
        self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim,
                                   self.xdim, nlayers=args.nlayers, y=self.x)
    else:
        raise RuntimeError('unrecognized decoder %s' % dec)
    self.cost = (-T.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var))
                 + self.dec_mlp.cost) / args.batch_size
    self.params = self.enc_mlp.params + self.dec_mlp.params
    print(self.params)  # debug: list the parameters being trained
    # gradients with L2 weight decay folded in
    self.gparams = [T.grad(self.cost, p) + self.lmbda * p for p in self.params]
    self.gaccums = [theano.shared(value=np.zeros(p.get_value().shape, dtype=floatX))
                    for p in self.params]
    # XXX using adagrad update as described in the paper; could try other optimizers
    self.updates = [
        (param, param - args.lr * gparam
            / T.sqrt(gaccum + T.square(gparam) + ADAGRAD_EPS))
        for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
    ]
    self.updates += [
        (gaccum, gaccum + T.square(gparam))
        for gaccum, gparam in zip(self.gaccums, self.gparams)
    ]
    self.train = theano.function(
        inputs=[self.x, self.eps],
        outputs=self.cost,
        updates=self.updates
    )
    self.test = theano.function(
        inputs=[self.x, self.eps],
        outputs=self.cost,
        updates=None
    )
    # can be used for semi-supervised learning, for example
    self.encode = theano.function(
        inputs=[self.x, self.eps],
        outputs=self.enc_mlp.out
    )
    # use this to sample
    self.decode = theano.function(
        inputs=[self.enc_mlp.out],
        outputs=self.dec_mlp.out
    )
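
# For reference, a NumPy sketch of what kld_unit_mvn must compute for the
# cost above to match the usual VAE objective: for a diagonal Gaussian
# q = N(mu, diag(var)) it returns -KL(q || N(0, I)) per row,
#   0.5 * (k + sum(log var) - sum(mu^2) - sum(var)),
# so -T.sum(kld_unit_mvn(...)) contributes a positive KL penalty. This is a
# reconstruction from the standard formula, not the project's confirmed code.
def _kld_unit_mvn_sketch(mu, var):
    k = mu.shape[1]
    return 0.5 * (k + np.sum(np.log(var), axis=1)
                  - np.sum(mu ** 2, axis=1)
                  - np.sum(var, axis=1))
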