def __init__(self, name, inp_dim, hid_dim, out_dim, olayer_type, param_path=None):
    self.name = name
    self.ninp = inp_dim
    self.nhid = hid_dim
    self.nout = out_dim
    self.bound = 20 / math.sqrt(self.ninp)  # empirically chosen bound for weight initialization.

    inp = T.matrix()  # matrix, for batch training.
    out = T.matrix()
    hm1 = T.matrix()

    if param_path is None:
        self.u = self.randomWeights(self.ninp, self.nhid)
        self.v = self.randomWeights(self.nhid, self.nout)
        self.w = self.randomWeights(self.nhid, self.nhid)
    else:
        f = open(os.path.join(param_path, 'params/') + self.name + '.param', 'rb')
        self.u = cPickle.load(f)
        self.v = cPickle.load(f)
        self.w = cPickle.load(f)
        f.close()

    self.vu = self.sharedZeros(self.ninp, self.nhid)
    self.vv = self.sharedZeros(self.nhid, self.nout)
    self.vw = self.sharedZeros(self.nhid, self.nhid)

    # Compile the forwardPass function; it varies by output layer type.
    # If a chord is used as a unit, sigmoid is preferable.
    # If single notes are used, choose softmax, because the network then acts as a classifier.
    h = sigmoid(inp.dot(self.u) + hm1.dot(self.w))
    if olayer_type == 'sigmoid':
        o = sigmoid(h.dot(self.v))
    elif olayer_type == 'softmax':
        o = T.nnet.softmax(h.dot(self.v))
    self.forwardPass = theano.function(inputs=[inp, hm1],
                                       outputs=[o, h],
                                       allow_input_downcast=True)

    # Compile the loss function; it also varies by olayer_type.
    if olayer_type == 'sigmoid':
        loss = T.sum(T.pow(out - o, 2))   # sum-of-squares loss
    elif olayer_type == 'softmax':
        loss = T.sum(-out * T.log(o))     # cross-entropy loss
    self.calcLoss = theano.function(inputs=[o, out],
                                    outputs=loss,
                                    allow_input_downcast=True)

    # Compile the velocity and weight update functions (the network is trained with SGD + momentum).
    # These functions are compiled even if the network pre-loads trained params, because
    # I rarely use this program for aesthetic purposes. :)
    du = T.grad(loss, self.u)
    dv = T.grad(loss, self.v)
    dw = T.grad(loss, self.w)
    alpha = T.scalar()    # momentum coefficient
    epsilon = T.scalar()  # learning rate
    self.updateVelocity = theano.function(inputs=[inp, hm1, out, alpha, epsilon],
                                          updates=[(self.vu, alpha * self.vu - epsilon * du),
                                                   (self.vv, alpha * self.vv - epsilon * dv),
                                                   (self.vw, alpha * self.vw - epsilon * dw)],
                                          allow_input_downcast=True)
    self.updateWeights = theano.function(inputs=[],
                                         updates=[(self.u, self.u + self.vu),
                                                  (self.v, self.v + self.vv),
                                                  (self.w, self.w + self.vw)])
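# A hedged usage sketch for the constructor above (the class name MusicRNN, the dimensions,
# and the hyperparameters are illustrative assumptions, not from the source; numpy/theano are
# assumed imported as in the snippet). It steps through one training sequence, applying the
# momentum update at every frame and then advancing the hidden state.
import numpy as np

net = MusicRNN('melody', inp_dim=88, hid_dim=100, out_dim=88, olayer_type='sigmoid')

xs = np.random.rand(50, 88)   # illustrative input sequence: 50 frames of 88 features
ys = np.random.rand(50, 88)   # illustrative target sequence
h = np.zeros((1, 100))        # initial hidden state

for x_t, y_t in zip(xs, ys):
    x_row, y_row = x_t.reshape(1, -1), y_t.reshape(1, -1)
    net.updateVelocity(x_row, h, y_row, 0.9, 0.01)   # alpha (momentum), epsilon (learning rate)
    net.updateWeights()
    _, h = net.forwardPass(x_row, h)                 # advance the hidden state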
def step(self, x_t, h_t_prev, c_t_prev):
    """
    One LSTM time step, intended to be looped over by theano.scan.

    Unchanging variables are passed via non_sequences; initial states are given in
    outputs_info; the tensors to be looped over (scanned) go in sequences. The general
    order of parameters to the step function (fn in the tutorial) is: sequences (if any),
    prior results (if any), non-sequences (if any).

    Args:
        x_t: input at the current timestep
        h_t_prev: hidden state at the previous timestep
        c_t_prev: cell state at the previous timestep
    """
    # Input, forget and output gates, with peephole connections to the previous cell state.
    i_t = N.sigmoid(T.dot(x_t, self.Wi) + T.dot(h_t_prev, self.Ui) + T.dot(c_t_prev, self.Vi) + self.bi)
    f_t = N.sigmoid(T.dot(x_t, self.Wf) + T.dot(h_t_prev, self.Uf) + T.dot(c_t_prev, self.Vf) + self.bf)
    o_t = N.sigmoid(T.dot(x_t, self.Wo) + T.dot(h_t_prev, self.Uo) + T.dot(c_t_prev, self.Vo) + self.bo)
    # Candidate cell state, then the cell and hidden state updates.
    c_hat_t = T.tanh(T.dot(x_t, self.Wc) + T.dot(h_t_prev, self.Uc) + self.bc)
    c_t = f_t * c_t_prev + i_t * c_hat_t
    h_t = o_t * T.tanh(c_t)
    return [h_t, h_t, c_t]  # the real output, hidden state, cell state
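# A hedged sketch of driving the step function above with theano.scan (the wrapper name
# forward and the symbols x_seq, h0, c0 are assumptions, not from the source; theano/T are
# assumed imported as in the snippet). scan iterates step over the first dimension of x_seq
# and threads the hidden and cell states through time.
def forward(self, x_seq, h0, c0):
    [outputs, hs, cs], updates = theano.scan(
        fn=self.step,
        sequences=x_seq,              # x_t at each timestep
        outputs_info=[None, h0, c0])  # first output is not fed back; h and c are recurrent
    return outputs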
def __init__(self):
    inSize = BrainBase.inputVectorSize
    h1Size = 64

    # Symbolic inputs: the input vector plus the weight matrices and bias terms of both layers.
    self.vIn = vIn = tt.dvector('in')
    self.vM1 = vM1 = tt.dmatrix('m1')
    self.vM2 = vM2 = tt.dvector('m2')
    self.vW1 = vW1 = tt.dvector('w1')
    self.vW2 = vW2 = tt.dscalar('w2')

    # Two-layer feed-forward network with sigmoid activations.
    vH1 = sigmoid(tt.dot(vIn, vM1) + vW1)
    self.vOut = vOut = sigmoid(tt.dot(vH1, vM2) + vW2)
    t.pp(vOut)  # debug print of the symbolic expression
    self.evalFun = t.function([vIn, vW1, vM1, vW2, vM2], vOut)

    # Initialize the parameters with scaled standard-normal values.
    self.m1 = 1 / math.sqrt(inSize) * npr.standard_normal((inSize, h1Size))
    self.m2 = 1 / math.sqrt(h1Size) * npr.standard_normal((h1Size,))
    self.w1 = 1 / math.sqrt(inSize) * npr.standard_normal((h1Size,))
    self.w2 = 1 / math.sqrt(h1Size) * npr.standard_normal()
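# A hedged usage sketch: evaluating the compiled network on one random input (Brain is a
# hypothetical class name wrapping the constructor above; npr is numpy.random as in the
# snippet). Note that evalFun takes the parameters explicitly, in the order
# [vIn, vW1, vM1, vW2, vM2], so the stored w1/m1/w2/m2 must be passed alongside the input.
brain = Brain()
x = npr.standard_normal((BrainBase.inputVectorSize,))
y = brain.evalFun(x, brain.w1, brain.m1, brain.w2, brain.m2)   # scalar output in (0, 1)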
def discriminator_model(dX, dw1, dw2, dw3, dw4, dw5):
    # Three strided convolution blocks (stride 2, padding 2), each followed by batchnorm + ReLU.
    l1 = relu(batchnorm(conv2d(dX, dw1, subsample=(2, 2), border_mode=(2, 2))))
    l2 = relu(batchnorm(conv2d(l1, dw2, subsample=(2, 2), border_mode=(2, 2))))
    l3 = relu(batchnorm(conv2d(l2, dw3, subsample=(2, 2), border_mode=(2, 2))))
    # Flatten the feature maps, then apply a fully connected layer.
    l3a = l3.flatten(2)
    l4 = relu(batchnorm(T.dot(l3a, dw4)))
    # Sigmoid output: probability that the input is real.
    l5 = sigmoid(T.dot(l4, dw5))
    return l5
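# A hedged usage sketch for the discriminator above. The filter shapes are illustrative
# (3-channel 64x64 inputs with 64/128/256 filters), not taken from the original code, and it
# assumes relu, batchnorm, conv2d and sigmoid are already imported as in the snippet.
import numpy as np
import theano
import theano.tensor as T

def shared_normal(shape, scale=0.02):
    # Gaussian-initialized Theano shared variable for one weight tensor.
    return theano.shared(np.random.normal(0, scale, size=shape).astype(theano.config.floatX))

dX = T.tensor4()                          # (batch, 3, 64, 64) images
dw1 = shared_normal((64, 3, 5, 5))        # 64x64 -> 32x32 after a stride-2, pad-2 conv
dw2 = shared_normal((128, 64, 5, 5))      # 32x32 -> 16x16
dw3 = shared_normal((256, 128, 5, 5))     # 16x16 -> 8x8
dw4 = shared_normal((256 * 8 * 8, 1024))  # flattened feature maps -> fully connected
dw5 = shared_normal((1024, 1))            # fully connected -> single probability

p_real = discriminator_model(dX, dw1, dw2, dw3, dw4, dw5)
discriminate = theano.function([dX], p_real, allow_input_downcast=True)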
def __init__(self, C, regular_coef, n, gamma, use_square_loss, use_cross_entropy):
    self._regular_coef = regular_coef

    # Prepare Theano variables for inputs and targets.
    # n-length float vector; 0s replace the NULL entries.
    self._X1_sym = theano.tensor.matrix(name='inputs_vec', dtype='float32')
    # n-length binary vector; 1s mark the NULL entries.
    self._X2_sym = theano.tensor.matrix(name='inputs_binary', dtype='float32')
    self._y_sym = theano.tensor.vector(name='output', dtype='float32')
    self._w_sym = theano.shared(np.ones((n,), dtype='float64') * 0.01)

    part1 = theano.tensor.dot(self._X1_sym, self._w_sym)
    w_abs = theano.tensor.abs_(self._w_sym)
    part2 = C * theano.tensor.dot(self._X2_sym, w_abs)

    # If the adversary corrupts randomly, the learner can simply treat the corrupted
    # coordinates as zeros. Since the data is uniform around 0, and the w values are also
    # uniform, these coordinates are insignificant.
    value = -part1 * self._y_sym + part2
    self._xi = gamma - part1 * self._y_sym + part2

    p = nnet.sigmoid(value)
    self._cross_entropy_loss = (self._y_sym * theano.tensor.log(p)
                                + (self._y_sym + 1.) * theano.tensor.log(1. - p))
    self._prediction = 2 * (part1 > 0.) - 1

    if use_cross_entropy:
        # Sigmoid cross-entropy loss, as in logistic regression.
        loss = -self._cross_entropy_loss
    else:
        # Margin (hinge) loss, as in SVM; elementwise maximum with 0, not a reduction.
        loss = theano.tensor.maximum(self._xi, 0.)
        if use_square_loss:
            loss **= 2

    self._loss = loss.mean()
    self._l2_penalty = self._regular_coef * lasagne.regularization.l2(self._w_sym)
    self._total_loss = self._loss + self._l2_penalty
    self._acc = theano.tensor.mean(theano.tensor.eq(self._prediction, self._y_sym))
    self._corruptor = utils.Corruptor()
    self.__create_functions()
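# The private __create_functions is not shown in this snippet; below is only a hedged,
# illustrative sketch of the kind of functions such a class typically compiles from the
# symbols defined above (the plain-SGD update and the 0.1 learning rate are assumptions).
def __create_functions(self):
    updates = lasagne.updates.sgd(self._total_loss, [self._w_sym], learning_rate=0.1)
    self._train_fn = theano.function([self._X1_sym, self._X2_sym, self._y_sym],
                                     [self._total_loss, self._acc],
                                     updates=updates)
    self._eval_fn = theano.function([self._X1_sym, self._X2_sym, self._y_sym],
                                    [self._loss, self._acc])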
def __get_h_given_v(self, v):
    pre_sigmoid_h = T.dot(v, self.W) + self.b
    h_mean = sigmoid(pre_sigmoid_h)
    h = self.__bernoulli_sample(h_mean)
    return h, h_mean, pre_sigmoid_h
def __gibbs_soft(self, X):
    soft_hid = sigmoid(T.dot(X, self.W) + self.b)                     # num_data x hid
    soft_vis = sigmoid(T.dot(soft_hid, self.W.transpose()) + self.c)  # num_data x vis
    return soft_vis, soft_hid
def __get_v_given_h(self, h):
    pre_sigmoid_v = T.dot(h, self.W.transpose()) + self.c
    v_mean = sigmoid(pre_sigmoid_v)
    v = self.__bernoulli_sample(v_mean)
    return v, v_mean, pre_sigmoid_v
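# A hedged sketch of chaining the two conditionals above into one block Gibbs step inside
# the same RBM class (the method name gibbs_vhv is hypothetical, not from the source).
def gibbs_vhv(self, v0):
    # v -> h -> v: sample hiddens given visibles, then visibles given those hiddens.
    h0, h0_mean, _ = self.__get_h_given_v(v0)
    v1, v1_mean, _ = self.__get_v_given_h(h0)
    return v1, v1_mean, h0, h0_mean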
def two_sigmoid(x):
    return 2 * nnet.sigmoid(x)
def gibbs_step_(self, v_in):
    h_in = self.activation_h(v_in)
    # Noisy rectified hidden units: add Gaussian noise with standard deviation sigmoid(h_in),
    # then clip at zero before propagating back to the visible layer.
    h_bin = h_in + self.t_rng.normal(h_in.shape, 0.0, nnet.sigmoid(h_in))
    h_bin = T.maximum(0., h_bin)
    return self.activation_v(T.cast(h_bin, fx))
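# A hedged sketch of iterating the Gibbs step above for k steps with theano.scan
# (the method name run_chain is hypothetical, not from the source).
def run_chain(self, v0, k):
    chain, updates = theano.scan(fn=self.gibbs_step_,
                                 outputs_info=[v0],
                                 n_steps=k)
    # chain holds the visible samples after each step; updates carries the RNG state and
    # must be passed to theano.function when the chain is compiled.
    return chain[-1], updates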
def __init__(self, mode, out_layer, idn, hdn, odn, rate, filename="param.txt"):
    '''
    param:
        mode: 1 = train a new network, 2 = load a saved network from "filename".
        out_layer: can be 'softmax', 'linear' or 'sigmoid'.
        idn, hdn, odn: number of neurons in the input/hidden/output layers.
        rate: learning rate.
    '''
    self.inp_dim = idn
    self.hid_dim = hdn
    self.out_dim = odn
    self.learning_rate = rate
    self.bound = 20 / math.sqrt(self.inp_dim)

    if mode == 1:
        # Initialize the weights uniformly at random, scaled by the bound.
        self.u = theano.shared((np.random.random((self.inp_dim, self.hid_dim)) - 0.5) * self.bound)
        self.w = theano.shared((np.random.random((self.hid_dim, self.hid_dim)) - 0.5) * self.bound)
        self.v = theano.shared((np.random.random((self.hid_dim, self.out_dim)) - 0.5) * self.bound)
    if mode == 2:
        # Load the dimensions and weights from a plain-text parameter file.
        fp = open(filename, 'r')
        lines = fp.readlines()
        fp.close()
        val = lines[0].split()
        self.inp_dim = int(val[0])
        self.hid_dim = int(val[1])
        self.out_dim = int(val[2])
        self.u = np.zeros((self.inp_dim, self.hid_dim))
        self.w = np.zeros((self.hid_dim, self.hid_dim))
        self.v = np.zeros((self.hid_dim, self.out_dim))
        curr = 1
        for i in xrange(self.inp_dim):
            for j in xrange(self.hid_dim):
                self.u[i][j] = float(lines[curr])
                curr += 1
        for i in xrange(self.hid_dim):
            for j in xrange(self.hid_dim):
                self.w[i][j] = float(lines[curr])
                curr += 1
        for i in xrange(self.hid_dim):
            for j in xrange(self.out_dim):
                self.v[i][j] = float(lines[curr])
                curr += 1
        self.u = theano.shared(self.u)
        self.w = theano.shared(self.w)
        self.v = theano.shared(self.v)

    # Symbolic forward pass: a simple recurrent layer followed by the chosen output layer.
    x = T.matrix()
    hm1 = T.matrix()
    h = sigmoid(x.dot(self.u) + hm1.dot(self.w))
    if out_layer == 'softmax':
        o = T.nnet.softmax(h.dot(self.v))
    elif out_layer == 'linear':
        o = h.dot(self.v)
    elif out_layer == 'sigmoid':
        o = sigmoid(h.dot(self.v))
    self.forward_pass = theano.function(inputs=[x, hm1], outputs=[o, h])

    # Loss depends on the output layer: cross-entropy for softmax, squared error otherwise.
    y = T.matrix()
    if out_layer == 'softmax':
        loss = T.sum(-y * T.log(o))
    elif out_layer == 'linear' or out_layer == 'sigmoid':
        loss = T.sum(T.pow(o[0] - y[0], 2))

    # Plain SGD update of all three weight matrices.
    du = T.grad(loss, self.u)
    dw = T.grad(loss, self.w)
    dv = T.grad(loss, self.v)
    self.update = theano.function(inputs=[x, hm1, y],
                                  updates=[(self.u, self.u - self.learning_rate * du),
                                           (self.w, self.w - self.learning_rate * dw),
                                           (self.v, self.v - self.learning_rate * dv)])
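# A hedged sketch of a save routine matching the file format that mode == 2 reads back:
# the first line holds the three dimensions, followed by one weight value per line for
# u, then w, then v, in row-major order (the method name save is hypothetical).
def save(self, filename="param.txt"):
    u, w, v = self.u.get_value(), self.w.get_value(), self.v.get_value()
    fp = open(filename, 'w')
    fp.write("%d %d %d\n" % (self.inp_dim, self.hid_dim, self.out_dim))
    for mat in (u, w, v):
        for val in mat.flatten():
            fp.write("%s\n" % repr(float(val)))
    fp.close()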