def connect(self, l_in):
    """Attach a GRU layer to its input layer and allocate its parameters.

    Creates the initial hidden state ``h0``, the update (z) and reset (r)
    gate weights, and the candidate (h) weights.  Maxout activations need a
    candidate projection twice as wide.  Previously-saved weights, if any,
    are copied into the freshly created shared variables.
    """
    self.l_in = l_in
    self.n_in = l_in.size

    # Learned initial hidden state, broadcast over the batch.
    self.h0 = shared0s((1, self.size))

    # Gate parameters (input-to-hidden w_*, hidden-to-hidden u_*, bias b_*).
    # NOTE: init-call order matches the original so RNG draws are identical.
    self.w_z = self.init((self.n_in, self.size))
    self.w_r = self.init((self.n_in, self.size))
    self.u_z = self.init((self.size, self.size))
    self.u_r = self.init((self.size, self.size))
    self.b_z = shared0s((self.size))
    self.b_r = shared0s((self.size))

    # Candidate-state parameters; maxout consumes pairs of units, so the
    # projection is doubled in width.
    h_width = self.size * 2 if 'maxout' in self.activation_str else self.size
    self.w_h = self.init((self.n_in, h_width))
    self.u_h = self.init((self.size, h_width))
    self.b_h = shared0s((h_width))

    self.params = [
        self.h0,
        self.w_z, self.w_r, self.w_h,
        self.u_z, self.u_r, self.u_h,
        self.b_z, self.b_r, self.b_h,
    ]

    # Restore serialized weights (same ordering as self.params).
    if self.weights is not None:
        for p, w in zip(self.params, self.weights):
            p.set_value(floatX(w))
def connect(self, l_in):
    """Attach an LSTM layer to its input layer and allocate its parameters.

    Creates input (i), forget (f), output (o) and cell (c) gate parameters:
    input-to-hidden weights ``w_*``, biases ``b_*`` and hidden-to-hidden
    weights ``u_*``.  Previously-saved weights, if any, are copied into the
    freshly created shared variables.
    """
    self.l_in = l_in
    self.n_in = l_in.size

    gates = ('i', 'f', 'o', 'c')

    # Allocate family-by-family (all w, then all b, then all u) so the
    # order of random init calls matches the original exactly.
    for g in gates:
        setattr(self, 'w_' + g, self.init((self.n_in, self.size)))
    for g in gates:
        setattr(self, 'b_' + g, shared0s((self.size)))
    for g in gates:
        setattr(self, 'u_' + g, self.init((self.size, self.size)))

    self.params = [
        self.w_i, self.w_f, self.w_o, self.w_c,
        self.u_i, self.u_f, self.u_o, self.u_c,
        self.b_i, self.b_f, self.b_o, self.b_c,
    ]

    # Restore serialized weights (same ordering as self.params).
    if self.weights is not None:
        for p, w in zip(self.params, self.weights):
            p.set_value(floatX(w))
def connect(self, l_in):
    """Wire a stack of layers: the bottom to ``l_in``, each layer to the
    one beneath it, and collect every layer's parameters in order.
    """
    self.bottom.connect(l_in)
    # Pair each layer (from the second onward) with its predecessor.
    for below, above in zip(self.layers, self.layers[1:]):
        above.connect(below)
    self.params = flatten([l.params for l in self.layers])

    # Restore serialized weights (same ordering as self.params).
    if self.weights is not None:
        for p, w in zip(self.params, self.weights):
            p.set_value(floatX(w))
def connect(self, l_in):
    """Wire two parallel branches to the same input.

    Each branch sees ``l_in`` through a ``WrappedLayer`` carrying that
    branch's type; the combined parameter list is left's followed by
    right's.
    """
    # Connect left first, then right — same order as the original, which
    # matters if parameter initialization consumes shared RNG state.
    for branch, branch_type in ((self.left, self.left_type),
                                (self.right, self.right_type)):
        branch.connect(WrappedLayer(l_in, branch_type))
    self.params = self.left.params + self.right.params

    # Restore serialized weights (same ordering as self.params).
    if self.weights is not None:
        for p, w in zip(self.params, self.weights):
            p.set_value(floatX(w))
def __init__(self, size=128, n_features=256, init='uniform', weights=None):
    """Embedding table mapping integer feature ids to learned vectors.

    size: embedding dimensionality.
    n_features: vocabulary size (number of distinct ids).
    init: name of an initializer attribute on ``inits``.
    weights: optional saved values to load into the parameters.
    """
    # Capture constructor arguments for serialization.  This must run
    # before any other local is bound, or it would leak into settings.
    self.settings = locals()
    del self.settings['self']

    self.size = size
    self.n_features = n_features
    self.init = getattr(inits, init)
    # Symbolic batch of integer id matrices.
    self.input = T.imatrix()
    # The embedding matrix itself: one row per feature id.
    self.wv = self.init((self.n_features, self.size))
    self.params = [self.wv]

    # Restore serialized weights (same ordering as self.params).
    if weights is not None:
        for p, w in zip(self.params, weights):
            p.set_value(floatX(w))
def connect(self, l_in):
    """Attach a fully-connected layer to its input and allocate weights.

    Maxout activations consume pairs of units, so the projection is twice
    as wide in that case.  Previously-saved weights, if any, are copied
    into the freshly created shared variables.
    """
    self.l_in = l_in
    self.n_in = l_in.size

    width = self.size * 2 if 'maxout' in self.activation_str else self.size
    self.w = self.init((self.n_in, width))
    self.b = shared0s((width))
    self.params = [self.w, self.b]

    # Restore serialized weights (same ordering as self.params).
    if self.weights is not None:
        for p, w in zip(self.params, self.weights):
            p.set_value(floatX(w))
def connect(self, l_in):
    """Attach a fully-connected layer to its input and allocate weights.

    Maxout activations consume pairs of units, so the projection is twice
    as wide in that case.  Previously-saved weights, if any, are copied
    into the freshly created shared variables.
    """
    self.l_in = l_in
    self.n_in = l_in.size

    width = self.size * 2 if 'maxout' in self.activation_str else self.size
    self.w = self.init((self.n_in, width))
    self.b = shared0s((width))
    self.params = [self.w, self.b]

    # Restore serialized weights (same ordering as self.params).
    if self.weights is not None:
        for p, w in zip(self.params, self.weights):
            p.set_value(floatX(w))
def get_updates(self, params, cost):
    """Build the Adam update expressions for ``params`` w.r.t. ``cost``.

    Returns a list of ``(shared_variable, new_value)`` pairs suitable for
    ``theano.function(updates=...)``: per-parameter first/second moment
    estimates ``m``/``v``, the parameter steps, and one shared time-step
    counter update.
    """
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)

    # Shared time-step counter t; bias-correction factors use t+1.
    i = theano.shared(floatX(0.))
    i_t = i + 1.
    fix1 = 1. - self.b1 ** (i_t)
    fix2 = 1. - self.b2 ** (i_t)
    lr_t = self.lr * (T.sqrt(fix2) / fix1)

    for p, g in zip(params, grads):
        # Moment accumulators, zero-initialized with the parameter's shape.
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        # NOTE(review): b1/b2 multiply the gradient here and (1-b) the
        # accumulator — swapped relative to Kingma & Ba's paper, where the
        # decay rate multiplies the running moment.  Preserved as-is since
        # the rest of this optimizer (bias correction above) was tuned
        # against this convention — confirm before changing.
        m_t = (self.b1 * g) + ((1. - self.b1) * m)
        v_t = (self.b2 * T.sqr(g)) + ((1. - self.b2) * v)
        g_t = m_t / (T.sqrt(v_t) + self.e)
        g_t = self.regularizer.gradient_regularize(p, g_t)
        p_t = p - (lr_t * g_t)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))

    # Append the counter update exactly once, outside the loop: Theano
    # rejects duplicate update pairs for the same shared variable, so
    # appending (i, i_t) per parameter would make theano.function fail.
    updates.append((i, i_t))
    return updates