def __init__(self, Period=init.Uniform((10, 100)),
             Shift=init.Uniform((0., 1000.)),
             On_End=init.Constant(0.05)):
    self.Period = Period
    self.Shift = Shift
    self.On_End = On_End
def __init__(self, W_in=init.Normal(0.1), W_hid=init.Normal(0.1),
             # W_cell=init.Normal(0.1),
             b=init.Constant(0.),
             a_g=init.Uniform(0.1),
             b_g_hid_to_hid=init.Uniform(0.1),
             b_g_in_to_hid=init.Uniform(0.1),
             nonlinearity=nonlinearities.sigmoid,
             learn_a_g=True,
             learn_b_g_in_to_hid=True,
             learn_b_g_hid_to_hid=True):
    # TODO: Make formulation with peepholes and W_cell
    self.W_in = W_in
    self.W_hid = W_hid
    # if W_cell is not None:
    #     self.W_cell = W_cell
    self.a_g = a_g
    if a_g is not None:
        self.learn_a_g = learn_a_g
    self.b_g_in_to_hid = b_g_in_to_hid
    if b_g_in_to_hid is not None:
        self.learn_b_g_in_to_hid = learn_b_g_in_to_hid
    self.b_g_hid_to_hid = b_g_hid_to_hid
    if b_g_hid_to_hid is not None:
        self.learn_b_g_hid_to_hid = learn_b_g_hid_to_hid
    self.b = b
    # For the nonlinearity, if None is supplied, use identity
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
def __init__(self, incoming, latent, nonlinearity=None,
             W=init.Uniform(), b=init.Uniform(),
             batch_size=512, p=0.0, **kwargs):
    super(AE, self).__init__(incoming, **kwargs)
    self.num_batch, self.num_units = self.input_shape
    if nonlinearity is None:
        self.nonlinearity = identity
    else:
        self.nonlinearity = nonlinearity
    self.n_hidden = latent
    self.x = incoming
    self.batch_size = batch_size
    # num_inputs = int(np.prod(self.input_shape[1:]))
    rng = np.random.RandomState(123)
    self.drng = rng
    self.rng = RandomStreams(rng.randint(2 ** 30))
    self.p = p

    initial_W = np.asarray(
        rng.uniform(
            low=-4 * np.sqrt(6. / (self.n_hidden + self.num_units)),
            high=4 * np.sqrt(6. / (self.n_hidden + self.num_units)),
            size=(self.num_units, self.n_hidden)
        ),
        dtype=theano.config.floatX
    )
    # self.W = self.create_param(initial_W, (num_inputs, n_hidden), name="W")
    # self.bvis = self.create_param(bvis, (num_units,), name="bvis") if bvis is not None else None
    # self.bhid = self.create_param(bhid, (n_hidden,), name="bhid") if bhid is not None else None
    self.W = theano.shared(value=initial_W, name='W', borrow=True)
    bvis = theano.shared(
        value=np.zeros(self.num_units, dtype=theano.config.floatX),
        borrow=True
    )
    bhid = theano.shared(
        value=np.zeros(self.n_hidden, dtype=theano.config.floatX),
        name='b',
        borrow=True
    )
    # b corresponds to the bias of the hidden units
    self.b = bhid
    # b_prime corresponds to the bias of the visible units
    self.b_prime = bvis
    # W_prime is initialized to W transposed (note: stored here as a separate
    # shared variable, not the symbolic transpose of W)
    self.W_prime = theano.shared(value=initial_W.T, name='W_T', borrow=True)
def __init__(self,
             Period=init.Uniform((10, 100)),    # initialize the period
             Shift=init.Uniform((0., 1000.)),   # initialize the phase shift
             On_End=init.Constant(0.05)):       # fraction of the period the gate stays open
    self.Period = Period
    self.Shift = Shift
    self.On_End = On_End
def __init__(self, env_spec,
             hidden_sizes=(32, 32),
             hidden_nonlinearity=NL.rectify,
             hidden_w_init=LI.HeUniform(),
             hidden_b_init=LI.Constant(0.),
             output_nonlinearity=NL.tanh,
             output_w_init=LI.Uniform(-3e-3, 3e-3),
             output_b_init=LI.Uniform(-3e-3, 3e-3),
             bn=False):
    assert isinstance(env_spec.action_space, Box)

    Serializable.quick_init(self, locals())

    l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

    l_hidden = l_obs
    if bn:
        l_hidden = batch_norm(l_hidden)

    for idx, size in enumerate(hidden_sizes):
        l_hidden = L.DenseLayer(
            l_hidden,
            num_units=size,
            W=hidden_w_init,
            b=hidden_b_init,
            nonlinearity=hidden_nonlinearity,
            name="h%d" % idx)
        if bn:
            l_hidden = batch_norm(l_hidden)

    l_output = L.DenseLayer(
        l_hidden,
        num_units=env_spec.action_space.flat_dim,
        W=output_w_init,
        b=output_b_init,
        nonlinearity=output_nonlinearity,
        name="output")

    # Note the deterministic=True argument. It makes sure that when getting
    # actions from single observations, we do not update params in the
    # batch normalization layers.
    action_var = L.get_output(l_output, deterministic=True)

    self._output_layer = l_output
    self._f_actions = tensor_utils.compile_function([l_obs.input_var], action_var)

    super(DeterministicMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [l_output])
def lstm_layer(input, nunits, return_final, backwards=False, name='LSTM',
               mask=None):
    ingate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01),
                  b=init.Constant(0.0))
    forgetgate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01),
                      b=init.Constant(5.0))
    cell = Gate(W_cell=None, nonlinearity=T.tanh,
                W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01))
    outgate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01),
                   b=init.Constant(0.0))
    # mask is an optional mask InputLayer forwarded to the LSTM
    lstm = LSTMLayer(input, num_units=nunits, backwards=backwards,
                     peepholes=False, ingate=ingate, forgetgate=forgetgate,
                     cell=cell, outgate=outgate, name=name,
                     only_return_final=return_final, mask_input=mask)
    return lstm
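# Usage sketch (not from the original source; the sizes, variable names and the
# lasagne import below are assumptions): building a masked LSTM encoder with the
# lstm_layer helper above. Gate, LSTMLayer, init and T are assumed to be imported
# as in that helper.
from lasagne.layers import InputLayer

l_in_demo = InputLayer(shape=(None, None, 40))   # (batch, time, features)
l_mask_demo = InputLayer(shape=(None, None))     # (batch, time) binary mask
l_lstm_demo = lstm_layer(l_in_demo, nunits=128, return_final=True,
                         mask=l_mask_demo)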
def __init__(self, incoming, num_filters, filter_size, group=1, stride=(1, 1),
             border_mode="valid", untie_biases=False, W=init.Uniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             convolution=T.nnet.conv2d, **kwargs):
    self.group = group
    # border_mode is not a Conv2DLayer argument in Lasagne, so it is passed on
    # as the pad argument instead
    super(CaffeConv2DLayer, self).__init__(incoming, num_filters, filter_size,
                                           stride=stride, pad=border_mode,
                                           untie_biases=untie_biases, W=W, b=b,
                                           nonlinearity=nonlinearity,
                                           convolution=convolution, **kwargs)
    self.border_mode = border_mode
def __init__(self, incoming, gamma=init.Uniform([0.95, 1.05]),
             beta=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             epsilon=0.001, **kwargs):
    super(BatchNormalizationLayer, self).__init__(incoming, **kwargs)
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
    self.num_units = int(np.prod(self.input_shape[1:]))
    self.gamma = self.add_param(gamma, (self.num_units,),
                                name="BatchNormalizationLayer:gamma",
                                trainable=True)
    self.beta = self.add_param(beta, (self.num_units,),
                               name="BatchNormalizationLayer:beta",
                               trainable=True)
    self.epsilon = epsilon

    self.mean_inference = theano.shared(
        np.zeros((1, self.num_units), dtype=theano.config.floatX),
        borrow=True,
        broadcastable=(True, False))
    self.mean_inference.name = "shared:mean-" + self.name

    self.variance_inference = theano.shared(
        np.zeros((1, self.num_units), dtype=theano.config.floatX),
        borrow=True,
        broadcastable=(True, False))
    self.variance_inference.name = "shared:variance-" + self.name
def __init__(self, incoming, num_filters, filter_size, groups=1, strides=(1, 1),
             border_mode=None, untie_biases=False, W=init.Uniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             pad=None, dimshuffle=True, flip_filters=False, partial_sum=1,
             **kwargs):
    super(CaffeConv2DCCLayer, self).__init__(incoming, num_filters, filter_size,
                                             strides=strides,
                                             border_mode=border_mode,
                                             untie_biases=untie_biases, W=W,
                                             b=b, nonlinearity=nonlinearity,
                                             pad=pad, dimshuffle=dimshuffle,
                                             flip_filters=flip_filters,
                                             partial_sum=partial_sum, **kwargs)
    self.groups = groups
    self.filter_acts_op = FilterActs(numGroups=self.groups, stride=self.stride,
                                     partial_sum=self.partial_sum, pad=self.pad)
def __init__(self, input_layer, num_units, W=init.Uniform(), **kwargs):
    super(RecurrentSoftmaxLayer, self).__init__(input_layer)
    self.num_units = num_units
    self.num_time_steps = self.input_shape[1]
    self.num_features = self.input_shape[2]
    self.W = self.create_param(W, (self.num_features, self.num_units), name="W")
def ptb_lstm(input_var, vocabulary_size, hidden_size, seq_len, num_layers,
             dropout, batch_size):
    l_input = L.InputLayer(shape=(batch_size, seq_len), input_var=input_var)
    l_embed = L.EmbeddingLayer(l_input, vocabulary_size, hidden_size,
                               W=init.Uniform(1.0))
    l_lstms = []
    for i in range(num_layers):
        l_lstm = L.LSTMLayer(
            l_embed if i == 0 else l_lstms[-1], hidden_size,
            ingate=L.Gate(W_in=init.GlorotUniform(), W_hid=init.Orthogonal()),
            forgetgate=L.Gate(W_in=init.GlorotUniform(), W_hid=init.Orthogonal(),
                              b=init.Constant(1.0)),
            cell=L.Gate(W_in=init.GlorotUniform(), W_hid=init.Orthogonal(),
                        W_cell=None,
                        nonlinearity=lasagne.nonlinearities.tanh),
            outgate=L.Gate(W_in=init.GlorotUniform(), W_hid=init.Orthogonal()))
        l_lstms.append(l_lstm)
    l_drop = L.DropoutLayer(l_lstms[-1], dropout)
    l_out = L.DenseLayer(l_drop, num_units=vocabulary_size, num_leading_axes=2)
    l_out = L.ReshapeLayer(l_out,
                           (l_out.output_shape[0] * l_out.output_shape[1],
                            l_out.output_shape[2]))
    l_out = L.NonlinearityLayer(l_out,
                                nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
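# Usage sketch for ptb_lstm (the hyperparameters and symbolic input below are
# placeholders, not values from the original source; assumes theano and the
# L / init / lasagne aliases used above are imported).
import theano.tensor as T

input_var_demo = T.imatrix('inputs')  # (batch_size, seq_len) word ids
l_softmax_demo = ptb_lstm(input_var_demo, vocabulary_size=10000,
                          hidden_size=200, seq_len=20, num_layers=2,
                          dropout=0.5, batch_size=32)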
def __init__(self, incoming, num_units, hidden_nonlinearity,
             name=None, W_init=SpectralRadius(density=0.2),
             hidden_init=LI.Constant(0.), hidden_init_trainable=True,
             Wi_init=LI.Uniform(0.5), leak_rate=0.5, **kwargs):
    if hidden_nonlinearity is None:
        hidden_nonlinearity = NL.identity

    # skip the direct parent; all of its init is done here
    L.Layer.__init__(self, incoming, name=name)

    input_shape = self.input_shape[2:]
    input_dim = ext.flatten_shape_dim(input_shape)
    # self._name = name

    # initial hidden state
    self.h0 = self.add_param(hidden_init, (num_units,), name="h0",
                             trainable=hidden_init_trainable,
                             regularizable=False)
    # weights from input to hidden
    self.W_xh = self.add_param(Wi_init, (input_dim, num_units), name="W_xh",
                               trainable=False, regularizable=False)
    # recurrent weights
    self.W_hh = self.add_param(W_init, (num_units, num_units), name="W_hh",
                               trainable=False, regularizable=False)

    self.leak_rate = leak_rate
    self.num_units = num_units
    self.nonlinearity = hidden_nonlinearity
def __init__(self, Period=init.Uniform((10, 100)),
             Shift=init.Uniform((0., 1000.)),
             On_End=init.Constant(0.05),
             Event_W=init.GlorotUniform(), Event_b=init.Constant(0.),
             out_W=init.GlorotUniform(), out_b=init.Constant(0.)):
    self.Period = Period
    self.Shift = Shift
    self.On_End = On_End
    self.Event_W = Event_W
    self.Event_b = Event_b
    self.out_W = out_W
    self.out_b = out_b
def __init__(self, incoming, num_units, seq_length,
             W_scale=init.Uniform(), W_time=init.Uniform(),
             b_scale=init.Constant(0.), b_time=init.Constant(0.),
             nonlinearity_scale=nonlinearities.identity,
             nonlinearity_time=nonlinearities.softmax, **kwargs):
    """
    :parameters:
        - num_units : int
            Number of segments the layer can represent.
        - seq_length : int
            Length of the sequence.
        - nonlinearity_scale, nonlinearity_time : callable or None
        - W_scale, W_time, b_scale, b_time : Theano shared variable,
          numpy array or callable
    """
    super(PolygonOutputLayer, self).__init__(incoming, **kwargs)
    self.num_units = num_units
    self.seq_length = seq_length
    self.nonlinearity_scale = nonlinearity_scale
    self.nonlinearity_time = nonlinearity_time

    # params
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.W_scale = self.create_param(W_scale, (num_inputs, num_units),
                                     name='W_scale')
    self.b_scale = (self.create_param(b_scale, (num_units,), name='b_scale')
                    if b_scale is not None else None)
    if num_units > 1:
        self.W_time = self.create_param(W_time, (num_inputs, num_units),
                                        name='W_time')
        self.b_time = (self.create_param(b_time, (num_units,), name='b_time')
                       if b_time is not None else None)
    else:
        self.W_time = None
        self.b_time = None
def _forward(self, inputX, hidden_units):
    rows, cols = inputX.shape
    layer = layers.InputLayer(shape=(rows, cols), input_var=self.X)
    layer = layers.DenseLayer(layer, num_units=hidden_units,
                              W=init.GlorotUniform(), b=init.Uniform(),
                              nonlinearity=nonlinearities.tanh)
    Hout = layers.get_output(layer)
    forwardfn = theano.function([self.X], Hout, allow_input_downcast=True)
    return forwardfn(inputX)
def test_bilinear_group_conv(x_shape, u_shape, batch_size=2):
    X_var = T.tensor4('X')
    U_var = T.matrix('U')
    l_x = L.InputLayer(shape=(None,) + x_shape, input_var=X_var, name='x')
    l_u = L.InputLayer(shape=(None,) + u_shape, input_var=U_var, name='u')
    X = np.random.random((batch_size,) + x_shape).astype(theano.config.floatX)
    U = np.random.random((batch_size,) + u_shape).astype(theano.config.floatX)

    l_xu_outer = LT.OuterProductLayer([l_x, l_u])
    l_x_diff_pred = LT.GroupConv2DLayer(l_xu_outer, x_shape[0], filter_size=5,
                                        stride=1, pad='same', untie_biases=True,
                                        groups=x_shape[0], nonlinearity=None,
                                        W=init.Uniform(), b=init.Uniform())
    X_diff_pred_var = L.get_output(l_x_diff_pred)
    X_diff_pred_fn = theano.function([X_var, U_var], X_diff_pred_var)
    X_diff_pred = X_diff_pred_fn(X, U)

    u_dim, = u_shape
    l_x_convs = []
    for i in range(u_dim + 1):
        l_x_conv = LT.GroupConv2DLayer(
            l_x, x_shape[0], filter_size=5, stride=1, pad='same',
            untie_biases=True, groups=x_shape[0], nonlinearity=None,
            W=l_x_diff_pred.W.get_value()[:, i:i + 1],
            b=l_x_diff_pred.b.get_value() if i == u_dim else None)
        l_x_convs.append(l_x_conv)
    l_x_diff_pred_bw = LT.BatchwiseSumLayer(l_x_convs + [l_u])
    X_diff_pred_bw_var = L.get_output(l_x_diff_pred_bw)
    X_diff_pred_bw_fn = theano.function([X_var, U_var], X_diff_pred_bw_var)
    X_diff_pred_bw = X_diff_pred_bw_fn(X, U)

    assert np.allclose(X_diff_pred, X_diff_pred_bw, atol=1e-7)
def __init__(self, incomings, Ws=init.Uniform(), bs=init.Constant(0.),
             nonlinearity=nonlinearities.sigmoid,
             prob_func=nonlinearities.linear, **kwargs):
    super(GatedMultipleInputsLayer, self).__init__(incomings, **kwargs)
    num_out = self.input_shapes[0][1]
    # make gates
    self.Ws = [self.create_param(Ws, (num_out, num_out))
               for i in range(len(incomings))]
    self.bs = [self.create_param(bs, (num_out,))
               for i in range(len(incomings))]
    self.num_inputs = len(incomings)
    self.nonlinearity = nonlinearity
    self.prob_func = prob_func
def __init__(self, incoming, num_units, W=init.Uniform(), E=init.Uniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify, **kwargs):
    super(NCAALayer, self).__init__(incoming, **kwargs)
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    self.num_units = num_units
    assert num_units % 2 == 0

    self.input_shape = incoming.get_output_shape()
    num_inputs = int(np.prod(self.input_shape[1:]))
    # team_features and ppt (players per team) are assumed to be defined at
    # module level
    assert (num_inputs - 2 * team_features) % (2 * ppt) == 0

    self.W = self.create_param(W, ((num_inputs - 2 * team_features) // 2 // ppt,
                                   num_units // 2))
    self.E = self.create_param(E, (team_features, num_units // 2))
    self.b = (self.create_param(b, (num_units // 2,))
              if b is not None else None)
def createMLP(layers, s):
    l_in = lasagne.layers.InputLayer(shape=(None, s))
    prev_layer = l_in
    Ws = []
    for layer in layers:
        enc = lasagne.layers.DenseLayer(prev_layer, num_units=layer,
                                        nonlinearity=rectify,
                                        W=init.Uniform(0.001), b=None)
        Ws += [enc.W]
        drop = lasagne.layers.DropoutLayer(enc, p=0.0)
        prev_layer = drop
    idx = 1
    last_enc = prev_layer

    # mask applied elementwise to the code layer
    mask = lasagne.layers.InputLayer(shape=(None, layers[-1]))
    mask_layer = lasagne.layers.ElemwiseMergeLayer([prev_layer, mask],
                                                   merge_function=T.mul)
    prev_layer = mask_layer

    for layer in layers[-2::-1]:
        print(layer)
        dec = lasagne.layers.DenseLayer(prev_layer, num_units=layer,
                                        nonlinearity=rectify,
                                        W=Ws[-idx].T, b=None)
        idx += 1
        drop = lasagne.layers.DropoutLayer(dec, p=0.0)
        prev_layer = drop
    model = lasagne.layers.DenseLayer(prev_layer, num_units=s,
                                      nonlinearity=identity, W=Ws[0].T, b=None)

    x_sym = T.dmatrix()
    mask_sym = T.dmatrix()

    all_params = lasagne.layers.get_all_params(model)
    for i in all_params:
        print(i)

    output = lasagne.layers.get_output(model, inputs={l_in: x_sym,
                                                      mask: mask_sym})
    # batch_size is assumed to be defined at module level
    loss_eval = lasagne.objectives.squared_error(output, x_sym).sum()
    loss_eval /= (2. * batch_size)

    updates = lasagne.updates.adam(loss_eval, all_params)

    return (l_in, model, last_enc,
            theano.function([x_sym, mask_sym], loss_eval, updates=updates),
            mask)
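# Usage sketch for createMLP (all sizes below are placeholders, not values from
# the original source). createMLP reads a module-level batch_size, so one must
# be defined before calling it; rectify, identity, init, lasagne, theano and T
# are assumed to be imported as in the function above.
import numpy as np

batch_size = 100
l_in_demo, model_demo, enc_demo, step_fn, mask_in_demo = createMLP([256, 64], s=784)
x_batch = np.random.rand(batch_size, 784)   # input batch
m_batch = np.ones((batch_size, 64))         # mask over the 64-unit code layer
loss_value = step_fn(x_batch, m_batch)      # one Adam step, returns the loss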
def __init__(self, incomings, V=init.Uniform(), **kwargs):
    assert len(incomings) == 4
    assert len(incomings[0].output_shape) == 3
    assert len(incomings[1].output_shape) == 3
    assert len(incomings[2].output_shape) == 2
    assert len(incomings[3].output_shape) == 2

    super(WeightedFeatureLayer, self).__init__(incomings, **kwargs)
    emb_size = incomings[0].output_shape[2]
    self.V = self.add_param(V, (emb_size,), name="V")
def test_group_conv(x_shape, num_filters, groups, batch_size=2):
    X_var = T.tensor4('X')
    l_x = L.InputLayer(shape=(None,) + x_shape, input_var=X_var, name='x')
    X = np.random.random((batch_size,) + x_shape).astype(theano.config.floatX)

    l_conv = LT.GroupConv2DLayer(l_x, num_filters, filter_size=3, stride=1,
                                 pad='same', untie_biases=True, groups=groups,
                                 nonlinearity=None,
                                 W=init.Uniform(), b=init.Uniform())
    conv_var = L.get_output(l_conv)
    conv_fn = theano.function([X_var], conv_var)
    tic()
    conv = conv_fn(X)
    toc("conv time for x_shape=%r, num_filters=%r, groups=%r, batch_size=%r\n\t"
        % (x_shape, num_filters, groups, batch_size))

    l_scan_conv = LT.ScanGroupConv2DLayer(l_x, num_filters, filter_size=3,
                                          stride=1, pad='same',
                                          untie_biases=True, groups=groups,
                                          nonlinearity=None,
                                          W=l_conv.W, b=l_conv.b)
    scan_conv_var = L.get_output(l_scan_conv)
    scan_conv_fn = theano.function([X_var], scan_conv_var)
    tic()
    scan_conv = scan_conv_fn(X)
    toc("scan_conv time for x_shape=%r, num_filters=%r, groups=%r, batch_size=%r\n\t"
        % (x_shape, num_filters, groups, batch_size))

    assert np.allclose(conv, scan_conv)
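# Example invocation (shapes are placeholders, not from the original source);
# the channel count and num_filters should both be divisible by groups.
test_group_conv(x_shape=(4, 16, 16), num_filters=8, groups=2, batch_size=2)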
def __init__(self, input_layer, gamma=init.Uniform([0.95, 1.05]),
             beta=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             epsilon=0.001, **kwargs):
    super(BatchNormLayer, self).__init__(input_layer, **kwargs)
    self.additional_updates = None
    self.epsilon = epsilon
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    input_shape = input_layer.get_output_shape()
    if len(input_shape) == 2:
        # in case of a dense layer
        self.axis = (0,)
        param_shape = (input_shape[-1],)
        self.gamma = self.create_param(gamma, param_shape)
        self.beta = self.create_param(beta, param_shape)
        ema_shape = (1, input_shape[-1])
        ema_bc = (True, False)
    elif len(input_shape) == 4:
        # in case of a conv2d layer
        self.axis = (0, 2, 3)
        param_shape = (input_shape[1], 1, 1)
        # made broadcastable along the (size-1) spatial axes
        self.gamma = theano.shared(utils.floatX(gamma(param_shape)),
                                   broadcastable=(False, True, True),
                                   borrow=True)
        self.beta = theano.shared(utils.floatX(beta(param_shape)),
                                  broadcastable=(False, True, True),
                                  borrow=True)
        ema_shape = (1, input_shape[1], 1, 1)
        ema_bc = (True, False, True, True)
    else:
        raise NotImplementedError

    self.mean_ema = theano.shared(np.zeros(ema_shape,
                                           dtype=theano.config.floatX),
                                  borrow=True, broadcastable=ema_bc)
    self.variance_ema = theano.shared(np.ones(ema_shape,
                                              dtype=theano.config.floatX),
                                      borrow=True, broadcastable=ema_bc)
    self.batch_cnt = theano.shared(0)
def __init__(self, incoming, num_filters, filter_size, strides=(1, 1),
             border_mode=None, untie_biases=False, W=init.Uniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             pad=None, flip_filters=False, **kwargs):
    super(Conv2DDNNLayer, self).__init__(incoming, **kwargs)
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    self.num_filters = num_filters
    self.filter_size = filter_size
    if isinstance(strides, int):
        strides = (strides, strides)
    self.strides = strides
    self.untie_biases = untie_biases
    self.flip_filters = flip_filters

    if border_mode is not None and pad is not None:
        raise RuntimeError("You cannot specify both 'border_mode' and 'pad'. "
                           "To avoid ambiguity, please specify only one of "
                           "them.")
    elif border_mode is None and pad is None:
        # no option specified, default to valid mode
        self.pad = (0, 0)
        self.border_mode = 'valid'
    elif border_mode is not None:
        if border_mode == 'valid':
            self.pad = (0, 0)
            self.border_mode = 'valid'
        elif border_mode == 'full':
            self.pad = (self.filter_size[0] - 1, self.filter_size[1] - 1)
            self.border_mode = 'full'
        elif border_mode == 'same':
            # dnn_conv does not support 'same', so specify the padding directly.
            # This only works for odd filter sizes; the even filter size case
            # is probably not worth supporting.
            self.pad = ((self.filter_size[0] - 1) // 2,
                        (self.filter_size[1] - 1) // 2)
            self.border_mode = None
        else:
            raise RuntimeError("Unsupported border_mode for Conv2DDNNLayer: %s"
                               % border_mode)
    else:
        if isinstance(pad, int):
            pad = (pad, pad)
        self.pad = pad
        self.border_mode = None

    self.W = self.create_param(W, self.get_W_shape(), name="W")
    if b is None:
        self.b = None
    elif self.untie_biases:
        output_shape = self.get_output_shape()
        self.b = self.create_param(b, (num_filters, output_shape[2],
                                       output_shape[3]), name="b")
    else:
        self.b = self.create_param(b, (num_filters,), name="b")
def __init__(self, incoming, num_units, n_hidden, W=init.Uniform(),
             bhid=init.Constant(0.), bvis=init.Constant(0.),
             nonlinearity=nonlinearities.rectify, **kwargs):
    super(AutoEncoder, self).__init__(incoming, **kwargs)
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
    self.num_units = num_units
    self.n_hidden = n_hidden
    self.x = incoming
    # num_inputs = int(np.prod(self.input_shape[1:]))
    num_inputs = num_units

    rng = np.random.RandomState(123)
    self.rng = RandomStreams(rng.randint(2 ** 30))
    initial_W = np.asarray(
        rng.uniform(
            low=-4 * np.sqrt(6. / (n_hidden + num_units)),
            high=4 * np.sqrt(6. / (n_hidden + num_units)),
            size=(num_units, n_hidden)),
        dtype=theano.config.floatX)
    # self.W = self.create_param(initial_W, (num_inputs, n_hidden), name="W")
    # self.bvis = self.create_param(bvis, (num_units,), name="bvis") if bvis is not None else None
    # self.bhid = self.create_param(bhid, (n_hidden,), name="bhid") if bhid is not None else None
    self.W = theano.shared(value=initial_W, name='W', borrow=True)
    bvis = theano.shared(value=np.zeros(num_units, dtype=theano.config.floatX),
                         borrow=True)
    bhid = theano.shared(value=np.zeros(n_hidden, dtype=theano.config.floatX),
                         name='b', borrow=True)
    # b corresponds to the bias of the hidden units
    self.b = bhid
    # b_prime corresponds to the bias of the visible units
    self.b_prime = bvis
    # tied weights, therefore W_prime is W transposed
    self.W_prime = self.W.T
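# Sketch of the corresponding encode/decode pass (an assumption based on the
# standard tied-weight autoencoder this initialization follows, not code from
# the original source): h = s(x W + b), x_hat = s(h W' + b_prime), W' = W.T.
import theano.tensor as TT

def _demo_reconstruct(ae, x):
    # encode with the tied weight matrix, then decode with its transpose
    h = TT.nnet.sigmoid(TT.dot(x, ae.W) + ae.b)
    return TT.nnet.sigmoid(TT.dot(h, ae.W_prime) + ae.b_prime)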
def __init__(self, incomings, num_units, nonlinearity=nonlinearities.sigmoid,
             W=init.Uniform(), b=init.Constant(0.0), **kwargs):
    super(MergeDense, self).__init__(incomings=incomings, **kwargs)
    self.num_units = num_units
    self.input_shapes = [inc.output_shape for inc in incomings]
    self.weights = [
        self.get_weights(W, shape=input_shape, name='W%d' % i)
        for i, input_shape in enumerate(self.input_shapes)
    ]
    self.b = self.add_param(b, (self.num_units,), name="b",
                            regularizable=False)
    self.nonlinearity = nonlinearity
def __init__(self, W_in=init.Orthogonal(0.1), W_hid=init.Orthogonal(0.1),
             W_cell=init.Uniform(0.1), b=init.Constant(0.),
             nonlinearity=nonlinearities.sigmoid):
    self.W_in = W_in
    self.W_hid = W_hid
    # Don't store a cell weight vector when cell is None
    if W_cell is not None:
        self.W_cell = W_cell
    self.b = b
    # For the nonlinearity, if None is supplied, use identity
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
def lstm_layer(input, nunits, return_final, backwards=False, name='LSTM'):
    ingate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01),
                  b=init.Constant(0.0))
    forgetgate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01),
                      b=init.Constant(5.0))
    cell = Gate(W_cell=None, nonlinearity=T.tanh,
                W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01))
    outgate = Gate(W_in=init.Uniform(0.01), W_hid=init.Uniform(0.01),
                   b=init.Constant(0.0))
    lstm = LSTMLayer(input, num_units=nunits, backwards=backwards,
                     peepholes=False, ingate=ingate, forgetgate=forgetgate,
                     cell=cell, outgate=outgate, name=name,
                     only_return_final=return_final)
    # note: this plain RecurrentLayer is constructed but not returned;
    # only the LSTM above is used
    rec = RecurrentLayer(input, num_units=nunits,
                         W_in_to_hid=init.GlorotNormal('relu'),
                         W_hid_to_hid=init.GlorotNormal('relu'),
                         backwards=backwards, nonlinearity=rectify,
                         only_return_final=return_final, name=name)
    return lstm
def __init__(self, incoming, num_units, W=init.Uniform(), b=init.Constant(0.),
             nonlinearity=nonlinearities.rectify, **kwargs):
    super(CaffeDenseLayer, self).__init__(incoming, **kwargs)
    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    self.num_units = num_units
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.W = self.create_param(W, (num_inputs, num_units), name="W")
    self.b = (self.create_param(b, (num_units,), name="b")
              if b is not None else None)
def __init__(self, incoming, num_filters, filter_size, groups=1, stride=(1, 1),
             border_mode=None, untie_biases=False, W=init.Uniform(),
             b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
             pad=None, dimshuffle=True, flip_filters=False, partial_sum=1,
             **kwargs):
    super(CaffeConv2DCCLayer, self).__init__(incoming, num_filters, filter_size,
                                             stride=stride,
                                             untie_biases=untie_biases, W=W,
                                             b=b, nonlinearity=nonlinearity,
                                             pad=pad, dimshuffle=dimshuffle,
                                             flip_filters=flip_filters,
                                             partial_sum=partial_sum, **kwargs)
    self.groups = groups

    # FilterActs in pylearn2 cannot accept a tuple-valued pad
    if isinstance(self.pad, tuple):
        self.pad = self.pad[0]
    elif not isinstance(self.pad, int):
        self.pad = 0

    self.filter_acts_op = FilterActs(numGroups=self.groups, stride=self.stride,
                                     partial_sum=self.partial_sum, pad=self.pad)
def __init__(self, incoming, num_units,
             W_in_to_hid=init.Uniform(),
             W_hid_to_hid=init.Uniform(),
             a_g=init.Uniform(0.1),
             b_g_hid_to_hid=init.Uniform(0.1),
             b_g_in_to_hid=init.Uniform(0.1),
             b=init.Constant(0.),
             nonlinearity=nonlinearities.rectify,
             hid_init=init.Constant(0.),
             backwards=False,
             learn_init=False,
             learn_a_g=True,
             learn_b_g_in_to_hid=True,
             learn_b_g_hid_to_hid=True,
             gradient_steps=-1,
             grad_clipping=0,
             unroll_scan=False,
             precompute_input=True,
             mask_input=None,
             only_return_final=False,
             **kwargs):
    if isinstance(incoming, tuple):
        input_shape = incoming
    else:
        input_shape = incoming.output_shape

    # Retrieve the supplied name, if it exists; otherwise use ''
    if 'name' in kwargs:
        basename = kwargs['name'] + '.'
        # Create a separate version of kwargs for the contained layers
        # which does not include 'name'
        layer_kwargs = dict((key, arg) for key, arg in kwargs.items()
                            if key != 'name')
    else:
        basename = ''
        layer_kwargs = kwargs

    # We will be passing the input at each time step to the dense layer,
    # so we need to remove the second dimension (the time dimension)
    in_to_hid = DenseLayer(InputLayer((None,) + input_shape[2:]),
                           num_units, W=W_in_to_hid, b=None,
                           nonlinearity=None,
                           name=basename + 'input_to_hidden',
                           **layer_kwargs)

    # The hidden-to-hidden layer expects its inputs to have num_units
    # features because it recycles the previous hidden state
    hid_to_hid = DenseLayer(InputLayer((None, num_units)),
                            num_units, W=W_hid_to_hid, b=None,
                            nonlinearity=None,
                            name=basename + 'hidden_to_hidden',
                            **layer_kwargs)

    # Make child layer parameters intuitively accessible
    self.W_in_to_hid = in_to_hid.W
    self.W_hid_to_hid = hid_to_hid.W

    super(MIRecurrentLayer, self).__init__(
        incoming, in_to_hid, hid_to_hid,
        a_g=a_g,
        b_g_in_to_hid=b_g_in_to_hid,
        b_g_hid_to_hid=b_g_hid_to_hid,
        b=b,
        nonlinearity=nonlinearity,
        hid_init=hid_init,
        backwards=backwards,
        learn_init=learn_init,
        learn_a_g=learn_a_g,
        learn_b_g_in_to_hid=learn_b_g_in_to_hid,
        learn_b_g_hid_to_hid=learn_b_g_hid_to_hid,
        gradient_steps=gradient_steps,
        grad_clipping=grad_clipping,
        unroll_scan=unroll_scan,
        precompute_input=precompute_input,
        mask_input=mask_input,
        only_return_final=only_return_final,
        **kwargs)