def test_gru_precompute():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones((num_batch, seq_len), dtype='float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_precompute = GRULayer(l_inp, num_units=num_units,
                                precompute_input=True,
                                mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_gru_no_precompute = GRULayer(l_inp, num_units=num_units,
                                   precompute_input=False,
                                   mask_input=l_mask_inp)

    output_precompute = helper.get_output(
        l_gru_precompute).eval({l_inp.input_var: x_in,
                                l_mask_inp.input_var: mask_in})
    output_no_precompute = helper.get_output(
        l_gru_no_precompute).eval({l_inp.input_var: x_in,
                                   l_mask_inp.input_var: mask_in})

    # precomputing the input should not change the output
    np.testing.assert_almost_equal(output_precompute, output_no_precompute)
def _add_forward_backward_encoder_layer(self):
    is_single_layer_encoder = self._encoder_depth == 1
    return_only_final_state = is_single_layer_encoder

    # input shape = (batch_size * input_context_size, input_seq_len, embedding_dimension)
    self._net['enc_forward'] = GRULayer(
        incoming=self._net['emb_x'],
        num_units=self._hidden_layer_dim,
        grad_clipping=self._grad_clip,
        only_return_final=return_only_final_state,
        name='encoder_forward',
        mask_input=self._net['input_x_mask'])
    # output shape = (batch_size * input_context_size, input_seq_len, hidden_layer_dimension)
    # or (batch_size * input_context_size, hidden_layer_dimension)

    # input shape = (batch_size * input_context_size, input_seq_len, embedding_dimension)
    self._net['enc_backward'] = GRULayer(
        incoming=self._net['emb_x'],
        num_units=self._hidden_layer_dim,
        grad_clipping=self._grad_clip,
        only_return_final=return_only_final_state,
        backwards=True,
        name='encoder_backward',
        mask_input=self._net['input_x_mask'])
    # output shape = (batch_size * input_context_size, input_seq_len, hidden_layer_dimension)
    # or (batch_size * input_context_size, hidden_layer_dimension)

    self._net['enc_0'] = ConcatLayer(
        incomings=[self._net['enc_forward'], self._net['enc_backward']],
        axis=1 if return_only_final_state else 2,
        name='encoder_bidirectional_concat')
def test_gru_unroll_scan_fwd():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    l_mask_inp = InputLayer(in_shp[:2])

    x_in = np.random.random(in_shp).astype('float32')
    mask_in = np.ones(in_shp[:2]).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=False,
                          unroll_scan=False, mask_input=l_mask_inp)
    lasagne.random.get_rng().seed(1234)
    l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=False,
                              unroll_scan=True, mask_input=l_mask_inp)

    output_scan = helper.get_output(l_gru_scan)
    output_unrolled = helper.get_output(l_gru_unrolled)

    output_scan_val = output_scan.eval({l_inp.input_var: x_in,
                                        l_mask_inp.input_var: mask_in})
    output_unrolled_val = output_unrolled.eval({l_inp.input_var: x_in,
                                                l_mask_inp.input_var: mask_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
def test_gru_unroll_scan_bck():
    num_batch, seq_len, n_features1 = 2, 5, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.random.random(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=True,
                          unroll_scan=False)
    lasagne.random.get_rng().seed(1234)
    l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=True,
                              unroll_scan=True)

    output_scan = helper.get_output(l_gru_scan, x)
    output_unrolled = helper.get_output(l_gru_unrolled, x)

    output_scan_val = output_scan.eval({x: x_in})
    output_unrolled_val = output_unrolled.eval({x: x_in})

    np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
def test_gru_tensor_init():
    # check if passing in a TensorVariable to hid_init works
    num_units = 5
    batch_size = 3
    seq_len = 2
    n_inputs = 4
    in_shp = (batch_size, seq_len, n_inputs)
    l_inp = InputLayer(in_shp)
    hid_init = T.matrix()
    x = T.tensor3()

    l_gru = GRULayer(l_inp, num_units, learn_init=True, hid_init=hid_init)

    # check that the tensor is used and not overwritten
    assert hid_init == l_gru.hid_init

    # 3*n_gates; should not return any inits
    # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
    assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 9

    # bias params (3); should not return any inits
    assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 3

    # check that it compiles and runs
    output = lasagne.layers.get_output(l_gru, x)

    x_test = np.ones(in_shp, dtype='float32')
    hid_init_test = np.ones((batch_size, num_units), dtype='float32')

    output_val = output.eval({x: x_test, hid_init: hid_init_test})
    assert isinstance(output_val, np.ndarray)
def build_res_stafg():
    net = collections.OrderedDict()

    # INPUTS----------------------------------------
    net['sent_input'] = InputLayer((None, CFG['SEQUENCE LENGTH']),
                                   input_var=T.imatrix())
    net['word_emb'] = EmbeddingLayer(net['sent_input'],
                                     input_size=CFG['VOCAB SIZE'] + 3,
                                     output_size=CFG['WORD VECTOR SIZE'],
                                     W=np.copy(CFG['wemb']))
    net['vis_input'] = InputLayer((None, CFG['VISUAL LENGTH'], CFG['VIS SIZE']))

    # key words model-------------------------------------
    net['vis_mean_pool'] = FeaturePoolLayer(net['vis_input'], CFG['VISUAL LENGTH'],
                                            pool_function=T.mean)
    net['ctx_vis_reshp'] = ReshapeLayer(net['vis_mean_pool'], (-1, CFG['VIS SIZE']))
    net['global_vis'] = DenseLayer(net['ctx_vis_reshp'],
                                   num_units=CFG['EMBEDDING SIZE'],
                                   nonlinearity=linear)
    net['key_words_prob'] = DenseLayer(DropoutLayer(net['global_vis']),
                                       num_units=CFG['VOCAB SIZE'] + 3,
                                       nonlinearity=sigmoid)

    # gru model--------------------------------------
    net['mask_input'] = InputLayer((None, CFG['SEQUENCE LENGTH']))
    net['sgru'] = GRULayer(net['word_emb'], num_units=CFG['EMBEDDING SIZE'],
                           mask_input=net['mask_input'],
                           hid_init=net['global_vis'])
    net['sta_gru'] = CTXAttentionGRULayer(
        [net['sgru'], net['vis_input'], net['global_vis']],
        num_units=CFG['EMBEDDING SIZE'],
        mask_input=net['mask_input'])
    net['fusion'] = DropoutLayer(ConcatLayer([net['sta_gru'], net['sgru']], axis=2),
                                 p=0.5)
    net['fusion_reshp'] = ReshapeLayer(net['fusion'], (-1, CFG['EMBEDDING SIZE'] * 2))
    net['word_prob'] = DenseLayer(net['fusion_reshp'],
                                  num_units=CFG['VOCAB SIZE'] + 3,
                                  nonlinearity=softmax)
    net['sent_prob'] = ReshapeLayer(net['word_prob'],
                                    (-1, CFG['SEQUENCE LENGTH'], CFG['VOCAB SIZE'] + 3))
    return net
def rnn_fn(self):
    """Define the rnn using lasagne

    :return l_current: lasagne RNN
    """
    l_in = InputLayer((None, None, self.z_dim))
    layers = [l_in]
    l_current = l_in

    # create the rnn layers
    for h in range(1, self.hid_depth + 1):
        backwards = True if self.bidirectional and h % 2 == 0 else False
        l_h = GRULayer(l_current, num_units=self.hid_dim,
                       hidden_update=Gate(nonlinearity=tanh),
                       backwards=backwards)

        # if we want to use skip-connections we concatenate the current layer
        if self.use_skip:
            layers.append(l_h)
            if h != self.hid_depth:
                l_current = ConcatLayer([l_in, l_h], axis=2)
            else:
                l_current = ConcatLayer(layers[1:], axis=2)
        else:
            l_current = l_h

    return l_current
def _add_context_encoder(self):
    self._net['batched_enc'] = reshape(
        self._net['enc'],
        (self._batch_size, self._input_context_size,
         get_output_shape(self._net['enc'])[-1]))

    self._net['context_enc'] = GRULayer(
        incoming=self._net['batched_enc'],
        num_units=self._hidden_layer_dim,
        grad_clipping=self._grad_clip,
        only_return_final=True,
        name='context_encoder')

    self._net['switch_enc_to_tv'] = T.iscalar(name='switch_enc_to_tv')

    self._net['thought_vector'] = InputLayer(
        shape=(None, self._hidden_layer_dim),
        input_var=T.fmatrix(name='thought_vector'),
        name='thought_vector')

    self._net['enc_result'] = SwitchLayer(
        incomings=[self._net['thought_vector'], self._net['context_enc']],
        condition=self._net['switch_enc_to_tv'])

    # We need the following to pass as 'givens' argument when compiling theano functions:
    self._default_thoughts_vector = T.zeros((self._batch_size, self._hidden_layer_dim))
    self._default_input_x = T.zeros(
        shape=(self._net['thought_vector'].input_var.shape[0], 1, 1),
        dtype=np.int32)
def __init__(self, num_batch, max_len, n_features, hidden=[200, 200], **kwargs):
    self.num_batch = num_batch
    self.n_features = n_features
    self.max_len = max_len
    self.hidden = hidden
    rng = np.random.RandomState(123)
    self.drng = rng
    self.rng = RandomStreams(rng.randint(2 ** 30))

    # params
    initial_W = np.asarray(
        rng.uniform(
            low=-4 * np.sqrt(6. / (self.hidden[1] + self.n_features)),
            high=4 * np.sqrt(6. / (self.hidden[1] + self.n_features)),
            size=(self.hidden[1], self.n_features)
        ),
        dtype=theano.config.floatX
    )
    self.W = theano.shared(value=initial_W, name='W', borrow=True)
    # self.W_y_kappa = theano.shared(value=initial_W, name='W_y_kappa', borrow=True)
    self.b = theano.shared(
        value=np.zeros(
            self.n_features,
            dtype=theano.config.floatX
        ),
        borrow=True
    )
    # self.b_y_kappa = theano.shared(
    #     value=np.zeros(
    #         self.n_features,
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )

    # I could directly create the model here since it is fixed
    self.l_in = InputLayer(shape=(None, self.max_len, self.n_features))
    self.mask_input = InputLayer(shape=(None, self.max_len))
    first_hidden = GRULayer(self.l_in, mask_input=self.mask_input,
                            num_units=hidden[0])
    # l_shp = ReshapeLayer(first_hidden, (-1, hidden[0]))
    # l_dense = DenseLayer(l_shp, num_units=self.hidden[0], nonlinearity=rectify)
    # l_drop = DropoutLayer(l_dense, p=0.5)
    # l_shp = ReshapeLayer(l_drop, (-1, self.max_len, self.hidden[0]))
    self.model = GRULayer(first_hidden, num_units=hidden[1])
def test_gru_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
    # a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_rec_inp_layer = GRULayer(l_inp, n_units, hid_init=l_inp_h)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = GRULayer(l_inp, n_units, hid_init=Xh_test)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano functions
    X = T.tensor3()
    Xh = T.matrix()
    output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer,
                                                 {l_inp: X, l_inp_h: Xh})
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval({X: X_test, Xh: Xh_test_batch})
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
def test_gru_grad():
    num_batch, seq_len, n_features = 5, 3, 10
    num_units = 6
    l_inp = InputLayer((num_batch, seq_len, n_features))
    l_gru = GRULayer(l_inp, num_units=num_units)
    output = helper.get_output(l_gru)
    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_gru))
    assert isinstance(g, (list, tuple))
def __init__(self, num_batch, max_len, n_features, hidden=[200, 200], **kwargs):
    self.num_batch = num_batch
    self.n_features = n_features
    self.max_len = max_len
    self.hidden = hidden
    rng = np.random.RandomState(123)
    self.drng = rng
    self.rng = RandomStreams(rng.randint(2 ** 30))

    # params
    initial_W = np.asarray(
        rng.uniform(low=1e-5, high=1, size=(self.hidden[1], self.n_features)),
        dtype=theano.config.floatX)
    self.W_y_theta = theano.shared(value=initial_W, name='W_y_theta', borrow=True)
    # self.W_y_kappa = theano.shared(value=initial_W, name='W_y_kappa', borrow=True)
    self.b_y_theta = theano.shared(
        value=np.zeros(self.n_features, dtype=theano.config.floatX),
        borrow=True)
    # self.b_y_kappa = theano.shared(
    #     value=np.zeros(
    #         self.n_features,
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )

    # I could directly create the model here since it is fixed
    self.l_in = InputLayer(shape=(self.num_batch, self.max_len, self.n_features))
    self.mask_input = InputLayer(shape=(self.num_batch, self.max_len))
    first_hidden = GRULayer(self.l_in, mask_input=self.mask_input,
                            num_units=hidden[0])
    self.model = GRULayer(first_hidden, num_units=hidden[1])
def test_gru_return_final():
    num_batch, seq_len, n_features = 2, 3, 4
    num_units = 2
    in_shp = (num_batch, seq_len, n_features)
    x_in = np.random.random(in_shp).astype('float32')

    l_inp = InputLayer(in_shp)
    lasagne.random.get_rng().seed(1234)
    l_rec_final = GRULayer(l_inp, num_units, only_return_final=True)
    lasagne.random.get_rng().seed(1234)
    l_rec_all = GRULayer(l_inp, num_units, only_return_final=False)

    output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in})
    output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})

    assert output_final.shape == (output_all.shape[0], output_all.shape[2])
    assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
    assert np.allclose(output_final, output_all[:, -1])
def test_gru_hid_init_layer():
    # test that you can set hid_init to be a layer
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_gru = GRULayer(l_inp, 5, hid_init=l_inp_h)

    x = T.tensor3()
    h = T.matrix()

    output = lasagne.layers.get_output(l_gru, {l_inp: x, l_inp_h: h})
def test_gru_nparams_learn_init_true():
    l_inp = InputLayer((2, 2, 3))
    l_gru = GRULayer(l_inp, 5, learn_init=True)

    # 3*n_gates + hid_init
    # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
    assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 10

    # bias params(3) + init params(1)
    assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4
def test_unroll_none_input_error():
    # Test that a ValueError is raised if unroll_scan is True and the input
    # sequence length is specified as None.
    l_in = InputLayer((2, None, 3))
    with pytest.raises(ValueError):
        RecurrentLayer(l_in, 5, unroll_scan=True)
    with pytest.raises(ValueError):
        LSTMLayer(l_in, 5, unroll_scan=True)
    with pytest.raises(ValueError):
        GRULayer(l_in, 5, unroll_scan=True)
def test_gradient_steps_error():
    # Check that an error is raised if gradient_steps is not -1 and unroll_scan
    # is True
    l_in = InputLayer((2, 2, 3))
    with pytest.raises(ValueError):
        RecurrentLayer(l_in, 5, gradient_steps=3, unroll_scan=True)
    with pytest.raises(ValueError):
        LSTMLayer(l_in, 5, gradient_steps=3, unroll_scan=True)
    with pytest.raises(ValueError):
        GRULayer(l_in, 5, gradient_steps=3, unroll_scan=True)
def GRURecurrent(input_var, mask_var=None, batch_size=1, n_in=100, n_out=1,
                 n_hid=200, diag_val=0.9, offdiag_val=0.01,
                 out_nlin=lasagne.nonlinearities.linear):
    # Input Layer
    l_in = InputLayer((batch_size, None, n_in), input_var=input_var)
    if mask_var is None:
        l_mask = None
    else:
        l_mask = InputLayer((batch_size, None), input_var=mask_var)

    _, seqlen, _ = l_in.input_var.shape

    # Recurrent Layer
    l_rec = GRULayer(
        l_in, n_hid,
        resetgate=lasagne.layers.Gate(W_in=lasagne.init.GlorotNormal(0.05),
                                      W_hid=lasagne.init.GlorotNormal(0.05),
                                      W_cell=None,
                                      b=lasagne.init.Constant(0.)),
        updategate=lasagne.layers.Gate(W_in=lasagne.init.GlorotNormal(0.05),
                                       W_hid=lasagne.init.GlorotNormal(0.05),
                                       W_cell=None),
        hidden_update=lasagne.layers.Gate(
            W_in=lasagne.init.GlorotNormal(0.05),
            W_hid=LeInit(diag_val=diag_val, offdiag_val=offdiag_val),
            W_cell=None,
            nonlinearity=lasagne.nonlinearities.rectify),
        hid_init=lasagne.init.Constant(0.),
        backwards=False,
        learn_init=False,
        gradient_steps=-1,
        grad_clipping=10.,
        unroll_scan=False,
        precompute_input=True,
        mask_input=l_mask,
        only_return_final=False)

    # Output Layer
    l_shp = ReshapeLayer(l_rec, (-1, n_hid))
    l_dense = DenseLayer(l_shp, num_units=n_out,
                         W=lasagne.init.GlorotNormal(0.05),
                         nonlinearity=out_nlin)
    # To reshape back to our original shape, we can use the symbolic shape
    # variables we retrieved above.
    l_out = ReshapeLayer(l_dense, (batch_size, seqlen, n_out))

    return l_out, l_rec
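# Usage sketch (not part of the original sources): instantiate GRURecurrent and
# compile a forward pass. Assumes numpy (np), theano, T and lasagne are imported
# as in the surrounding snippets and that LeInit is available in the same module;
# the batch/sequence sizes are illustrative only.
def _demo_gru_recurrent():
    x_sym = T.tensor3('x')
    l_out, l_rec = GRURecurrent(x_sym, batch_size=2, n_in=100, n_out=1, n_hid=200)
    out = lasagne.layers.get_output(l_out)
    f = theano.function([x_sym], out)
    x = np.zeros((2, 15, 100), dtype='float32')   # (batch_size, seq_len, n_in)
    assert f(x).shape == (2, 15, 1)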
def test_gru_passthrough():
    # Tests that the GRU can simply pass through its input
    l_in = InputLayer((4, 5, 6))
    zero = lasagne.init.Constant(0.)
    one = lasagne.init.Constant(1.)
    pass_gate = Gate(zero, zero, None, one, None)
    no_gate = Gate(zero, zero, None, zero, None)
    in_pass_gate = Gate(
        np.eye(6).astype(theano.config.floatX), zero, None, zero, None)
    l_rec = GRULayer(l_in, 6, no_gate, pass_gate, in_pass_gate)
    out = lasagne.layers.get_output(l_rec)
    inp = np.arange(4 * 5 * 6).reshape(4, 5, 6).astype(theano.config.floatX)
    np.testing.assert_almost_equal(out.eval({l_in.input_var: inp}), inp)
def test_gru_bck():
    num_batch, seq_len, n_features1 = 2, 3, 4
    num_units = 2
    x = T.tensor3()
    in_shp = (num_batch, seq_len, n_features1)
    l_inp = InputLayer(in_shp)
    x_in = np.ones(in_shp).astype('float32')

    # need to set random seed.
    lasagne.random.get_rng().seed(1234)
    l_gru_fwd = GRULayer(l_inp, num_units=num_units, backwards=False)
    lasagne.random.get_rng().seed(1234)
    l_gru_bck = GRULayer(l_inp, num_units=num_units, backwards=True)

    output_fwd = helper.get_output(l_gru_fwd, x)
    output_bck = helper.get_output(l_gru_bck, x)

    output_fwd_val = output_fwd.eval({x: x_in})
    output_bck_val = output_bck.eval({x: x_in})

    # for a constant input, the backwards model's output is the forward
    # model's output reversed along the time axis
    np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1])
def test_gru_hid_init_mask():
    # test that you can set hid_init to be a layer when a mask is provided
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_msk = InputLayer((2, 2))
    l_gru = GRULayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk)

    x = T.tensor3()
    h = T.matrix()
    msk = T.matrix()

    inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk}
    output = lasagne.layers.get_output(l_gru, inputs)
def build_model_L(in_dim=3, out_dim=3):
    input_var = tensor.ftensor3('x')  # (B, T, D)
    input0 = InputLayer(shape=(None, None, in_dim),
                        input_var=input_var,
                        name='input0')
    gru0 = GRULayer(input0, num_units=out_dim,
                    precompute_input=True,
                    backwards=False,
                    only_return_final=False,
                    learn_init=True,
                    name='gru0')
    return gru0
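# Usage sketch (not part of the original sources): compile and run build_model_L on
# a small random batch. Assumes numpy (np), theano and lasagne are imported as in
# the surrounding snippets; the batch/sequence sizes are arbitrary.
def _demo_build_model_L():
    gru0 = build_model_L(in_dim=3, out_dim=3)
    l_in = lasagne.layers.get_all_layers(gru0)[0]        # the InputLayer created inside
    out = lasagne.layers.get_output(gru0)                # shape (B, T, out_dim)
    f = theano.function([l_in.input_var], out)
    x = np.random.random((2, 7, 3)).astype('float32')    # (B=2, T=7, D=3)
    assert f(x).shape == (2, 7, 3)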
def __init__(self, incomings, voc_size, hid_state_size, SemMem=None, GRU=None,
             **kwargs):
    super(InputModule, self).__init__(incomings, **kwargs)

    if SemMem is not None:
        self.SemMem = SemMem
    else:
        self.SemMem = SemMemModule(incomings[0], voc_size, hid_state_size, **kwargs)
    if GRU is not None:
        self.GRU = GRU
    else:
        self.GRU = GRULayer(self.SemMem, hid_state_size)

    self.voc_size = voc_size
    self.hid_state_size = hid_state_size
def test_gru_return_shape():
    num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
    num_units = 6
    x = T.tensor4()
    in_shp = (num_batch, seq_len, n_features1, n_features2)
    l_inp = InputLayer(in_shp)
    l_rec = GRULayer(l_inp, num_units=num_units)

    x_in = np.random.random(in_shp).astype('float32')
    output = helper.get_output(l_rec, x)
    output_val = output.eval({x: x_in})

    assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape
    assert output_val.shape == (num_batch, seq_len, num_units)
def _add_decoder(self):
    """
    The decoder returns a batch of sequences of thought vectors, one per decoded
    token, and reshapes this 3d tensor into a 2d matrix so that the next Dense
    layer can convert each thought vector into a probability distribution vector.
    """
    self._net['hid_states_decoder'] = InputLayer(
        shape=(None, self._decoder_depth, None),
        input_var=T.tensor3('hid_inits_decoder'),
        name='hid_states_decoder')

    # repeat along the sequence axis output_seq_len times,
    # where output_seq_len is inferred from the input tensor
    self._net['enc_repeated'] = RepeatLayer(
        incoming=self._net['enc_result'],  # input shape = (batch_size, encoder_output_dimension)
        n=self._output_seq_len,
        name='repeat_layer')

    self._net['emb_condition_id_repeated'] = RepeatLayer(
        incoming=self._net['emb_condition_id'],
        n=self._output_seq_len,
        name='embedding_condition_id_repeated')

    self._net['dec_concated_input'] = ConcatLayer(
        incomings=[
            self._net['emb_y'],
            self._net['enc_repeated'],
            self._net['emb_condition_id_repeated']
        ],
        axis=2,
        name='decoder_concated_input')
    # shape = (batch_size, input_seq_len, encoder_output_dimension)

    self._net['dec_0'] = self._net['dec_concated_input']

    for dec_layer_id in xrange(1, self._decoder_depth + 1):
        # input shape = (batch_size, input_seq_len, embedding_dimension + hidden_dimension)
        self._net['dec_' + str(dec_layer_id)] = GRULayer(
            incoming=self._net['dec_' + str(dec_layer_id - 1)],
            num_units=self._hidden_layer_dim,
            grad_clipping=self._grad_clip,
            only_return_final=False,
            name='decoder_' + str(dec_layer_id),
            mask_input=self._net['input_y_mask'],
            hid_init=SliceLayer(self._net['hid_states_decoder'],
                                dec_layer_id - 1, axis=1))

    self._net['dec'] = self._net['dec_' + str(self._decoder_depth)]
def test_gru_variable_input_size():
    # test that None for both seq_len and batch_size works
    num_batch, n_features1 = 6, 5
    num_units = 13
    x = T.tensor3()

    in_shp = (None, None, n_features1)
    l_inp = InputLayer(in_shp)
    x_in1 = np.ones((num_batch + 1, 10, n_features1)).astype('float32')
    x_in2 = np.ones((num_batch, 15, n_features1)).astype('float32')

    l_rec = GRULayer(l_inp, num_units=num_units, backwards=False)
    output = helper.get_output(l_rec, x)
    output.eval({x: x_in1})
    output.eval({x: x_in2})
def gru_column(input, num_units, hidden, **kwargs):
    kwargs.pop("only_return_final", None)
    assert isinstance(hidden, (list, tuple))
    name = kwargs.pop("name", "default")

    column = [input]
    for i, l_hidden in enumerate(hidden):
        kwargs_ = kwargs.copy()
        if isinstance(l_hidden, Layer):
            kwargs_.pop("learn_init", None)
            kwargs_["hid_init"] = l_hidden

        layer = GRULayer(column[-1], num_units,
                         name=os.path.join(name, "gru_%02d" % i),
                         **kwargs_)
        column.append(layer)

    return column[1:]
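# Usage sketch (not part of the original sources): stack two GRULayers with
# gru_column, wiring per-layer initial hidden states through `hidden`. The layer
# names and shapes here are illustrative assumptions.
def _demo_gru_column():
    l_in = InputLayer((None, None, 20))
    h0 = InputLayer((None, 32))    # hid_init for the first GRU
    h1 = InputLayer((None, 32))    # hid_init for the second GRU
    grus = gru_column(l_in, num_units=32, hidden=[h0, h1],
                      grad_clipping=10., name="col")
    return grus[-1]                # topmost GRULayer of the column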
def test_gru_nparams_hid_init_layer():
    # test that you can see layers through hid_init
    l_inp = InputLayer((2, 2, 3))
    l_inp_h = InputLayer((2, 5))
    l_inp_h_de = DenseLayer(l_inp_h, 7)
    l_gru = GRULayer(l_inp, 7, hid_init=l_inp_h_de)

    # directly check the layers can be seen through hid_init
    assert lasagne.layers.get_all_layers(l_gru) == [l_inp, l_inp_h,
                                                    l_inp_h_de, l_gru]

    # 3*n_gates + 2
    # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
    # 2 is for the W and b parameters in the DenseLayer
    assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 11

    # GRU bias params(3) + Dense bias params(1)
    assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4
def _add_utterance_encoder(self):
    # input shape = (batch_size * input_context_size, input_seq_len, embedding_dimension)
    self._add_forward_backward_encoder_layer()

    for enc_layer_id in xrange(1, self._encoder_depth):
        is_last_encoder_layer = enc_layer_id == self._encoder_depth - 1
        return_only_final_state = is_last_encoder_layer

        # input shape = (batch_size * input_context_size, input_seq_len, embedding_dimension)
        self._net['enc_' + str(enc_layer_id)] = GRULayer(
            incoming=self._net['enc_' + str(enc_layer_id - 1)],
            num_units=self._hidden_layer_dim,
            grad_clipping=self._grad_clip,
            only_return_final=return_only_final_state,
            name='encoder_' + str(enc_layer_id),
            mask_input=self._net['input_x_mask'])

    self._net['enc'] = self._net['enc_' + str(self._encoder_depth - 1)]
def gated_layer(incoming, num_units, grad_clipping, only_return_final, backwards,
                gated_layer_type, mask_input=None,
                cell_init=lasagne.init.Constant(0.),
                hid_init=lasagne.init.Constant(0.),
                resetgate=lasagne.layers.Gate(W_cell=None),
                updategate=lasagne.layers.Gate(W_cell=None),
                hidden_update=lasagne.layers.Gate(
                    W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
                name=None):
    if gated_layer_type == "gru":
        return GRULayer(incoming, num_units,
                        mask_input=mask_input,
                        grad_clipping=grad_clipping,
                        only_return_final=only_return_final,
                        backwards=backwards,
                        hid_init=hid_init,
                        resetgate=resetgate,
                        updategate=updategate,
                        hidden_update=hidden_update,
                        name=name)
    else:
        return LSTMLayer(incoming, num_units,
                         mask_input=mask_input,
                         grad_clipping=grad_clipping,
                         nonlinearity=lasagne.nonlinearities.tanh,
                         only_return_final=only_return_final,
                         backwards=backwards,
                         cell_init=cell_init,
                         hid_init=hid_init,
                         resetgate=resetgate,
                         updategate=updategate,
                         hidden_update=hidden_update,
                         name=name)
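# Usage sketch (not part of the original sources): build a GRU-based encoder over a
# masked batch of sequences with gated_layer; passing gated_layer_type="lstm" would
# take the LSTMLayer branch instead. The shapes and names are illustrative assumptions.
def _demo_gated_layer():
    l_in = InputLayer((None, None, 30))
    l_mask = InputLayer((None, None))
    l_enc = gated_layer(l_in, num_units=64,
                        grad_clipping=10.,
                        only_return_final=True,    # keep only the last hidden state
                        backwards=False,
                        gated_layer_type="gru",
                        mask_input=l_mask,
                        name="encoder")
    return l_enc                                   # output shape: (batch_size, 64)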