def __init__(self, x, y, vocab_size, hidden_size, num_layers, pretrained_embeds=None): """ Implements a neural language model using an LSTM. Word y_n+1 ~ Softmax(U * h_n) :param x A minibatch: each row is an instance (a sequence), with batch_size rows :param y x shifted by 1, which are the target words to predict for the language modeling objective based on the hidden LSTM state :param vocab_size The number of types in the training data :param hidden_size The dimensionality of the word embeddings :param num_layers The number of stacked LSTM layers :param pretrained_embeds Pretrained embeddings for initialization as an ND array """ self.vocab_size = vocab_size self.hidden_size = hidden_size self.num_layers = num_layers # Initialize the word embedding table. If we have pretrained embeddings, we use those self.word_embedding_lookup = LookupTable(length=vocab_size, dim=hidden_size, name="word_embeddings") if pretrained_embeds is None: initialize(self.word_embedding_lookup, 0.8) else: assert pretrained_embeds.shape[0] == vocab_size and pretrained_embeds.shape[1] == hidden_size self.word_embedding_lookup.weights_init = Constant(pretrained_embeds) self.word_embedding_lookup.biases_init = Constant(0) self.word_embedding_lookup.initialize() self.word_embeddings = self.word_embedding_lookup.W self.y_hat, self.cost, self.cells = self.nn_fprop(x, y, num_layers)
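# A minimal, self-contained sketch (an assumption, not the class's actual nn_fprop)
# of the pipeline the docstring above describes in Blocks/Theano: look up word
# embeddings, run an LSTM, and model y_n+1 ~ Softmax(U * h_n). All sizes and brick
# names here are illustrative.
from theano import tensor
from blocks.bricks import Linear, Tanh, NDimensionalSoftmax
from blocks.bricks.lookup import LookupTable
from blocks.bricks.recurrent import LSTM
from blocks.initialization import IsotropicGaussian, Constant

vocab_size, hidden_size = 100, 16
x = tensor.lmatrix('x')  # (batch, time) word indices
y = tensor.lmatrix('y')  # x shifted by one position (next words)

lookup = LookupTable(length=vocab_size, dim=hidden_size, name='word_embeddings')
to_lstm = Linear(input_dim=hidden_size, output_dim=4 * hidden_size, name='to_lstm')
lstm = LSTM(dim=hidden_size, activation=Tanh(), name='lstm')
to_vocab = Linear(input_dim=hidden_size, output_dim=vocab_size, name='to_vocab')
softmax = NDimensionalSoftmax()

for brick in (lookup, to_lstm, lstm, to_vocab):
    brick.weights_init = IsotropicGaussian(0.1)
    brick.biases_init = Constant(0)
    brick.initialize()

embeddings = lookup.apply(x.T)                         # (time, batch, hidden)
hidden, cells = lstm.apply(to_lstm.apply(embeddings))  # LSTM over time
logits = to_vocab.apply(hidden)                        # (time, batch, vocab) = U * h_n
cost = softmax.categorical_cross_entropy(y.T, logits, extra_ndim=1).mean()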
def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): super(LookupFeedback, self).__init__(**kwargs) update_instance(self, locals()) self.lookup = LookupTable(num_outputs, feedback_dim, weights_init=self.weights_init) self.children = [self.lookup]
def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): self.num_outputs = num_outputs self.feedback_dim = feedback_dim self.lookup = LookupTable(num_outputs, feedback_dim) children = [self.lookup] + kwargs.get('children', []) super(LookupFeedback, self).__init__(children=children, **kwargs)
def __init__(self, emb_dim, dim, num_input_words, num_output_words, vocab, **kwargs): if emb_dim == 0: emb_dim = dim if num_input_words == 0: num_input_words = vocab.size() if num_output_words == 0: num_output_words = vocab.size() self._num_input_words = num_input_words self._num_output_words = num_output_words self._vocab = vocab self._word_to_id = WordToIdOp(self._vocab) children = [] self._main_lookup = LookupTable(self._num_input_words, emb_dim, name='main_lookup') self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork') self._encoder_rnn = LSTM(dim, name='encoder_rnn') self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork') self._decoder_rnn = LSTM(dim, name='decoder_rnn') children.extend([self._main_lookup, self._encoder_fork, self._encoder_rnn, self._decoder_fork, self._decoder_rnn]) self._pre_softmax = Linear(dim, self._num_output_words) self._softmax = NDimensionalSoftmax() children.extend([self._pre_softmax, self._softmax]) super(LanguageModel, self).__init__(children=children, **kwargs)
def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs): super(BidirectionalEncoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.lookup = LookupTable(name='embeddings') self.bidir = BidirectionalWMT15( GatedRecurrent(activation=Tanh(), dim=state_dim)) self.fwd_fork = Fork([ name for name in self.bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='fwd_fork') self.back_fork = Fork([ name for name in self.bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='back_fork') self.children = [ self.lookup, self.bidir, self.fwd_fork, self.back_fork ]
def __init__(self, num_input_words, emb_dim, dim, vocab, lookup=None, fork_and_rnn=None, **kwargs): if num_input_words > 0: logger.info("Restricting def vocab to " + str(num_input_words)) self._num_input_words = num_input_words else: self._num_input_words = vocab.size() self._vocab = vocab children = [] if lookup is None: self._def_lookup = LookupTable(self._num_input_words, emb_dim, name='def_lookup') else: self._def_lookup = lookup if fork_and_rnn is None: self._def_fork = Linear(emb_dim, 4 * dim, name='def_fork') self._def_rnn = LSTM(dim, name='def_rnn') else: self._def_fork, self._def_rnn = fork_and_rnn children.extend([self._def_lookup, self._def_fork, self._def_rnn]) super(LSTMReadDefinitions, self).__init__(children=children, **kwargs)
def __init__(self, vocab_size, embedding_dim, dgru_state_dim, dgru_depth, **kwargs): super(Decimator, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.dgru_state_dim = dgru_state_dim self.lookup = LookupTable(name='embeddings') self.dgru_depth = dgru_depth # representation self.dgru = RecurrentStack([ DGRU(activation=Tanh(), dim=self.dgru_state_dim) for _ in range(dgru_depth) ], skip_connections=True) # importance of this representation self.bidir_w = Bidirectional(RecurrentWithFork( DGRU(activation=Tanh(), dim=self.dgru_state_dim // 2), self.embedding_dim, name='src_word_with_fork'), name='bidir_src_word_encoder') self.gru_fork = Fork( [name for name in self.dgru.apply.sequences if name != 'mask'], prototype=Linear(), name='gru_fork') # map to an energy scalar self.wl = Linear(input_dim=dgru_state_dim, output_dim=1) self.children = [ self.lookup, self.dgru, self.gru_fork, self.bidir_w, self.wl ]
def __init__(self, vocab_size, embedding_dim, igru_state_dim, emitter=None, feedback_brick=None, merge=None, merge_prototype=None, post_merge=None, merged_dim=None, igru=None, **kwargs): self.igru = igru self.lookup = LookupTable(name='embeddings') self.vocab_size = vocab_size self.igru_state_dim = igru_state_dim self.gru_to_softmax = Linear(input_dim=igru_state_dim, output_dim=vocab_size) self.embedding_dim = embedding_dim self.gru_fork = Fork([ name for name in self.igru.apply.sequences if name != 'mask' and name != 'input_states' ], prototype=Linear(), name='gru_fork') kwargs['children'] = [ self.igru, self.lookup, self.gru_to_softmax, self.gru_fork ] super(Interpolator, self).__init__(emitter=emitter, feedback_brick=feedback_brick, merge=merge, merge_prototype=merge_prototype, post_merge=post_merge, merged_dim=merged_dim, **kwargs)
def __init__( self, encoder_type, num_characters, input_dim, encoder_dim, **kwargs): assert encoder_type in [None, 'bidirectional'] self.encoder_type = encoder_type super(Encoder, self).__init__(**kwargs) self.children = [] if encoder_type in ['lookup', 'bidirectional']: self.embed_label = LookupTable( num_characters, input_dim, name='embed_label') self.children += [ self.embed_label] else: # If there is no encoder. assert num_characters == input_dim if encoder_type == 'bidirectional': transition = RecurrentWithFork( GatedRecurrent(dim=encoder_dim).apply, input_dim, name='encoder_transition') self.encoder = Bidirectional(transition, name='encoder') self.children.append(self.encoder)
def __init__(self, morpho_idxs, masks, word_idxs, morpho_vocab_size, hidden_size, word_embeds): """ Implements a morpheme-level prior by computing the sum of KL-Div of the elements of the morpheme embeddings and the word embeddings (where these elements are in [0,1] and are taken as Bernoulli dists). :param morpho_idxs A 3D tensor of batch_size x seq_length x max_morphemes_per_word Where the 3rd dimension is morpheme indices, padded with 0's so all words have the same morpheme decomposition length :param masks A 4D tensor of bits which select which values in morpho_idxs are padding and which are actual morphemes. 4D is needed for broadcasting :param word_idxs A 2D matrix of batch_size x seq_length of word indices :param morpho_vocab_size the number of morpheme types seen in training data :param hidden_size the dimensionality of morpheme / word embeddings :param word_embeds the unconstrained word embeddings from the language model """ self.morpho_vocab_size = morpho_vocab_size self.hidden_size = hidden_size self.word_embed_lookup = word_embeds # These are the unconstrained word embeddings self.morpho_embed_lookup = LookupTable(length=morpho_vocab_size, dim=hidden_size, name="morpho_embeddings") initialize(self.morpho_embed_lookup, 0.8) self.cost = self.compute_cost(morpho_idxs, masks, word_idxs) self.cost.name = "morpho_cost" self.norm = self.morpho_embed_lookup.W.norm(2) self.norm.name = "morpho_embed_norm"
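# The "KL-Div of the elements ... taken as Bernoulli dists" above presumably means
# the elementwise Bernoulli KL divergence between a morpheme embedding p and the
# corresponding word embedding q, both squashed into [0, 1]:
#   KL(p || q) = p * log(p / q) + (1 - p) * log((1 - p) / (1 - q)).
# A numpy sketch of that elementwise term (an assumption; compute_cost itself is
# not shown here):
import numpy

def bernoulli_kl(p, q, eps=1e-6):
    # Clip to avoid log(0); sum over embedding dimensions if desired.
    p = numpy.clip(p, eps, 1 - eps)
    q = numpy.clip(q, eps, 1 - eps)
    return p * numpy.log(p / q) + (1 - p) * numpy.log((1 - p) / (1 - q))

print(bernoulli_kl(numpy.array([0.2, 0.9]), numpy.array([0.25, 0.5])))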
def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): self.num_outputs = num_outputs self.feedback_dim = feedback_dim self.lookup = LookupTable(num_outputs, feedback_dim) children = [self.lookup] kwargs.setdefault('children', []).extend(children) super(LookupFeedback, self).__init__(**kwargs)
def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): super(LookupFeedback, self).__init__(**kwargs) self.num_outputs = num_outputs self.feedback_dim = feedback_dim self.lookup = LookupTable(num_outputs, feedback_dim, weights_init=self.weights_init) self.children = [self.lookup]
def create_rnn(hidden_dim, vocab_dim,mode="rnn"): # input x = tensor.imatrix('inchar') y = tensor.imatrix('outchar') # W = LookupTable( name = "W1", #dim = hidden_dim*4, dim = hidden_dim, length = vocab_dim, weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0) ) if mode == "lstm": # Long Short Term Memory H = LSTM( hidden_dim, name = 'H', weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0.0) ) else: # recurrent history weight H = SimpleRecurrent( name = "H", dim = hidden_dim, activation = Tanh(), weights_init = initialization.IsotropicGaussian(0.01) ) # S = Linear( name = "W2", input_dim = hidden_dim, output_dim = vocab_dim, weights_init = initialization.IsotropicGaussian(0.01), biases_init = initialization.Constant(0) ) A = NDimensionalSoftmax( name = "softmax" ) initLayers([W,H,S]) activations = W.apply(x) hiddens = H.apply(activations)#[0] activations2 = S.apply(hiddens) y_hat = A.apply(activations2, extra_ndim=1) cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean() cg = ComputationGraph(cost) #print VariableFilter(roles=[WEIGHT])(cg.variables) #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables) layers = (x, W, H, S, A, y) return cg, layers, y_hat, cost
def __init__(self, vocab_size, embedding_dim, igru_state_dim, igru_depth, trg_dgru_depth, emitter, feedback_brick, merge=None, merge_prototype=None, post_merge=None, **kwargs): merged_dim = igru_state_dim if not merge: merge = Merge(input_names=kwargs['source_names'], prototype=merge_prototype) if not post_merge: post_merge = Bias(dim=merged_dim) # for compatibility if igru_depth == 1: self.igru = IGRU(dim=igru_state_dim) else: self.igru = RecurrentStack( [IGRU(dim=igru_state_dim, name='igru')] + [ UpperIGRU(dim=igru_state_dim, activation=Tanh(), name='upper_igru' + str(i)) for i in range(1, igru_depth) ], skip_connections=True) self.embedding_dim = embedding_dim self.emitter = emitter self.feedback_brick = feedback_brick self.merge = merge self.post_merge = post_merge self.merged_dim = merged_dim self.igru_depth = igru_depth self.trg_dgru_depth = trg_dgru_depth self.lookup = LookupTable(name='embeddings') self.vocab_size = vocab_size self.igru_state_dim = igru_state_dim self.gru_to_softmax = Linear(input_dim=igru_state_dim, output_dim=vocab_size) self.gru_fork = Fork([ name for name in self.igru.apply.sequences if name != 'mask' and name != 'input_states' ], prototype=Linear(), name='gru_fork') children = [ self.emitter, self.feedback_brick, self.merge, self.post_merge, self.igru, self.lookup, self.gru_to_softmax, self.gru_fork ] kwargs.setdefault('children', []).extend(children) super(Interpolator, self).__init__(**kwargs)
def test_lookup_table(): lt = LookupTable(5, 3) lt.allocate() lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX)) x = tensor.lmatrix("x") y = lt.apply(x) f = theano.function([x], [y]) x_val = [[1, 2], [0, 3]] desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]], dtype=theano.config.floatX) assert_equal(f(x_val)[0], desired) # Test get_dim assert_equal(lt.get_dim(lt.apply.inputs[0]), 0) assert_equal(lt.get_dim(lt.apply.outputs[0]), lt.dim) assert_raises(ValueError, lt.get_dim, 'random_name') # Test feedforward interface assert lt.input_dim == 0 assert lt.output_dim == 3 lt.output_dim = 4 assert lt.output_dim == 4 def assign_input_dim(): lt.input_dim = 11 assert_raises(ValueError, assign_input_dim) lt.input_dim = 0
def create_model(self): input_dim = self.input_dim x = self.x y = self.y p = self.p mask = self.mask hidden_dim = self.hidden_dim embedding_dim = self.embedding_dim lookup = LookupTable(self.dict_size, embedding_dim, weights_init=IsotropicGaussian(0.001), name='LookupTable') x_to_h = Linear(embedding_dim, hidden_dim * 4, name='x_to_h', weights_init=IsotropicGaussian(0.001), biases_init=Constant(0.0)) lstm = LSTM(hidden_dim, name='lstm', weights_init=IsotropicGaussian(0.001), biases_init=Constant(0.0)) h_to_o = MLP([Logistic()], [hidden_dim, 1], weights_init=IsotropicGaussian(0.001), biases_init=Constant(0), name='h_to_o') lookup.initialize() x_to_h.initialize() lstm.initialize() h_to_o.initialize() embed = lookup.apply(x).reshape( (x.shape[0], x.shape[1], self.embedding_dim)) embed.name = "embed_vec" x_transform = x_to_h.apply(embed.transpose(1, 0, 2)) x_transform.name = "Transformed X" self.lookup = lookup self.x_to_h = x_to_h self.lstm = lstm self.h_to_o = h_to_o #if mask is None: h, c = lstm.apply(x_transform) #else: #h, c = lstm.apply(x_transform, mask=mask) h.name = "hidden_state" c.name = "cell state" # only values of hidden units of the last timeframe are used for # the classification indices = T.sum(mask, axis=0) - 1 rel_hid = h[indices, T.arange(h.shape[1])] out = self.h_to_o.apply(rel_hid) probs = out return probs
def nn_fprop(x, y, vocab_size, hidden_size, num_layers, model): lookup = LookupTable(length=vocab_size, dim=hidden_size) initialize([lookup]) h = lookup.apply(x) for i in range(num_layers): if model == 'rnn': h = rnn_layer(hidden_size, h, i) if model == 'gru': h = gru_layer(hidden_size, h, i) if model == 'lstm': h = lstm_layer(hidden_size, h, i) return softmax_layer(h, y, vocab_size, hidden_size)
def __init__(self, vocab_size, topical_embedding_dim, state_dim, word_num, batch_size, **kwargs): super(topicalq_transformer, self).__init__(**kwargs) self.vocab_size = vocab_size self.word_embedding_dim = topical_embedding_dim self.state_dim = state_dim self.word_num = word_num self.batch_size = batch_size self.look_up = LookupTable(name='topical_embeddings') self.transformer = MLP(activations=[Tanh()], dims=[self.word_embedding_dim * self.word_num, self.state_dim], name='topical_transformer') self.children = [self.look_up, self.transformer]
def build_model(self, x, config): logger.info('building %s model for: %s ', self.nn_model, self.name) vocabsize = self.get_vocab_size() logger.info('%s vocab size is: %d', self.name, vocabsize) self.embeddings, self.dim_emb = self.get_embeddings() if self.tune_tune: logger.info('%s lookuptable with size (%d, %d) will be tuned.', self.name, vocabsize, self.dim_emb) lookup = LookupTable(length=vocabsize, dim=self.dim_emb) lookup.allocate() # add_role(lookup.W, WEIGHT) lookup.W.name = 'lt.W' else: logger.info('%s lookuptable with size (%d, %d) will NOT be tuned.', self.name, vocabsize, self.dim_emb) lookup = MyLookupTable(length=vocabsize, dim=self.dim_emb) lookup.allocate() lookup.name = self.name + 'lookuptable' lookup.W.set_value(self.embeddings) xemb = lookup.apply(x) xemb = debug_print(xemb, 'xemb', False) if 'cnn' in self.nn_model: logger.info('CNN') feature_vec, feature_vec_len = create_cnn_general(xemb, self.dim_emb, self.max_len, config, self.name) elif self.nn_model == 'lstm': feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, False, config, self.name) elif self.nn_model == 'bilstm': feature_vec, feature_vec_len = create_lstm(xemb, self.dim_emb, True, config, self.name) elif self.nn_model == 'rnn': feature_vec, feature_vec_len = create_rnn(xemb, self.dim_emb, config, self.name) elif self.nn_model == 'ff': feature_vec, feature_vec_len = create_ff(xemb, self.dim_emb, self.max_len, config) elif self.nn_model == 'mean': feature_vec, feature_vec_len = create_mean(xemb, self.dim_emb, self.max_len, config) return feature_vec, feature_vec_len
def test_lookup_table(): lt = LookupTable(5, 3) lt.allocate() lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX)) x = tensor.lmatrix("x") y = lt.apply(x) f = theano.function([x], [y]) x_val = [[1, 2], [0, 3]] desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]], dtype=theano.config.floatX) assert_equal(f(x_val)[0], desired)
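# The expected values in the two tests above are plain fancy indexing into the
# weight matrix: LookupTable.apply(x) behaves like W[x], mapping every integer
# index to its embedding row and adding one trailing dimension of size dim.
# A quick numpy check of the same numbers (illustrative, outside Theano):
import numpy

W = numpy.arange(15).reshape(5, 3)      # the 5x3 table set in the test
x_val = numpy.array([[1, 2], [0, 3]])   # (batch, sequence) of indices
print(W[x_val])                         # shape (2, 2, 3), equals `desired`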
def __init__(self, vocab_size, embedding_dim, n_layers, skip_connections, state_dim, **kwargs): """Sole constructor. Args: vocab_size (int): Source vocabulary size embedding_dim (int): Dimension of the embedding layer n_layers (int): Number of layers. Layers share the same weight matrices. skip_connections (bool): Skip connections connect the source word embeddings directly with deeper layers to propagate the gradient more efficiently state_dim (int): Number of hidden units in the recurrent layers. """ super(DeepBidirectionalEncoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.n_layers = n_layers self.state_dim = state_dim self.skip_connections = skip_connections self.lookup = LookupTable(name='embeddings') self.bidirs = [] self.fwd_forks = [] self.back_forks = [] for i in xrange(self.n_layers): bidir = BidirectionalWMT15(GatedRecurrent(activation=Tanh(), dim=state_dim), name='bidir%d' % i) self.bidirs.append(bidir) self.fwd_forks.append( Fork([ name for name in bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='fwd_fork%d' % i)) self.back_forks.append( Fork([ name for name in bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='back_fork%d' % i)) self.children = [self.lookup] \ + self.bidirs \ + self.fwd_forks \ + self.back_forks
def create_rnn(hidden_dim, vocab_dim, mode="rnn"): # input x = tensor.imatrix('inchar') y = tensor.imatrix('outchar') # W = LookupTable( name="W1", #dim = hidden_dim*4, dim=hidden_dim, length=vocab_dim, weights_init=initialization.IsotropicGaussian(0.01), biases_init=initialization.Constant(0)) if mode == "lstm": # Long Short Term Memory H = LSTM(hidden_dim, name='H', weights_init=initialization.IsotropicGaussian(0.01), biases_init=initialization.Constant(0.0)) else: # recurrent history weight H = SimpleRecurrent( name="H", dim=hidden_dim, activation=Tanh(), weights_init=initialization.IsotropicGaussian(0.01)) # S = Linear(name="W2", input_dim=hidden_dim, output_dim=vocab_dim, weights_init=initialization.IsotropicGaussian(0.01), biases_init=initialization.Constant(0)) A = NDimensionalSoftmax(name="softmax") initLayers([W, H, S]) activations = W.apply(x) hiddens = H.apply(activations) #[0] activations2 = S.apply(hiddens) y_hat = A.apply(activations2, extra_ndim=1) cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean() cg = ComputationGraph(cost) #print VariableFilter(roles=[WEIGHT])(cg.variables) #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables) layers = (x, W, H, S, A, y) return cg, layers, y_hat, cost
def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size, subword_RNN_hidden_state_size, table_width=0.08, init_type='xavier', **kwargs): super(LSTMCompositionalLayer, self).__init__(**kwargs) self.batch_size = batch_size self.num_subwords = num_subwords # number of subwords which make up a word self.num_words = num_words # number of words in the sentence self.subword_embedding_size = subword_embedding_size self.input_vocab_size = input_vocab_size self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size self.table_width = table_width # create the look up table self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup') self.lookup.weights_init = Uniform(width=table_width) self.lookup.biases_init = Constant(0) if init_type == 'xavier': linear_init = XavierInitializationOriginal(self.subword_embedding_size, self.subword_RNN_hidden_state_size) lstm_init = XavierInitializationOriginal(self.subword_embedding_size, self.subword_RNN_hidden_state_size) else: # default is gaussian linear_init = IsotropicGaussian() lstm_init = IsotropicGaussian() # The `inputs` are then split in this order: Input gates, forget gates, cells and output gates self.linear_forward = Linear(input_dim=self.subword_embedding_size, output_dim=self.subword_RNN_hidden_state_size * 4, name='linear_forward', weights_init=linear_init, biases_init=Constant(0.0)) self.compositional_subword_to_word_RNN_forward = LSTM( dim=self.subword_RNN_hidden_state_size, activation=Tanh(), name='subword_RNN_forward', weights_init=lstm_init, biases_init=Constant(0.0)) self.children = [self.lookup, self.linear_forward, self.compositional_subword_to_word_RNN_forward]
def __init__(self, dimension, alphabet_size, **kwargs): super(WordReverser, self).__init__(**kwargs) encoder = Bidirectional( SimpleRecurrent(dim=dimension, activation=Tanh())) fork = Fork([name for name in encoder.prototype.apply.sequences if name != 'mask']) fork.input_dim = dimension fork.output_dims = [dimension for name in fork.input_names] lookup = LookupTable(alphabet_size, dimension) transition = SimpleRecurrent( activation=Tanh(), dim=dimension, name="transition") attention = SequenceContentAttention( state_names=transition.apply.states, attended_dim=2 * dimension, match_dim=dimension, name="attention") readout = Readout( readout_dim=alphabet_size, source_names=[transition.apply.states[0], attention.take_glimpses.outputs[0]], emitter=SoftmaxEmitter(name="emitter"), feedback_brick=LookupFeedback(alphabet_size, dimension), name="readout") generator = SequenceGenerator( readout=readout, transition=transition, attention=attention, name="generator") self.lookup = lookup self.fork = fork self.encoder = encoder self.generator = generator self.children = [lookup, fork, encoder, generator]
def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size, subword_RNN_hidden_state_size, add_one = True, **kwargs): super(CompositionalLayerToyBidirectional, self).__init__(**kwargs) self.batch_size = batch_size self.num_subwords = num_subwords # number of subwords which make up a word self.num_words = num_words # number of words in the sentence self.subword_embedding_size = subword_embedding_size self.input_vocab_size = input_vocab_size self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size self.add_one = add_one #adds 1 to the backwards embeddings # create the look up table self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup') self.lookup.weights_init = Uniform(width=0.08) self.lookup.biases_init = Constant(0) # has one RNN which reads the subwords into a word embedding self.compositional_subword_to_word_RNN_forward = SimpleRecurrent( dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN_forward', weights_init=Identity_init()) self.compositional_subword_to_word_RNN_backward = SimpleRecurrent( dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN_backward', weights_init=Identity_init()) self.children = [self.lookup, self.compositional_subword_to_word_RNN_forward, self.compositional_subword_to_word_RNN_backward]
class BidirectionalEncoder(Initializable): """Encoder of RNNsearch model.""" def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs): super(BidirectionalEncoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.lookup = LookupTable(name='embeddings') self.bidir = NewBidirectional( GatedRecurrent(activation=Tanh(), dim=state_dim)) self.fwd_fork = Fork([ name for name in self.bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='fwd_fork') self.back_fork = Fork([ name for name in self.bidir.prototype.apply.sequences if name != 'mask' ], prototype=Linear(), name='back_fork') self.children = [ self.lookup, self.bidir, self.fwd_fork, self.back_fork ] def _push_allocation_config(self): self.lookup.length = self.vocab_size self.lookup.dim = self.embedding_dim self.fwd_fork.input_dim = self.embedding_dim self.fwd_fork.output_dims = [ self.bidir.children[0].get_dim(name) for name in self.fwd_fork.output_names ] self.back_fork.input_dim = self.embedding_dim self.back_fork.output_dims = [ self.bidir.children[1].get_dim(name) for name in self.back_fork.output_names ] @application(inputs=['source_sentence', 'source_sentence_mask'], outputs=['representation']) def apply(self, source_sentence, source_sentence_mask): # Time as first dimension. source_sentence = source_sentence.T source_sentence_mask = source_sentence_mask.T embeddings = self.lookup.apply(source_sentence) representation = self.bidir.apply( # Conversion to embedding representation here. merge(self.fwd_fork.apply(embeddings, as_dict=True), {'mask': source_sentence_mask}), merge(self.back_fork.apply(embeddings, as_dict=True), {'mask': source_sentence_mask})) self.representation = representation return representation
class LookupFeedback(AbstractFeedback, Initializable): """A feedback brick for the case when readouts are integers. Stores and retrieves distributed representations of integers. Notes ----- Currently works only with lazy initialization (cannot be initialized with a single constructor call). """ def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): super(LookupFeedback, self).__init__(**kwargs) update_instance(self, locals()) self.lookup = LookupTable(num_outputs, feedback_dim, kwargs.get("weights_init")) self.children = [self.lookup] def _push_allocation_config(self): self.lookup.length = self.num_outputs self.lookup.dim = self.feedback_dim @application def feedback(self, outputs, **kwargs): assert self.output_dim == 0 return self.lookup.lookup(outputs) def get_dim(self, name): if name == 'feedback': return self.feedback_dim return super(LookupFeedback, self).get_dim(name)
def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): super(LookupFeedback, self).__init__(**kwargs) update_instance(self, locals()) self.lookup = LookupTable(num_outputs, feedback_dim, kwargs.get("weights_init")) self.children = [self.lookup]
class LookupFeedback(AbstractFeedback, Initializable): """A feedback brick for the case when readouts are integers. Stores and retrieves distributed representations of integers. Notes ----- Currently works only with lazy initialization (cannot be initialized with a single constructor call). """ def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): super(LookupFeedback, self).__init__(**kwargs) update_instance(self, locals()) self.lookup = LookupTable(num_outputs, feedback_dim, weights_init=self.weights_init) self.children = [self.lookup] def _push_allocation_config(self): self.lookup.length = self.num_outputs self.lookup.dim = self.feedback_dim @application def feedback(self, outputs, **kwargs): assert self.output_dim == 0 return self.lookup.lookup(outputs) def get_dim(self, name): if name == 'feedback': return self.feedback_dim return super(LookupFeedback, self).get_dim(name)
class LookupFeedback(AbstractFeedback, Initializable): """A feedback brick for the case when readouts are integers. Stores and retrieves distributed representations of integers. """ def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): super(LookupFeedback, self).__init__(**kwargs) self.num_outputs = num_outputs self.feedback_dim = feedback_dim self.lookup = LookupTable(num_outputs, feedback_dim, weights_init=self.weights_init) self.children = [self.lookup] def _push_allocation_config(self): self.lookup.length = self.num_outputs self.lookup.dim = self.feedback_dim @application def feedback(self, outputs): assert self.output_dim == 0 return self.lookup.apply(outputs) def get_dim(self, name): if name == 'feedback': return self.feedback_dim return super(LookupFeedback, self).get_dim(name)
class topicalq_transformer(Initializable): def __init__(self, vocab_size, topical_embedding_dim, state_dim, word_num, batch_size, **kwargs): super(topicalq_transformer, self).__init__(**kwargs) self.vocab_size = vocab_size self.word_embedding_dim = topical_embedding_dim self.state_dim = state_dim self.word_num = word_num self.batch_size = batch_size self.look_up = LookupTable(name='topical_embeddings') self.transformer = MLP(activations=[Tanh()], dims=[self.word_embedding_dim * self.word_num, self.state_dim], name='topical_transformer') self.children = [self.look_up, self.transformer] def _push_allocation_config(self): self.look_up.length = self.vocab_size self.look_up.dim = self.word_embedding_dim # do we have to push_config? remain unsure @application(inputs=['source_topical_word_sequence'], outputs=['topical_embedding']) def apply(self, source_topical_word_sequence): # Time as first dimension source_topical_word_sequence = source_topical_word_sequence.T word_topical_embeddings = self.look_up.apply(source_topical_word_sequence) word_topical_embeddings = word_topical_embeddings.swapaxes(0, 1) # requires testing concatenated_topical_embeddings = tensor.reshape(word_topical_embeddings, [word_topical_embeddings.shape[0], word_topical_embeddings.shape[1] * word_topical_embeddings.shape[2]]) topical_embedding = self.transformer.apply(concatenated_topical_embeddings) return topical_embedding
def __init__(self, dimension, alphabet_size, **kwargs): super(SimpleGenerator, self).__init__(**kwargs) lookup = LookupTable(alphabet_size, dimension) transition = SimpleRecurrent(activation=Tanh(), dim=dimension, name="transition") attention = SequenceContentAttention( state_names=transition.apply.states, attended_dim=dimension, match_dim=dimension, name="attention") readout = Readout(readout_dim=alphabet_size, source_names=[ transition.apply.states[0], attention.take_glimpses.outputs[0] ], emitter=SoftmaxEmitter(name="emitter"), feedback_brick=LookupFeedback( alphabet_size, dimension), name="readout") generator = SequenceGenerator(readout=readout, transition=transition, attention=attention, name="generator") self.lookup = lookup self.generator = generator self.children = [lookup, generator]
class LookupFeedback(AbstractFeedback, Initializable): """A feedback brick for the case when readouts are integers. Stores and retrieves distributed representations of integers. """ def __init__(self, num_outputs=None, feedback_dim=None, **kwargs): self.num_outputs = num_outputs self.feedback_dim = feedback_dim self.lookup = LookupTable(num_outputs, feedback_dim) children = [self.lookup] kwargs.setdefault('children', []).extend(children) super(LookupFeedback, self).__init__(**kwargs) def _push_allocation_config(self): self.lookup.length = self.num_outputs self.lookup.dim = self.feedback_dim @application def feedback(self, outputs): assert self.output_dim == 0 return self.lookup.apply(outputs) def get_dim(self, name): if name == 'feedback': return self.feedback_dim return super(LookupFeedback, self).get_dim(name)
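# All LookupFeedback variants above lean on Blocks' lazy configuration: the inner
# LookupTable is built without sizes, which _push_allocation_config fills in from
# num_outputs/feedback_dim before allocation. The same pattern in isolation (sizes
# and the brick name are made up):
from blocks.bricks.lookup import LookupTable
from blocks.initialization import IsotropicGaussian

lookup = LookupTable(name='feedback_lookup')  # no length/dim yet (lazy)
lookup.length = 10                            # e.g. num_outputs
lookup.dim = 4                                # e.g. feedback_dim
lookup.weights_init = IsotropicGaussian(0.01)
lookup.initialize()                           # allocates and initializes W
print(lookup.W.get_value().shape)             # (10, 4)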
def __init__(self, dimen, vocab_size): #{ # No idea what this is doing, but otherwise "allocated" is not set super(MorphGen, self).__init__(self) # The encoder encoder = Bidirectional(SimpleRecurrent(dim=dimen, activation=Tanh())) # What is this doing ? fork = Fork([name for name in encoder.prototype.apply.sequences if name != 'mask']) fork.input_dim = dimen fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names] lookup = LookupTable(vocab_size, dimen) transition = SimpleRecurrent(dim=dimen, activation=Tanh(), name="transition") atten = SequenceContentAttention(state_names=transition.apply.states,attended_dim=2*dimen, match_dim=dimen, name="attention") readout = Readout( readout_dim=vocab_size, source_names=[transition.apply.states[0], atten.take_glimpses.outputs[0]], emitter=SoftmaxEmitter(name="emitter"), feedback_brick=LookupFeedback(vocab_size, dimen), name="readout"); generator = SequenceGenerator(readout=readout, transition=transition, attention=atten,name="generator") self.lookup = lookup self.fork = fork self.encoder = encoder self.generator = generator self.children = [lookup, fork, encoder, generator]
def __init__(self, num_input_words, emb_dim, dim, vocab, lookup=None, translate=True, normalize=True, **kwargs): if num_input_words > 0: logger.info("Restricting def vocab to " + str(num_input_words)) self._num_input_words = num_input_words else: self._num_input_words = vocab.size() self._vocab = vocab self._translate = translate self._normalize = normalize children = [] if lookup is None: logger.info("emb_dim={}".format(emb_dim)) self._def_lookup = LookupTable(self._num_input_words, emb_dim, name='def_lookup') else: self._def_lookup = lookup # Makes sense for shared lookup. Then we precondition embeddings. # Doesn't make sense otherwise (WH = W') # TODO(kudkudak): Refactor redundant translate parameter if self._translate: if emb_dim == dim: raise Exception("Redundant layer") self._def_translate = Linear(emb_dim, dim, name='def_translate') children.extend([self._def_translate]) else: if emb_dim != dim: raise Exception("Please pass translate=True if emb_dim != dim") children.append(self._def_lookup) super(MeanPoolReadDefinitions, self).__init__(children=children, **kwargs)
class CompositionalLayerToyWithTables(Initializable): def __init__(self, batch_size, num_subwords, num_words, subword_embedding_size, input_vocab_size, subword_RNN_hidden_state_size, **kwargs): super(CompositionalLayerToyWithTables, self).__init__(**kwargs) self.batch_size = batch_size self.num_subwords = num_subwords # number of subwords which make up a word self.num_words = num_words # number of words in the sentence self.subword_embedding_size = subword_embedding_size self.input_vocab_size = input_vocab_size self.subword_RNN_hidden_state_size = subword_RNN_hidden_state_size # create the look up table self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name='input_lookup') self.lookup.weights_init = Uniform(width=0.08) self.lookup.biases_init = Constant(0) # has one RNN which reads the subwords into a word embedding self.compositional_subword_to_word_RNN = SimpleRecurrent( dim=self.subword_RNN_hidden_state_size, activation=Identity(), name='subword_RNN', weights_init=Identity_init()) self.children = [self.lookup, self.compositional_subword_to_word_RNN] ''' subword_id_input_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size). It is expected as a dtype=uint16 or equivalent subword_id_input_mask_ is a 3d tensor with the dimensions of shape = (num_words, num_subwords, batch_size). It is expected as a dtype=uint8 or equivalent and has binary values of 1 when there is data and zero otherwise. The look up table will return a 4d tensor with shape = (num_words, num_subwords, batch_size, embedding size) The RNN will eat up the subwords dimension, resulting in a 3d tensor of shape = (num_words, batch_size, RNN_hidden_value_size), which is returned as 'word_embeddings' Also returned is a 2d tensor of shape = (num_words, batch_size), which is the remaining mask indicating the length of the sentence for each sentence in the batch. i.e., 1 when there is a word, 0 otherwise. ''' @application(inputs=['subword_id_input_', 'subword_id_input_mask_'], outputs=['word_embeddings', 'word_embeddings_mask']) def apply(self, subword_id_input_, subword_id_input_mask_): ##shape = (num_words, num_subwords, batch_size, embedding size) subword_embeddings = self.lookup.apply(subword_id_input_) result, updates = theano.scan( #loop over each word and have the rnn eat up the subwords fn=lambda subword_embeddings, subword_id_input_mask_: self.compositional_subword_to_word_RNN.apply(subword_embeddings, mask=subword_id_input_mask_), sequences= [subword_embeddings, subword_id_input_mask_]) word_embeddings = result.dimshuffle(1,0,2,3) #put the states as the last dimension #remove this line to see the RNN states word_embeddings = word_embeddings[-1] #take only the last state, since we don't need the others #remove subword dim from mask #if the subword is empty then the word is empty; if not, the word is used word_embeddings_mask = subword_id_input_mask_.max(axis=1) return word_embeddings, word_embeddings_mask
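# Shape walk-through for the compositional layer above, with toy sizes (numpy
# stand-ins, illustrative only): the lookup turns a (num_words, num_subwords,
# batch_size) index tensor into a 4d embedding tensor; the per-word RNN collapses
# the subword axis, and max over that axis turns the subword mask into a word mask.
import numpy

num_words, num_subwords, batch_size, emb = 3, 4, 2, 5
W = numpy.random.rand(20, emb)  # a vocabulary of 20 subwords
ids = numpy.random.randint(0, 20, (num_words, num_subwords, batch_size))
mask = (numpy.random.rand(num_words, num_subwords, batch_size) > 0.3).astype('int8')

embeddings = W[ids]                  # (3, 4, 2, 5), like lookup.apply
word_embeddings = embeddings[:, -1]  # stand-in for the last RNN state: (3, 2, 5)
word_mask = mask.max(axis=1)         # (3, 2): 1 if the word has any subword
print(word_embeddings.shape, word_mask.shape)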
def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs): super(Encoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.lookup = LookupTable(name='embeddings') self.GRU = GatedRecurrent(activation=Tanh(), dim=state_dim) self.children = [self.lookup, self.GRU]
def __init__(self, dim, **kwargs): super(LookupBottom, self).__init__(**kwargs) self.dim = dim self.mask = tensor.matrix('inputs_mask') self.batch_inputs = {'inputs': tensor.lmatrix('inputs')} self.single_inputs = {'inputs': tensor.lvector('inputs')} self.children = [LookupTable(self.input_num_chars['inputs'], self.dim)]
def nn_fprop(x, x_mask, y, y_mask, lens, vocab_size, hidden_size, num_layers, model, boosting=False, **kwargs): lookup = LookupTable(length=vocab_size, dim=hidden_size) initialize([lookup]) h = lookup.apply(x) first = True for i in range(num_layers): if model == 'rnn': h = rnn_layer(hidden_size, h, i, x_mask=x_mask, first=first, **kwargs) elif model == 'gru': h = gru_layer(hidden_size, h, i, x_mask=x_mask, first=first, **kwargs) elif model == 'lstm': h = lstm_layer(hidden_size, h, i, x_mask=x_mask, first=first, **kwargs) else: print("model must be one of 'rnn', 'gru' or 'lstm'") sys.exit(1) first = False return softmax_layer(h, y, x_mask, y_mask, lens, vocab_size, hidden_size, boosting)
def __init__(self, vocab_size, embedding_dim, n_layers, skip_connections, state_dim, **kwargs): """Sole constructor. Args: vocab_size (int): Source vocabulary size embedding_dim (int): Dimension of the embedding layer n_layers (int): Number of layers. Layers share the same weight matrices. skip_connections (bool): Skip connections connect the source word embeddings directly with deeper layers to propagate the gradient more efficiently state_dim (int): Number of hidden units in the recurrent layers. """ super(BidirectionalEncoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.n_layers = n_layers self.state_dim = state_dim self.skip_connections = skip_connections self.lookup = LookupTable(name='embeddings') if self.n_layers >= 1: self.bidir = BidirectionalWMT15( GatedRecurrent(activation=Tanh(), dim=state_dim)) self.fwd_fork = Fork( [name for name in self.bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='fwd_fork') self.back_fork = Fork( [name for name in self.bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='back_fork') self.children = [self.lookup, self.bidir, self.fwd_fork, self.back_fork] if self.n_layers > 1: # Deep encoder self.mid_fwd_fork = Fork( [name for name in self.bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='mid_fwd_fork') self.mid_back_fork = Fork( [name for name in self.bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='mid_back_fork') self.children.append(self.mid_fwd_fork) self.children.append(self.mid_back_fork) elif self.n_layers == 0: self.embedding_dim = state_dim*2 self.children = [self.lookup] else: logging.fatal("Number of encoder layers must be non-negative")
def __init__(self, vocab_size, embedding_dim, state_dim, reverse=True, **kwargs): super(Encoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.reverse = reverse self.lookup = LookupTable(name='embeddings') self.transition = GatedRecurrent(Tanh(), name='encoder_transition') self.fork = Fork([name for name in self.transition.apply.sequences if name != 'mask'], prototype=Linear()) self.children = [self.lookup, self.transition, self.fork]
def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs): super(BidirectionalEncoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.lookup = LookupTable(name='embeddings') self.bidir = BidirectionalWMT15(GatedRecurrent(activation=Tanh(), dim=state_dim)) self.fwd_fork = Fork([name for name in self.bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='fwd_fork') self.back_fork = Fork([name for name in self.bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='back_fork') self.children = [self.lookup, self.bidir, self.fwd_fork, self.back_fork]
def __init__(self, blockid, vocab_size, embedding_dim, state_dim, **kwargs): super(Encoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.blockid = blockid self.lookup = LookupTable(name='embeddings' + '_' + self.blockid) self.gru = GatedRecurrent(activation=Tanh(), dim=state_dim, name = "GatedRNN" + self.blockid) self.fwd_fork = Fork( [name for name in self.gru.apply.sequences if name != 'mask'], prototype=Linear(), name='fwd_fork' + '_' + self.blockid) self.children = [self.lookup, self.gru, self.fwd_fork]
class BidirectionalEncoder(Initializable): """Encoder of RNNsearch model.""" def __init__(self, vocab_size, embedding_dim, state_dim, **kwargs): super(BidirectionalEncoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.lookup = LookupTable(name='embeddings') self.bidir = BidirectionalWMT15( GatedRecurrent(activation=Tanh(), dim=state_dim)) self.fwd_fork = Fork( [name for name in self.bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='fwd_fork') self.back_fork = Fork( [name for name in self.bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='back_fork') self.children = [self.lookup, self.bidir, self.fwd_fork, self.back_fork] def _push_allocation_config(self): self.lookup.length = self.vocab_size self.lookup.dim = self.embedding_dim self.fwd_fork.input_dim = self.embedding_dim self.fwd_fork.output_dims = [self.bidir.children[0].get_dim(name) for name in self.fwd_fork.output_names] self.back_fork.input_dim = self.embedding_dim self.back_fork.output_dims = [self.bidir.children[1].get_dim(name) for name in self.back_fork.output_names] @application(inputs=['source_sentence', 'source_sentence_mask'], outputs=['representation']) def apply(self, source_sentence, source_sentence_mask): # Time as first dimension source_sentence = source_sentence.T source_sentence_mask = source_sentence_mask.T embeddings = self.lookup.apply(source_sentence) representation = self.bidir.apply( merge(self.fwd_fork.apply(embeddings, as_dict=True), {'mask': source_sentence_mask}), merge(self.back_fork.apply(embeddings, as_dict=True), {'mask': source_sentence_mask}) ) return representation
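# The Fork pattern that recurs in these encoders, shown in isolation: a Fork builds
# one Linear per input sequence of the transition (everything except 'mask'), so
# word embeddings can be projected to whatever the RNN expects ('inputs' and
# 'gate_inputs' for GatedRecurrent). A minimal sketch with made-up sizes, not the
# BidirectionalEncoder above:
from theano import tensor
from blocks.bricks import Linear, Tanh
from blocks.bricks.parallel import Fork
from blocks.bricks.recurrent import GatedRecurrent
from blocks.initialization import IsotropicGaussian, Constant

state_dim, embedding_dim = 8, 6
rnn = GatedRecurrent(activation=Tanh(), dim=state_dim)
fork = Fork([name for name in rnn.apply.sequences if name != 'mask'],
            prototype=Linear(), name='fork')
fork.input_dim = embedding_dim
fork.output_dims = [rnn.get_dim(name) for name in fork.output_names]

rnn.weights_init = IsotropicGaussian(0.01)
rnn.initialize()
fork.weights_init = IsotropicGaussian(0.01)
fork.biases_init = Constant(0)
fork.initialize()

embeddings = tensor.tensor3('embeddings')  # (time, batch, embedding_dim)
states = rnn.apply(**fork.apply(embeddings, as_dict=True))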
def construct_model(vocab_size, embedding_dim, ngram_order, hidden_dims, activations): # Construct the model x = tensor.lmatrix('features') y = tensor.lvector('targets') lookup = LookupTable(length=vocab_size, dim=embedding_dim, name='lookup') hidden = MLP(activations=activations + [None], dims=[ngram_order * embedding_dim] + hidden_dims + [vocab_size]) embeddings = lookup.apply(x) embeddings = embeddings.flatten(ndim=2) # Concatenate embeddings activations = hidden.apply(embeddings) cost = Softmax().categorical_cross_entropy(y, activations) # Initialize parameters lookup.weights_init = IsotropicGaussian(0.001) hidden.weights_init = IsotropicGaussian(0.01) hidden.biases_init = Constant(0.001) lookup.initialize() hidden.initialize() return cost
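# Shape check for the n-gram model above: looking up a (batch, ngram_order) index
# matrix gives (batch, ngram_order, embedding_dim), and flatten(ndim=2) concatenates
# the context embeddings into (batch, ngram_order * embedding_dim), matching the
# MLP's first input dimension. Numpy equivalent with illustrative numbers:
import numpy

batch, ngram_order, embedding_dim = 2, 3, 4
W = numpy.random.rand(10, embedding_dim)                # vocab_size = 10
x_val = numpy.random.randint(0, 10, (batch, ngram_order))
emb = W[x_val]                                          # (2, 3, 4)
flat = emb.reshape(emb.shape[0], -1)                    # (2, 12), like flatten(ndim=2)
print(flat.shape)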
def __init__(self, input1_size, input2_size, lookup1_dim=200, lookup2_dim=200, hidden_size=512): self.hidden_size = hidden_size self.input1_size = input1_size self.input2_size = input2_size self.lookup1_dim = lookup1_dim self.lookup2_dim = lookup2_dim x1 = tensor.lmatrix('durations') x2 = tensor.lmatrix('syllables') y = tensor.lmatrix('pitches') lookup1 = LookupTable(dim=self.lookup1_dim, length=self.input1_size, name='lookup1', weights_init=initialization.Uniform(width=0.01), biases_init=Constant(0)) lookup1.initialize() lookup2 = LookupTable(dim=self.lookup2_dim, length=self.input2_size, name='lookup2', weights_init=initialization.Uniform(width=0.01), biases_init=Constant(0)) lookup2.initialize() merge = Merge(['lookup1', 'lookup2'], [self.lookup1_dim, self.lookup2_dim], self.hidden_size, weights_init=initialization.Uniform(width=0.01), biases_init=Constant(0)) merge.initialize() recurrent_block = LSTM(dim=self.hidden_size, activation=Tanh(), weights_init=initialization.Uniform(width=0.01)) #RecurrentStack([LSTM(dim=self.hidden_size, activation=Tanh())] * 3) recurrent_block.initialize() linear = Linear(input_dim=self.hidden_size, output_dim=self.input1_size, weights_init=initialization.Uniform(width=0.01), biases_init=Constant(0)) linear.initialize() softmax = NDimensionalSoftmax() l1 = lookup1.apply(x1) l2 = lookup2.apply(x2) m = merge.apply(l1, l2) h = recurrent_block.apply(m) a = linear.apply(h) y_hat = softmax.apply(a, extra_ndim=1) # ValueError: x must be 1-d or 2-d tensor of floats. Got TensorType(float64, 3D) self.Cost = softmax.categorical_cross_entropy(y, a, extra_ndim=1).mean() self.ComputationGraph = ComputationGraph(self.Cost) self.Model = Model(y_hat)
def __init__(self, vocab_size, embedding_dim, n_layers, skip_connections, state_dim, **kwargs): """Sole constructor. Args: vocab_size (int): Source vocabulary size embedding_dim (int): Dimension of the embedding layer n_layers (int): Number of layers. Layers share the same weight matrices. skip_connections (bool): Skip connections connect the source word embeddings directly with deeper layers to propagate the gradient more efficiently state_dim (int): Number of hidden units in the recurrent layers. """ super(DeepBidirectionalEncoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.n_layers = n_layers self.state_dim = state_dim self.skip_connections = skip_connections self.lookup = LookupTable(name='embeddings') self.bidirs = [] self.fwd_forks =[] self.back_forks = [] for i in xrange(self.n_layers): bidir = BidirectionalWMT15( GatedRecurrent(activation=Tanh(), dim=state_dim), name='bidir%d' % i) self.bidirs.append(bidir) self.fwd_forks.append(Fork( [name for name in bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='fwd_fork%d' % i)) self.back_forks.append(Fork( [name for name in bidir.prototype.apply.sequences if name != 'mask'], prototype=Linear(), name='back_fork%d' % i)) self.children = [self.lookup] \ + self.bidirs \ + self.fwd_forks \ + self.back_forks
class Encoder(Initializable): def __init__( self, encoder_type, num_characters, input_dim, encoder_dim, **kwargs): assert encoder_type in [None, 'bidirectional'] self.encoder_type = encoder_type super(Encoder, self).__init__(**kwargs) self.children = [] if encoder_type in ['lookup', 'bidirectional']: self.embed_label = LookupTable( num_characters, input_dim, name='embed_label') self.children += [ self.embed_label] else: # If there is no encoder. assert num_characters == input_dim if encoder_type == 'bidirectional': transition = RecurrentWithFork( GatedRecurrent(dim=encoder_dim).apply, input_dim, name='encoder_transition') self.encoder = Bidirectional(transition, name='encoder') self.children.append(self.encoder) @application def apply(self, x, x_mask=None): if self.encoder_type is None: return x if self.encoder_type in ['lookup', 'bidirectional']: embed_x = self.embed_label.apply(x) if self.encoder_type == 'lookup': encoded_x = embed_x if self.encoder_type == 'bidirectional': encoded_x = self.encoder.apply(embed_x, x_mask) return encoded_x
class Encoder(Initializable): def __init__(self, vocab_size, embedding_dim, state_dim, reverse=True, **kwargs): super(Encoder, self).__init__(**kwargs) self.vocab_size = vocab_size self.embedding_dim = embedding_dim self.state_dim = state_dim self.reverse = reverse self.lookup = LookupTable(name='embeddings') self.transition = GatedRecurrent(Tanh(), name='encoder_transition') self.fork = Fork([name for name in self.transition.apply.sequences if name != 'mask'], prototype=Linear()) self.children = [self.lookup, self.transition, self.fork] def _push_allocation_config(self): self.lookup.length = self.vocab_size self.lookup.dim = self.embedding_dim self.transition.dim = self.state_dim self.fork.input_dim = self.embedding_dim self.fork.output_dims = [self.state_dim for _ in self.fork.output_names] @application(inputs=['source_sentence', 'source_sentence_mask'], outputs=['representation']) def apply(self, source_sentence, source_sentence_mask): # Time as first dimension source_sentence = source_sentence.dimshuffle(1, 0) source_sentence_mask = source_sentence_mask.T if self.reverse: source_sentence = source_sentence[::-1] source_sentence_mask = source_sentence_mask[::-1] embeddings = self.lookup.apply(source_sentence) representation = self.transition.apply(**merge( self.fork.apply(embeddings, as_dict=True), {'mask': source_sentence_mask} )) return representation[-1]