class Decoder(Initializable):
    """Decoder of the RNNsearch model.

    Wraps a GRU transition with content-based attention and a softmax
    readout inside a single SequenceGenerator. All child bricks are
    suffixed with ``blockid`` so several decoder blocks can coexist in
    one model without name clashes.
    """

    def __init__(self, blockid, vocab_size, embedding_dim, state_dim,
                 representation_dim, theano_seed=None, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.state_dim = state_dim
        self.representation_dim = representation_dim
        self.theano_seed = theano_seed
        self.blockid = blockid

        # Initialize the GRU transition with a learned initial state
        self.transition = GRUInitialState(
            attended_dim=state_dim, dim=state_dim,
            activation=Tanh(), name='decoder_' + self.blockid)

        # Initialize the attention mechanism
        self.attention = SequenceContentAttention(
            state_names=self.transition.apply.states,
            attended_dim=representation_dim,
            match_dim=state_dim, name='attention_' + self.blockid)

        # Initialize the readout; note that SoftmaxEmitter emits -1 for the
        # initial outputs, which is used by LookupFeedbackWMT15.
        # The `hstates_dim` argument and the extra hidden-state output of
        # `cost_matrix` below rely on this project's extended
        # Readout/SequenceGenerator bricks rather than the stock Blocks ones.
        readout = Readout(
            source_names=['states', 'feedback',
                          self.attention.take_glimpses.outputs[0]],
            readout_dim=self.vocab_size,
            emitter=SoftmaxEmitter(initial_output=-1, theano_seed=theano_seed,
                                   name='emitter_' + self.blockid),
            feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim,
                                               name='lookup_' + self.blockid),
            post_merge=InitializableFeedforwardSequence(
                [Bias(dim=state_dim, name='maxout_bias').apply,
                 Maxout(num_pieces=2, name='maxout').apply,
                 # Maxout with two pieces halves the dimension, hence the
                 # integer division by 2 below
                 Linear(input_dim=state_dim // 2, output_dim=embedding_dim,
                        use_bias=False, name='softmax0').apply,
                 Linear(input_dim=embedding_dim, name='softmax1').apply],
                name='post_merge_' + self.blockid),
            merged_dim=state_dim,
            hstates_dim=state_dim,
            name='readout_' + self.blockid)

        # Build the sequence generator accordingly
        self.sequence_generator = SequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([name for name in self.transition.apply.sequences
                       if name != 'mask'],
                      prototype=Linear(), name='fork_' + self.blockid),
            name='sequence_generator_' + self.blockid)

        self.children = [self.sequence_generator]

    @application(inputs=['representation', 'source_sentence_mask',
                         'target_sentence_mask', 'target_sentence'],
                 outputs=['cost', 'hstates'])
    def cost(self, representation, source_sentence_mask,
             target_sentence, target_sentence_mask):
        # Switch from batch-major to time-major layout
        source_sentence_mask = source_sentence_mask.T
        target_sentence = target_sentence.T
        target_sentence_mask = target_sentence_mask.T

        # Get the per-timestep cost matrix and the decoder hidden states
        cost, hiddenstates = self.sequence_generator.cost_matrix(**{
            'mask': target_sentence_mask,
            'outputs': target_sentence,
            'attended': representation,
            'attended_mask': source_sentence_mask})

        # Average the masked cost over the batch
        return (cost * target_sentence_mask).sum() / \
            target_sentence_mask.shape[1], hiddenstates

    @application
    def generate(self, source_sentence, representation, **kwargs):
        return self.sequence_generator.generate(
            n_steps=2 * source_sentence.shape[1],
            batch_size=source_sentence.shape[0],
            attended=representation,
            attended_mask=tensor.ones((source_sentence.shape[0],
                                       representation.shape[0])).T,
            **kwargs)
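
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only): how a Decoder block is typically
# wired against an encoder representation. The dimensions and variable names
# below (`representation`, `src`, `src_mask`, `trg`, `trg_mask`, blockid '0')
# are assumptions for illustration, not part of this module; the real training
# script builds these from the encoder and the data stream.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from theano import tensor

    # Symbolic batch-major inputs: (batch, time) for sentences and masks,
    # (time, batch, representation_dim) for the attended encoder states.
    src = tensor.lmatrix('source_sentence')
    src_mask = tensor.matrix('source_sentence_mask')
    trg = tensor.lmatrix('target_sentence')
    trg_mask = tensor.matrix('target_sentence_mask')
    representation = tensor.tensor3('representation')

    # Hypothetical dimensions; representation_dim is 2 * state_dim for a
    # bidirectional encoder.
    decoder = Decoder(blockid='0', vocab_size=30000, embedding_dim=620,
                      state_dim=1000, representation_dim=2000)

    # `cost` returns the average masked cross-entropy plus the decoder
    # hidden states; `generate` unrolls the sequence generator for sampling.
    cost, hstates = decoder.cost(representation, src_mask, trg, trg_mask)
    samples = decoder.generate(src, representation)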