# Imports needed for this test to run on its own:
import numpy
import theano
from numpy.testing import assert_allclose
from theano import tensor

from blocks.bricks import Maxout


def test_maxout():
    x = tensor.tensor3()
    maxout = Maxout(num_pieces=3)
    y = maxout.apply(x)
    x_val = numpy.asarray(numpy.random.normal(0, 1, (4, 5, 24)),
                          dtype=theano.config.floatX)
    assert_allclose(y.eval({x: x_val}),
                    x_val.reshape(4, 5, 8, 3).max(3))
    assert y.eval({x: x_val}).shape == (4, 5, 8)
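# The expected value in the test above follows from how Maxout groups the
# last dimension: with num_pieces=3, every 3 consecutive channels are
# collapsed by an elementwise max, so an input of width 24 becomes width 8.
# A minimal NumPy-only sketch of that computation (array sizes taken from
# the test; this is an illustration, not the Blocks implementation):
import numpy

x_val = numpy.random.normal(0, 1, (4, 5, 24))
num_pieces = 3
out_dim = x_val.shape[-1] // num_pieces   # 24 channels -> 8 maxout units

# Group every `num_pieces` consecutive channels and take their max,
# which is exactly what the assertion above computes.
expected = x_val.reshape(x_val.shape[:-1] + (out_dim, num_pieces)).max(-1)
assert expected.shape == (4, 5, 8)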
def __init__(self, vocab_size, embedding_dim, state_dim, representation_dim,
             theano_seed=None, **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim
    self.theano_seed = theano_seed

    # Initialize the GRU with a special initial state.
    self.transition = GRUInitialState(attended_dim=state_dim,
                                      dim=state_dim,
                                      activation=Tanh(),
                                      name='decoder')

    # Initialize the attention mechanism.
    self.attention = SequenceContentAttention2(
        state_names=self.transition.apply.states,
        attended_dim=representation_dim,
        match_dim=state_dim, name="attention")

    readout = Readout(
        source_names=['states', 'feedback',
                      self.attention.take_glimpses.outputs[0]],
        readout_dim=self.vocab_size,
        emitter=NewSoftmaxEmitter(initial_output=-1, theano_seed=theano_seed),
        feedback_brick=NewLookupFeedback(vocab_size, embedding_dim),
        post_merge=InitializableFeedforwardSequence(
            [Bias(dim=state_dim, name='maxout_bias').apply,
             Maxout(num_pieces=2, name='maxout').apply,
             Linear(input_dim=state_dim // 2, output_dim=embedding_dim,
                    use_bias=False, name='softmax0').apply,
             Linear(input_dim=embedding_dim, name='softmax1').apply]),
        merged_dim=state_dim)

    # Build the sequence generator accordingly.
    self.sequence_generator = SequenceGenerator(
        readout=readout,
        transition=self.transition,
        attention=self.attention,
        fork=Fork([name for name in self.transition.apply.sequences
                   if name != 'mask'],
                  prototype=Linear()),
        cost_type='categorical_cross_entropy')

    self.children = [self.sequence_generator]
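# Why softmax0 takes input_dim=state_dim // 2: the merged readout has width
# state_dim (merged_dim=state_dim), and Maxout(num_pieces=2) halves that
# width by taking the max over pairs of units; the final Linear's output
# width is then matched to readout_dim (the vocabulary size) by Readout.
# A rough NumPy sketch of the dimension flow through post_merge (toy sizes
# and random weights, purely illustrative; the real bricks are learned):
import numpy

state_dim, embedding_dim, vocab_size = 8, 6, 10       # hypothetical toy sizes
merged = numpy.random.rand(3, state_dim)              # (batch, merged_dim)

biased = merged + numpy.random.rand(state_dim)                        # Bias
maxed = biased.reshape(3, state_dim // 2, 2).max(-1)                  # Maxout, 2 pieces
softmax0 = maxed @ numpy.random.rand(state_dim // 2, embedding_dim)   # Linear, no bias
softmax1 = softmax0 @ numpy.random.rand(embedding_dim, vocab_size)    # Linear -> readout_dim
assert softmax1.shape == (3, vocab_size)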
def __init__(self, vocab_size, embedding_dim, state_dim, representation_dim,
             topical_dim, theano_seed=None, **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim
    self.theano_seed = theano_seed
    # self.topical_dim = topical_dim

    # Initialize the GRU with a special initial state
    self.transition = GRUInitialState(
        attended_dim=state_dim, dim=state_dim,
        activation=Tanh(), name='decoder')

    # Initialize the attention mechanisms over the source annotations and
    # over the topical embeddings
    self.attention = SequenceContentAttention(
        state_names=self.transition.apply.states,
        attended_dim=representation_dim,
        match_dim=state_dim, name="attention")

    self.topical_attention = SequenceContentAttention(
        state_names=self.transition.apply.states,
        attended_dim=topical_dim,
        match_dim=state_dim,  # not sure whether the match dim is correct
        name="topical_attention")

    # Initialize the readout; note that SoftmaxEmitter emits -1 for
    # initial outputs, which is used by LookupFeedbackWMT15
    readout = Readout(
        source_names=['states', 'feedback',
                      self.attention.take_glimpses.outputs[0]],  # check!
        readout_dim=self.vocab_size,
        emitter=SoftmaxEmitter(initial_output=-1, theano_seed=theano_seed),
        feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim),
        post_merge=InitializableFeedforwardSequence(
            [Bias(dim=state_dim, name='maxout_bias').apply,
             Maxout(num_pieces=2, name='maxout').apply,
             Linear(input_dim=state_dim // 2, output_dim=embedding_dim,
                    use_bias=False, name='softmax0').apply,
             Linear(input_dim=embedding_dim, name='softmax1').apply]),
        merged_dim=state_dim)

    # Build the sequence generator accordingly
    self.sequence_generator = SequenceGenerator(
        readout=readout,
        transition=self.transition,
        attention=self.attention,
        topical_attention=self.topical_attention,
        topical_name='topical_embeddingq',
        content_name='content_embedding',
        fork=Fork([name for name in self.transition.apply.sequences
                   if name != 'mask'],
                  prototype=Linear()))

    self.children = [self.sequence_generator]
def __init__(self, vocab_size, embedding_dim, state_dim, representation_dim,
             **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim

    self.transition = GRUInitialState(
        attended_dim=state_dim, dim=state_dim,
        activation=Tanh(), name='decoder')

    self.attention = SequenceContentAttention(
        state_names=self.transition.apply.states,
        attended_dim=representation_dim,
        match_dim=state_dim, name="attention")

    readout = Readout(
        source_names=['states', 'feedback',
                      self.attention.take_glimpses.outputs[0]],
        readout_dim=self.vocab_size,
        emitter=SoftmaxEmitter(initial_output=-1),
        feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim),
        post_merge=InitializableFeedforwardSequence(
            [Bias(dim=state_dim, name='maxout_bias').apply,
             Maxout(num_pieces=2, name='maxout').apply,
             Linear(input_dim=state_dim // 2, output_dim=embedding_dim,
                    use_bias=False, name='softmax0').apply,
             Linear(input_dim=embedding_dim, name='softmax1').apply]),
        merged_dim=state_dim,
        merge_prototype=Linear(use_bias=True))

    self.sequence_generator = SequenceGenerator(
        readout=readout,
        transition=self.transition,
        attention=self.attention,
        fork=Fork([name for name in self.transition.apply.sequences
                   if name != 'mask'],
                  prototype=Linear()))

    self.children = [self.sequence_generator]
def __init__(self, vocab_size, embedding_dim, state_dim, representation_dim,
             **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim

    readout = Readout(
        source_names=['states', 'feedback', 'readout_context'],
        readout_dim=self.vocab_size,
        emitter=SoftmaxEmitter(),
        feedback_brick=LookupFeedback(vocab_size, embedding_dim),
        post_merge=InitializableFeedforwardSequence(
            [Bias(dim=1000).apply,
             Maxout(num_pieces=2).apply,
             Linear(input_dim=state_dim // 2, output_dim=100,
                    use_bias=False).apply,
             Linear(input_dim=100).apply]),
        merged_dim=1000)

    self.transition = GatedRecurrentWithContext(Tanh(), dim=state_dim,
                                                name='decoder')
    # Readout will apply the linear transformation to 'readout_context'
    # with a Merge brick, so there is no need to fork it here
    self.fork = Fork([name for name in self.transition.apply.contexts +
                      self.transition.apply.states
                      if name != 'readout_context'],
                     prototype=Linear())
    self.tanh = Tanh()

    self.sequence_generator = SequenceGenerator(
        readout=readout,
        transition=self.transition,
        fork_inputs=[name for name in self.transition.apply.sequences
                     if name != 'mask'])

    self.children = [self.fork, self.sequence_generator, self.tanh]
def __init__(self, vocab_size, embedding_dim, state_dim, att_dim, maxout_dim,
             representation_dim, attention_strategy='content',
             attention_sources='s', readout_sources='sfa',
             memory='none', memory_size=500, seq_len=50,
             init_strategy='last', theano_seed=None, **kwargs):
    """Creates a new decoder brick without embedding.

    Args:
        vocab_size (int): Target language vocabulary size
        embedding_dim (int): Size of feedback embedding layer
        state_dim (int): Number of hidden units
        att_dim (int): Size of attention match vector
        maxout_dim (int): Size of maxout layer
        representation_dim (int): Dimension of source annotations
        attention_strategy (string): Which attention should be used
                                     (cf. ``_initialize_attention``)
        attention_sources (string): Defines the sources used by the
                                    attention model: 's' for decoder
                                    states, 'f' for feedback
        readout_sources (string): Defines the sources used in the readout
                                  network: 's' for decoder states, 'f' for
                                  feedback, 'a' for attention (context
                                  vector)
        memory (string): Which external memory should be used
                         (cf. ``_initialize_attention``)
        memory_size (int): Size of the external memory structure
        seq_len (int): Maximum sentence length
        init_strategy (string): How to initialize the RNN state
                                (cf. ``GRUInitialState``)
        theano_seed: Random seed
    """
    super(NoLookupDecoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim
    self.theano_seed = theano_seed

    # Initialize the GRU with a special initial state
    self.transition = GRUInitialState(attended_dim=state_dim,
                                      init_strategy=init_strategy,
                                      dim=state_dim,
                                      activation=Tanh(),
                                      name='decoder')

    # Initialize the attention mechanism
    att_dim = att_dim if att_dim > 0 else state_dim
    self.attention, src_names = _initialize_attention(
        attention_strategy, seq_len, self.transition, representation_dim,
        att_dim, attention_sources, readout_sources, memory, memory_size)

    # Initialize the readout; the emitter emits -1 for initial outputs
    maxout_dim = maxout_dim if maxout_dim > 0 else state_dim
    readout = Readout(
        source_names=src_names,
        readout_dim=embedding_dim,
        emitter=NoLookupEmitter(initial_output=-1,
                                readout_dim=embedding_dim,
                                cost_brick=SquaredError()),
        #                       cost_brick=CategoricalCrossEntropy()),
        feedback_brick=TrivialFeedback(output_dim=embedding_dim),
        post_merge=InitializableFeedforwardSequence(
            [Bias(dim=maxout_dim, name='maxout_bias').apply,
             Maxout(num_pieces=2, name='maxout').apply,
             Linear(input_dim=maxout_dim // 2, output_dim=embedding_dim,
                    use_bias=False, name='softmax0').apply,
             Logistic(name='softmax1').apply]),
        merged_dim=maxout_dim)

    # Build the sequence generator accordingly
    self.sequence_generator = SequenceGenerator(
        readout=readout,
        transition=self.transition,
        attention=self.attention,
        fork=Fork([name for name in self.transition.apply.sequences
                   if name != 'mask'],
                  prototype=Linear()))

    self.children = [self.sequence_generator]
def __init__(self, vocab_size, embedding_dim, state_dim, representation_dim,
             context_dim, target_transition, theano_seed=None,
             loss_function='cross_entropy', **kwargs):
    super(InitialContextDecoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim
    self.theano_seed = theano_seed

    # Initialize the GRU with a special initial state
    self.transition = target_transition(attended_dim=state_dim,
                                        context_dim=context_dim,
                                        dim=state_dim,
                                        activation=Tanh(), name='decoder')
    # self.transition = GRUInitialStateWithInitialStateConcatContext(
    #     attended_dim=state_dim, context_dim=context_dim, dim=state_dim,
    #     activation=Tanh(), name='decoder')

    # Initialize the attention mechanism
    self.attention = SequenceContentAttention(
        state_names=self.transition.apply.states,
        attended_dim=representation_dim,
        match_dim=state_dim, name="attention")

    # Initialize the readout; note that SoftmaxEmitter emits -1 for
    # initial outputs, which is used by LookupFeedbackWMT15
    readout = Readout(
        source_names=['states', 'feedback',
                      # Chris: it's key that we take the first output of
                      # self.attention.take_glimpses.outputs: the first
                      # output is the weighted avgs, the second is the
                      # weights in (batch, time)
                      self.attention.take_glimpses.outputs[0]],
        readout_dim=self.vocab_size,
        emitter=SoftmaxEmitter(initial_output=-1, theano_seed=theano_seed),
        feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim),
        post_merge=InitializableFeedforwardSequence(
            [Bias(dim=state_dim, name='maxout_bias').apply,
             Maxout(num_pieces=2, name='maxout').apply,
             Linear(input_dim=state_dim // 2, output_dim=embedding_dim,
                    use_bias=False, name='softmax0').apply,
             Linear(input_dim=embedding_dim, name='softmax1').apply]),
        merged_dim=state_dim)

    # Build the sequence generator accordingly
    if loss_function == 'cross_entropy':
        self.sequence_generator = InitialContextSequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([name for name in self.transition.apply.sequences
                       if name != 'mask'],
                      prototype=Linear()))
    elif loss_function == 'min_risk':
        self.sequence_generator = MinRiskInitialContextSequenceGenerator(
            readout=readout,
            transition=self.transition,
            attention=self.attention,
            fork=Fork([name for name in self.transition.apply.sequences
                       if name != 'mask'],
                      prototype=Linear()))
        # The name is important because it lets us match the brick
        # hierarchy names of the vanilla SequenceGenerator when loading
        # pretrained models.
        # TODO: quick hack to fix bug
        self.sequence_generator.name = 'initialcontextsequencegenerator'
    else:
        raise ValueError(
            'The decoder does not support the loss function: {}'.format(
                loss_function))

    # TODO: uncomment this!!
    # self.sequence_generator.name = 'sequencegenerator'

    self.children = [self.sequence_generator]
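# On the comment about take_glimpses.outputs[0] above: content attention
# returns the weighted average of the attended annotations (the context
# vector) first and the attention weights second. A NumPy sketch of that
# weighted average with made-up shapes (illustrative only, not the
# attention brick itself):
import numpy

src_len, batch, representation_dim = 11, 3, 20         # hypothetical sizes
attended = numpy.random.rand(src_len, batch, representation_dim)
weights = numpy.random.rand(batch, src_len)
weights /= weights.sum(axis=1, keepdims=True)           # normalized per example

# Weighted average over source positions -> (batch, representation_dim),
# the glimpse that Readout receives as its third source.
context = (weights.T[:, :, None] * attended).sum(axis=0)
assert context.shape == (batch, representation_dim)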
def __init__(self, vocab_size, topicWord_size, embedding_dim, state_dim,
             topical_dim, representation_dim,
             match_function='SumMacthFunction',
             use_doubly_stochastic=False, lambda_ds=0.001,
             use_local_attention=False, window_size=10,
             use_step_decay_cost=False,
             use_concentration_cost=False, lambda_ct=10,
             use_stablilizer=False, lambda_st=50,
             theano_seed=None, **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.vocab_size = vocab_size
    self.topicWord_size = topicWord_size
    self.embedding_dim = embedding_dim
    self.state_dim = state_dim
    self.representation_dim = representation_dim
    self.theano_seed = theano_seed

    # Initialize the GRU transition
    self.transition = GRU(attended_dim=state_dim, dim=state_dim,
                          activation=Tanh(), name='decoder')

    self.energy_computer = globals()[match_function](name='energy_comp')

    # Initialize the attention mechanisms over the source annotations and
    # over the topical embeddings
    self.attention = SequenceContentAttention(
        state_names=self.transition.apply.states,
        attended_dim=representation_dim,
        match_dim=state_dim,
        energy_computer=self.energy_computer,
        use_local_attention=use_local_attention,
        window_size=window_size,
        name="attention")

    self.topical_attention = SequenceContentAttention(
        state_names=self.transition.apply.states,
        attended_dim=topical_dim,
        match_dim=state_dim,  # not sure whether the match dim is correct
        energy_computer=self.energy_computer,
        use_local_attention=use_local_attention,
        window_size=window_size,
        name="topical_attention")

    # Initialize the readout; note that SoftmaxEmitter emits -1 for
    # initial outputs, which is used by LookupFeedbackWMT15
    readout = Readout(
        source_names=['states', 'feedback',
                      self.attention.take_glimpses.outputs[0]],
        readout_dim=self.vocab_size,
        emitter=SoftmaxEmitter(initial_output=-1, theano_seed=theano_seed),
        feedback_brick=LookupFeedbackWMT15(vocab_size, embedding_dim),
        post_merge=InitializableFeedforwardSequence(
            [Bias(dim=state_dim, name='maxout_bias').apply,
             Maxout(num_pieces=2, name='maxout').apply,
             Linear(input_dim=state_dim // 2, output_dim=embedding_dim,
                    use_bias=False, name='softmax0').apply,
             Linear(input_dim=embedding_dim, name='softmax1').apply]),
        merged_dim=state_dim,
        name='readout')

    # Readout for the topic words: no dedicated feedback brick (the
    # trivial feedback is used) and no custom post_merge; the default
    # Bias and Linear merge is used
    topicWordReadout = Readout(
        source_names=['states', 'feedback',
                      self.attention.take_glimpses.outputs[0]],
        readout_dim=self.topicWord_size,
        emitter=SoftmaxEmitter(initial_output=-1, theano_seed=theano_seed),
        name='twReadout')

    # Build the sequence generator accordingly
    self.sequence_generator = SequenceGenerator(
        readout=readout,
        topicWordReadout=topicWordReadout,
        topic_vector_names=['topicSumVector'],
        transition=self.transition,
        attention=self.attention,
        topical_attention=self.topical_attention,
        q_dim=self.state_dim,
        # q_name='topic_embedding',
        topical_name='topic_embedding',
        content_name='content_embedding',
        use_step_decay_cost=use_step_decay_cost,
        use_doubly_stochastic=use_doubly_stochastic,
        lambda_ds=lambda_ds,
        use_concentration_cost=use_concentration_cost,
        lambda_ct=lambda_ct,
        use_stablilizer=use_stablilizer,
        lambda_st=lambda_st,
        fork=Fork([name for name in self.transition.apply.sequences
                   if name != 'mask'],
                  prototype=Linear()))

    self.children = [self.sequence_generator]