Example 1
    def __build_readout_decoder__(self):
        self.decoder.add(
            RepeatVector(
                self.sequence_len,
                input_shape=(self.enc_layer_output[-1],
                             )))  # Repeat the final vector for answer input
        # Using recurrentshop's container with readout
        container = RecurrentContainer(readout=True,
                                       return_sequences=True,
                                       output_length=self.sequence_len)
        if len(self.dec_layer_output) > 1:
            container.add(
                LSTMCell(output_dim=self.dec_layer_output[0],
                         input_dim=self.enc_layer_output[-1]))
            for dl in self.dec_layer_output[1:-1]:
                container.add(LSTMCell(output_dim=dl))
            container.add(LSTMCell(output_dim=self.enc_layer_output[-1]))
        else:
            container.add(
                LSTMCell(input_dim=self.enc_layer_output[-1],
                         output_dim=self.enc_layer_output[-1]))

        if self.enc_layer_output[-1] != self.dec_layer_output[-1]:
            print(
                'WARNING: Overriding final decoder output to %s for readout compatibility'
                % self.enc_layer_output[-1])
        self.decoder.add(container)
Example 2
    def __build_seq2seq_decoder__(self):
        # Using recurrentshop's decoder container
        container = RecurrentContainer(
            return_sequences=True,
            readout='add',
            output_length=self.sequence_len,
            input_shape=(self.enc_layer_output[-1], ),
            decode=True)
        if len(self.dec_layer_output) > 1:
            container.add(
                LSTMCell(output_dim=self.dec_layer_output[0],
                         input_dim=self.enc_layer_output[-1]))
            for dl in self.dec_layer_output[1:-1]:
                container.add(LSTMCell(output_dim=dl))
            container.add(LSTMCell(output_dim=self.enc_layer_output[-1]))
        else:
            container.add(
                LSTMCell(input_dim=self.enc_layer_output[-1],
                         output_dim=self.enc_layer_output[-1]))

        if self.enc_layer_output[-1] != self.dec_layer_output[-1]:
            print(
                'WARNING: Overriding final decoder output to %s for readout compatibility'
                % self.enc_layer_output[-1])
        self.decoder.add(container)
Example 3
def encoder_init(input,
                 postcshape,
                 hidden_dim,
                 depth,
                 dropout=0,
                 seq2seq=True,
                 bidirectional=True,
                 unroll=False,
                 stateful=False,
                 Encoder=None,
                 global_name="",
                 return_model=False):
    if Encoder is None:
        Encoder = [hidden_dim] * depth[0]
    elif len(Encoder) < depth[0]:
        Encoder = Encoder + [hidden_dim] * (depth[0] - len(Encoder))
    encoder = RecurrentSequential(
        unroll=unroll,
        stateful=stateful,
        #   return_states=True, return_all_states=True, AllStateTransfer needs modification in the tensorflow backend
        return_sequences=True,
        name=global_name + 'encoder')
    encoder.add(LSTMCell(Encoder[0], batch_input_shape=postcshape[1:]))

    for k in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(Encoder[k]))

    if bidirectional:
        encoder = Bidirectional(encoder,
                                merge_mode='sum',
                                name=global_name + 'encoder')
        encoder.forward_layer.build(postcshape)
        encoder.backward_layer.build(postcshape)
        # patch
        encoder.layer = encoder.forward_layer
    if return_model:
        enc_input = Input(shape=postcshape[1:], name='encoder_input')
        encoded_out = encoder(enc_input)
        encoder_model = Model(inputs=[enc_input], outputs=[encoded_out])
        return encoder_model(input)
    return encoder(input)
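A minimal usage sketch for encoder_init follows; all shapes, the depth tuple, and the hyperparameters below are illustrative assumptions, not values from the original code.

# Hypothetical usage of encoder_init; every number here is an assumption.
from keras.layers import Input

batch_size, timesteps, features = 32, 50, 16
postcshape = (batch_size, timesteps, features)
x = Input(batch_shape=postcshape)
encoded = encoder_init(x,
                       postcshape,
                       hidden_dim=64,
                       depth=(2, 2),
                       dropout=0.1,
                       bidirectional=True)
# encoded is a (batch_size, timesteps, 64) tensor, since the encoder is
# built with return_sequences=True.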
Example 4
def Seq2Seq(output_dim,
            output_length,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
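For context, the seq2seq library README builds and compiles this model along the following lines; treat the hyperparameters as an illustrative sketch rather than canonical values.

# Sketch: constructing and compiling the Seq2Seq model defined above.
model = Seq2Seq(batch_input_shape=(16, 7, 5),
                hidden_dim=10,
                output_length=8,
                output_dim=20,
                depth=2,
                peek=True)
model.compile(loss='mse', optimizer='rmsprop')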
Example 5
def SimpleSeq2Seq(output_dim,
                  output_length,
                  hidden_dim=None,
                  input_shape=None,
                  batch_size=None,
                  batch_input_shape=None,
                  input_dim=None,
                  input_length=None,
                  depth=1,
                  dropout=0.0,
                  unroll=False,
                  stateful=False):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context
    vector), and the decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.
    Arguments:
    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  decode=True,
                                  output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    return encoder, decoder
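Unlike the other variants, this SimpleSeq2Seq returns the encoder and decoder RecurrentSequential halves instead of a finished Model. Mirroring the wiring used in Example 12, a caller might connect them as in the sketch below; the shapes are illustrative assumptions.

# Sketch: wiring the returned halves into a Model, following the pattern of
# Example 12. The input shape is an assumption.
from keras.layers import Input
from keras.models import Model

encoder, decoder = SimpleSeq2Seq(output_dim=20,
                                 output_length=8,
                                 hidden_dim=10,
                                 input_shape=(7, 5))
_input = Input(batch_shape=(None, 7, 5))
context = encoder(_input)   # the context vector
output = decoder(context)   # the decoded sequence of length 8
model = Model(_input, output)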
Example 6
def Seq2Seq(output_dim,
            output_length,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1]-based model and the [2]-based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); where s is the hidden state of the
            LSTM (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            where s is the hidden state of the LSTM (h and c), and C is the
            context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context
            vector at every timestep.
    dropout : Dropout probability in between layers.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
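Since this variant exposes teacher_force, a training-time sketch follows: with teacher_force=True the model takes the ground-truth target sequence as a second input, so it appears both as an input and as the label. All shapes below are illustrative assumptions.

# Sketch: fitting with teacher forcing; shapes are assumptions.
import numpy as np

model = Seq2Seq(batch_input_shape=(16, 7, 5),
                hidden_dim=10,
                output_length=8,
                output_dim=20,
                teacher_force=True)
model.compile(loss='mse', optimizer='rmsprop')

X = np.random.random((160, 7, 5))
Y = np.random.random((160, 8, 20))
model.fit([X, Y], Y, batch_size=16)  # ground truth fed as the second input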
Example 7
def Seq2Seq(output_dim, output_length, hidden_dim=None, depth=1, broadcast_state=True, inner_broadcast_state=True, peek=False, dropout=0., **kwargs):
	'''
	Seq2seq model based on [1] and [2].
	This model has the ability to transfer the encoder hidden state to the
	decoder's hidden state (specified by the broadcast_state argument). Also,
	in deep models (depth > 1), the hidden state is propagated throughout the
	LSTM stack (specified by the inner_broadcast_state argument). You can
	switch between the [1]-based model and the [2]-based model using the peek
	argument (peek = True for [2], peek = False for [1]). When peek = True,
	the decoder gets a 'peek' at the context vector at every timestep.

	[1] based model:

		Encoder:
		X = Input sequence
		C = LSTM(X); The context vector

		Decoder:
		y(t) = LSTM(s(t-1), y(t-1)); where s is the hidden state of the LSTM
		(h and c)
		y(0) = LSTM(s0, C); C is the context vector from the encoder.

	[2] based model:

		Encoder:
		X = Input sequence
		C = LSTM(X); The context vector

		Decoder:
		y(t) = LSTM(s(t-1), y(t-1), C)
		y(0) = LSTM(s0, C, C)
		where s is the hidden state of the LSTM (h and c), and C is the
		context vector from the encoder.

	Arguments:

	output_dim : Required output dimension.
	hidden_dim : The dimension of the internal representations of the model.
	output_length : Length of the required output sequence.
	depth : Used to create a deep Seq2seq model. For example, if depth = 3,
			there will be 3 LSTMs on the encoding side and 3 LSTMs on the
			decoding side. You can also specify depth as a tuple. For example,
			if depth = (4, 5), 4 LSTMs will be added to the encoding side and
			5 LSTMs will be added to the decoding side.
	broadcast_state : Specifies whether the hidden state from the encoder
					  should be transferred to the decoder.
	inner_broadcast_state : Specifies whether hidden states should be
							propagated throughout the LSTM stack in deep models.
	peek : Specifies if the decoder should be able to peek at the context
		   vector at every timestep.
	dropout : Dropout probability in between layers.

	'''
	if type(depth) == int:
		depth = [depth, depth]
	if 'batch_input_shape' in kwargs:
		shape = kwargs['batch_input_shape']
		del kwargs['batch_input_shape']
	elif 'input_shape' in kwargs:
		shape = (None,) + tuple(kwargs['input_shape'])
		del kwargs['input_shape']
	elif 'input_dim' in kwargs:
		if 'input_length' in kwargs:
			shape = (None, kwargs['input_length'], kwargs['input_dim'])
			del kwargs['input_length']
		else:
			shape = (None, None, kwargs['input_dim'])
		del kwargs['input_dim']
	if 'unroll' in kwargs:
		unroll = kwargs['unroll']
		del kwargs['unroll']
	else:
		unroll = False
	if 'stateful' in kwargs:
		stateful = kwargs['stateful']
		del kwargs['stateful']
	else:
		stateful = False
	if not hidden_dim:
		hidden_dim = output_dim
	encoder = RecurrentContainer(readout=True, state_sync=inner_broadcast_state, input_length=shape[1], unroll=unroll, stateful=stateful)
	for i in range(depth[0]):
		encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim), **kwargs))
		encoder.add(Dropout(dropout))
	dense1 = TimeDistributed(Dense(hidden_dim))
	dense2 = Dense(output_dim)
	decoder = RecurrentContainer(readout='add' if peek else 'readout_only', state_sync=inner_broadcast_state, output_length=output_length, unroll=unroll, stateful=stateful, decode=True)
	for i in range(depth[1]):
		decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
		decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], output_dim), **kwargs))
	input = Input(batch_shape=shape)
	encoded_seq = dense1(input)
	encoded_seq = encoder(encoded_seq)
	if broadcast_state:
		decoder.model.layers[1].states[:2] = encoder.state_outputs[-3:-1]
	encoded_seq = dense2(encoded_seq)
	decoder.initial_readout = encoded_seq
	decoded_seq = decoder(encoded_seq)
	model = Model(input, decoded_seq)
	model.encoder = encoder
	model.decoder = decoder
	return model
Example 8
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None, depth=1, dropout=0., **kwargs):
	'''
	Simple model for sequence to sequence learning.
	The encoder encodes the input sequence to a vector (called the context
	vector), and the decoder decodes the context vector into a sequence of
	vectors. There is no one-to-one relation between the input and output
	sequence elements. The input sequence and output sequence may differ in
	length.

	Arguments:

	output_dim : Required output dimension.
	hidden_dim : The dimension of the internal representations of the model.
	output_length : Length of the required output sequence.
	depth : Used to create a deep Seq2seq model. For example, if depth = 3,
			there will be 3 LSTMs on the encoding side and 3 LSTMs on the
			decoding side. You can also specify depth as a tuple. For example,
			if depth = (4, 5), 4 LSTMs will be added to the encoding side and
			5 LSTMs will be added to the decoding side.
	dropout : Dropout probability in between layers.
	'''
	if type(depth) == int:
		depth = [depth, depth]
	if 'batch_input_shape' in kwargs:
		shape = kwargs['batch_input_shape']
		del kwargs['batch_input_shape']
	elif 'input_shape' in kwargs:
		shape = (None,) + tuple(kwargs['input_shape'])
		del kwargs['input_shape']
	elif 'input_dim' in kwargs:
		if 'input_length' in kwargs:
			shape = (None, kwargs['input_length'], kwargs['input_dim'])
			del kwargs['input_length']
		else:
			shape = (None, None, kwargs['input_dim'])
		del kwargs['input_dim']
	if 'unroll' in kwargs:
		unroll = kwargs['unroll']
		del kwargs['unroll']
	else:
		unroll = False
	if 'stateful' in kwargs:
		stateful = kwargs['stateful']
		del kwargs['stateful']
	else:
		stateful = False
	if not hidden_dim:
		hidden_dim = output_dim
	encoder = RecurrentContainer(unroll=unroll, stateful=stateful, input_length=shape[1])
	encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
	for _ in range(1, depth[0]):
		encoder.add(Dropout(dropout))
		encoder.add(LSTMCell(hidden_dim, **kwargs))
	decoder = RecurrentContainer(unroll=unroll, stateful=stateful, decode=True, output_length=output_length)
	decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))
	decoder.add(LSTMCell(hidden_dim, **kwargs))
	for _ in range(1, depth[1]):
		decoder.add(Dropout(dropout))
		decoder.add(LSTMCell(hidden_dim, **kwargs))
	model = Sequential()
	model.add(encoder)
	model.add(decoder)
	return model
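A construction sketch in the style of the seq2seq README; the dimensions below are illustrative assumptions.

# Sketch: build and compile the simple model returned above.
# Note that this particular variant stacks LSTMCell(hidden_dim) in the
# decoder, so its output dimensionality follows hidden_dim, not output_dim.
model = SimpleSeq2Seq(input_dim=5, hidden_dim=10, output_length=8,
                      output_dim=8, depth=3)
model.compile(loss='mse', optimizer='rmsprop')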
Example 9
def SimpleSeq2Seq(output_dim,
                  output_length,
                  hidden_dim=None,
                  input_shape=None,
                  batch_size=None,
                  batch_input_shape=None,
                  input_dim=None,
                  input_length=None,
                  is_embedding=True,
                  embedding_dim=None,
                  n_tokens=None,
                  depth=1,
                  dropout=0.0,
                  unroll=False,
                  stateful=False):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context
    vector), and the decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.

    '''

    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    elif input_length:
        if not is_embedding and n_tokens > 0:
            pass
        else:
            raise TypeError
    else:
        # TODO Proper error message
        raise TypeError

    if hidden_dim is None:
        hidden_dim = output_dim

    if is_embedding:
        _input = Input(batch_shape=shape)
    else:
        i = Input(shape=(input_length, ), name='sentence_input', dtype='int32')
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           mask_zero=True,
                           input_length=input_length)(i)
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  decode=True,
                                  output_length=output_length)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    x = encoder(_input)
    decoder_outputs = decoder(x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)
    if is_embedding:
        return Model(_input, output)
    else:
        return Model(i, output)
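A call sketch for the token-level path (is_embedding=False), where the model embeds integer token ids internally; the vocabulary size and sequence lengths are illustrative assumptions.

# Sketch: the integer-token variant with an internal Embedding layer.
model = SimpleSeq2Seq(output_dim=64,
                      output_length=12,
                      hidden_dim=128,
                      is_embedding=False,  # expects integer token ids
                      n_tokens=5000,
                      input_length=20)
model.compile(loss='categorical_crossentropy', optimizer='adam')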
Example 10
    def __init__(self, vocab, config, hps):
        super(SummarizationModel, self).__init__(config)
        self.__name = 'pointer_generator_summarizer'
        self.config = config
        self.hps = hps

        self.mode = config['mode']
        self.use_coverage = config['use_coverage']
        self.pointer_gen = config['pointer_gen']
        self.embed_trainable = config['train_embed']
        self.embedding_size = config['embed_size']
        self.vsize = config['vocab_size']
        self.rand_unif_init_mag = config['rand_unif_init_mag']
        self.trunc_norm_init_std = config['trunc_norm_init_std']
        self.hidden_units = self.config['hidden_units']
        self.cov_loss_wt = self.config['cov_loss_wt']

        # Initializers:
        self.rand_unif_init = RandomUniform(minval=-self.rand_unif_init_mag,
                                            maxval=self.rand_unif_init_mag,
                                            seed=123)
        self.trunc_norm_init = TruncatedNormal(stddev=self.trunc_norm_init_std)
        # Optimizers:
        self.adg = optimizers.TFOptimizer(
            K.tf.train.AdagradOptimizer(
                self.hps.lr,
                initial_accumulator_value=self.hps.adagrad_init_acc))
        # Layers
        self.Emb = Embedding(self.vsize,
                             self.embedding_size,
                             weights=config['embed'],
                             trainable=self.embed_trainable)

        # different dictionary for source and target

        # Bi-directional lstm encoder, return (output, states)
        # Dimension: 2*hidden_units
        # concatenated forward and backward vectors
        self.Encoder = Bidirectional(
            CuDNNLSTM(self.hidden_units,
                      return_state=True,
                      return_sequences=True,
                      kernel_initializer=self.rand_unif_init))
        # Decoder is not bi-directional, perform linear reduction...
        # Dense_layer_dimension=encoder_hidden_units

        # Encoder states and output tensors are separated...
        # to initialize decoder

        # Decoder cell input: [input, state_h, state_c]
        self.DecoderCell = LSTMCell(self.hidden_units,
                                    kernel_initializer=self.rand_unif_init,
                                    bias_initializer="zeros",
                                    recurrent_initializer=self.rand_unif_init)
        # Decoder output projector
        # to probabilities[word_index]
        self.DecoderOutputProjector = Dense(
            self.vsize,
            kernel_initializer=self.trunc_norm_init,
            bias_initializer=self.trunc_norm_init,
            activation=None)
        self.ConcatenateAxis1 = Concatenate(axis=1)
        self.ConcatenateLastDim = Concatenate(axis=-1)
        self.StackSecondDim = Lambda(lambda x: K.tf.stack(x, axis=1))
        self.SoftmaxforScore = Softmax(axis=-1)

        self._batch_size = None
        self._enc_batch = None
        self._enc_lens = None
        self._enc_padding_mask = None
        self._enc_batch_extend_vocab = None
        self._max_art_oovs = None
        self._max_art_oovs_inp = None
        self._dec_batch = None
        self._target_batch = None
        self._dec_padding_mask = None
        self._dec_in_state = None
        self._enc_states = None
        self._dec_out_state = None
        self.p_gens = None
        self.prev_coverage = None
        self.coverage = None
        self._coverage_loss = None

        self.check_list = []

        self.check()  # result intentionally ignored
Example 11
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=True,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    [1] Sequence to Sequence Learning with Neural Networks
    [2] Learning Phrase Representations using RNN Encoder–Decoder for Statistical Machine Translation
    [3] Neural Machine Translation by Jointly Learning to Align and Translate
    [4] A Neural Conversational Model

    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence
    elements. A bidirectional encoder is used by default. There is no hidden
    state transfer in this model.

    The math:

            Encoder:
            X = Input sequence of length m.
            H = Bidirectional_LSTM(X); Note that here the LSTM has
            return_sequences = True, so H is a sequence of vectors of
            length m.

            Decoder:
            y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of
            the LSTM (h and c) and v (called the context vector) is a
            weighted sum over H:

            v(i) = sigma(j = 0 to m-1)  alpha(i, j) * H(j)

            The weight alpha(i, j) for each H(j) is computed as follows:
            energy = a(s(i-1), H(j))
            alpha = softmax(energy)
            where a is a feed-forward network.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
    # shape:[batch, max_encoder_length, input_dim]
    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # 1. Define the encoder
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(
        hidden_dim,
        batch_input_shape=(shape[0],
                           shape[2])))  # shape[0]:batch, shape[2]:input_dim

    for _ in range(1, depth[0]):  # depth is just the number of stacked LSTM layers
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(
            shape)  # [batch, max_encoder_length, input_dim]
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    # 2.encode
    # _input:[batch, max_encoder_length, input_dim]
    # encoded: [batch, max_encoder_length, hidden]
    encoded = encoder(_input)

    # 3. Define the decoder
    decoder = RecurrentSequential(
        decode=True,
        output_length=output_length,
        unroll=unroll,  # False
        stateful=stateful)  # False

    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    # attention cell first; deeper decoders stack LSTMDecoderCells after it
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))

        decoder.add(Dropout(dropout))

        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    # 4. decode
    decoded = decoder(encoded)

    inputs = [_input]
    model = Model(inputs=inputs, outputs=decoded)

    return model
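A construction sketch in the README's style for the attention model; the hyperparameters are illustrative.

# Sketch: attention seq2seq over 7-step, 5-dim inputs decoding 8 steps.
model = AttentionSeq2Seq(input_dim=5, input_length=7, hidden_dim=10,
                         output_length=8, output_dim=20, depth=(1, 1))
model.compile(loss='mse', optimizer='rmsprop')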
Example 12
def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None, input_shape=None,
                  batch_size=None, batch_input_shape=None, input_dim=None,
                  input_length=None, depth=1, dropout=0.0, unroll=False,
                  stateful=False):

    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context
    vector), and the decoder decodes the context vector into a sequence of
    vectors. There is no one-to-one relation between the input and output
    sequence elements. The input sequence and output sequence may differ in
    length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.

    '''

    '''
        The block below constructs the shape tuple, which is
        (batch_size, input_length, input_dim).
        # TODO: Add more information about the statefulness of the LSTM and
        # the input_shape it expects.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
    '''
        batch_input_shape = (batch_size, timesteps, data_dim)
        For LSTM arguments see https://keras.io/layers/recurrent/#lstm
        Specifically, hidden_dim = units, a positive integer giving the
        dimensionality of the output space.
    '''
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    decoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  decode=True, output_length=output_length)
    '''
        hidden_dim here equals the output dim of the encoder, so there should
        be no shape error. The second argument is the expected dimension of
        the input; see the comment below.
    '''
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim))
    else:
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim))

    '''
        Input returns a tensor with
        batch_shape = (batch_size, input_length, input_dim).
        See https://keras.io/layers/recurrent/ : note on using statefulness
        in RNNs.
    '''
    _input = Input(batch_shape=shape)

    x = encoder(_input)
    output = decoder(x)
    '''
    Model takes _input as the input and applies everything needed to reach
    output; it will include all layers required in the computation of output
    given _input, so one can think of it as a Theano-style function.
    '''
    return Model(_input, output)
Example 13
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None, input_length=None,
            hidden_dim=None, depth=1, broadcast_state=True, unroll=False,
            stateful=False, inner_broadcast_state=True, teacher_force=False,
            peek=False, dropout=0.):

    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1]-based model and the [2]-based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); where s is the hidden state of the
            LSTM (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            where s is the hidden state of the LSTM (h and c), and C is the
            context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context
            vector at every timestep.
    dropout : Dropout probability in between layers.


    '''

    if isinstance(depth, int):
        depth = (depth, depth)
        # an integer depth means the encoder and decoder get the same
        # number of layers
    if batch_input_shape:
        shape = batch_input_shape
        # the batch input shape is used directly as the model input shape
    elif input_shape:
        shape = (batch_size,) + input_shape
        # without batch_input_shape, prepend the batch size to input_shape;
        # e.g. a batch size of 32 and input of 768 gives (32, 768)
        # input_shape must be a tuple
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
            # the usual general shape: (batch size, sequence length, dim)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim
        # (what would it mean if hidden_dim were also None?)

    encoder = RecurrentSequential(readout=True, state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    '''
    Arguments:
        readout: whether to post-process the output; options include
            add (True), multiply, average, maximum, etc.
        state_sync: whether states are propagated internally; the source
            handles initial_states differently, propagating each cell's
            state to the next batch.
        stateful: a Keras feature that carries cell states across batches
            rather than only between cells. With stateful = True we must
            manually set shuffle = False in fit; after the small sequence
            X[i] (the i-th sample of the input matrix) is trained, Keras
            passes the trained memory state on to X[i+bs] (the (i+bs)-th
            sample) as its initial memory state.
        unroll: a Keras feature that unrolls the LSTM, i.e. the temporal
            sequence is expanded into multiple concatenated cells; this is
            faster but uses more memory.
    '''
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))
    # encoder depth is given by depth[0]

    dense1 = TimeDistributed(Dense(hidden_dim))
    '''
    dense1:
        TimeDistributed applies the same Dense layer to every vector of a
        (input_length, input_dim) sample in the batch; the weights are shared
        across the whole length, so the output becomes
        (batch_size, input_length, hidden_dim).
    '''
    dense1.supports_masking = True
    dense2 = Dense(output_dim)
    '''
    dense2:
        processes the encoding coming out of the encoder, reshaping it to
        output_dim before handing it to the decoder.
    '''

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state, decode=True,
                                  output_length=output_length, unroll=unroll,
                                  stateful=stateful, teacher_force=teacher_force)
    '''
    Arguments:
        teacher_force: instead of feeding each state's output as the next
        state's input, the corresponding previous item of the training data's
        ground truth is used as the next state's input. Combined with beam
        search and scheduled sampling, a probability p decides between
        teacher forcing and free running; p decays as training epochs
        increase, gradually reducing the teacher-forcing rate so the model
        both learns quickly and generalizes.
    '''

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))
    # decoder depth is given by depth[1]

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    # first project the input through the TimeDistributed layer to
    # hidden_dim-sized vectors
    encoded_seq = encoder(encoded_seq)
    # then encode with the encoder
    # below, handle a few options: whether to broadcast state, whether to
    # use teacher forcing, etc.
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
    # follow-up processing after encoding

    # decode; initial_state controls whether states handed over from the
    # encoder are used
    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq, initial_state=states)
    
    seq2seq_model = Model(inputs, decoded_seq)
    # the full model maps the input to the decoded sequence; the encoder can
    # also be pulled out on its own to reuse its encoding
    # in addition, this model maps a real-time news sequence to a sequence
    # of stock-price fluctuations; to bring news from off-market hours into
    # training, the encoder and decoder weights must be shared, with new
    # Flatten and Dense layers added: the decoder output sequence is treated
    # as a fluctuation encoding and passed through a Dense layer to output a
    # scalar, which amounts to appending a Sequential stack after the
    # RecurrentSequential
    seq2seq_model.encoder = encoder
    seq2seq_model.decoder = decoder

    decoded_vec = Flatten()(decoded_seq)
    decoded_vec = Dense(1, activation='tanh')(decoded_vec)
    seq2vec_model = Model(inputs, [decoded_seq, decoded_vec])
    # the final model has 1 input and 2 outputs

    return seq2vec_model
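Because this variant returns a model with one input and two outputs (the decoded sequence and the scalar from the Flatten/Dense head), compiling might weight the two losses; the weights below are an illustrative assumption.

# Sketch: compiling the two-output seq2vec model; loss weights are assumed.
model = Seq2Seq(batch_input_shape=(16, 7, 5), hidden_dim=10,
                output_length=8, output_dim=20)
model.compile(optimizer='rmsprop',
              loss=['mse', 'mse'],
              loss_weights=[1.0, 0.5])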
Example 14
def AttentionSeqtoSeq(output_dim, output_length, batch_input_shape=None,
                     batch_size=None, input_shape=None, input_length=None,
                     input_dim=None, hidden_dim=None, depth=1,
                     bidirectional=True, unroll=False, stateful=False, dropout=0.0,
                     ):
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    for _ in range(1, depth[0]):
        #encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)
    # decoder_input = Input(batch_shape=encoded.shape)  # newly added
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)

    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            #decoder.add(Dropout(dropout))
            decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        #decoder.add(Dropout(dropout))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        decoder.add(Dense(output_dim * 2))
        decoder.add(Dense(output_dim, activation="softmax"))
    
    inputs = [_input]
    decoded = decoder(encoded)
    model = Model(inputs, decoded)
    return model
Example 15
from keras import layers
from recurrentshop import LSTMCell, RecurrentModel
""" Example: Create a simple LSTM that supports readout with an initial readout
input. """

# Main cell input.
cell_input = layers.Input(shape=(10, ))
# Readout input.
readout_input = layers.Input(shape=(100, ))

# Internal inputs for the LSTM cell.
last_state = layers.Input(shape=(100, ))
last_output = layers.Input(shape=(100, ))

# Create the LSTM layer.
# Fuse the main cell input with the readout, and feed the fused vector to
# the LSTM cell so the readout actually influences the recurrence.
fused_inputs = layers.concatenate([cell_input, readout_input])
lstm1_o, lstm1_h, lstm1_c = LSTMCell(100)(
    [fused_inputs, last_state, last_output])

# Build the RNN.
rnn = RecurrentModel(input=cell_input,
                     output=lstm1_o,
                     initial_states=[last_state, last_output],
                     final_states=[lstm1_h, lstm1_c],
                     readout_input=readout_input)

# Main sequence input.
sequence_input = layers.Input(shape=(50, 10))
# Initial readout input.
initial_readout = layers.Input(shape=(100, ))

rnn_output = rnn(sequence_input, initial_readout=initial_readout)
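A plausible continuation (an assumption, not part of the original example): wrap the readout RNN in a Model and run it on random data.

# Sketch: exercise the readout RNN end to end; shapes follow the example.
import numpy as np
from keras.models import Model

model = Model([sequence_input, initial_readout], rnn_output)
x = np.random.random((8, 50, 10))
r0 = np.random.random((8, 100))
y = model.predict([x, r0])  # (8, 100): the final LSTM output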
Example 16
def mctn_model(output_dim,
               output_length,
               batch_input_shape=None,
               batch_size=None,
               input_shape=None,
               input_length=None,
               input_dim=None,
               hidden_dim=None,
               depth=1,
               bidirectional=True,
               unroll=False,
               stateful=False,
               dropout=0,
               is_cycled=True):
    """
  MCTN Model (by default with Cycle Consistency Loss) 
  """
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError
    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # encoder phase
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)

    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    # encoder.add(Dropout(dropout))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    # decoder phase
    decoder = RecurrentSequential(
        decode=True,
        output_length=1,  # fixed to 1; the output_length argument is unused here
        unroll=unroll,
        stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    inputs = [_input]
    decoded_0 = decoder(encoded)
    decoded = Reshape((output_dim, ))(decoded_0)

    # cycle phase
    cycled_decoded = None
    if is_cycled:
        cycled_encoded = encoder(decoded_0)
        cycled_decoded = decoder(cycled_encoded)

    return inputs, encoded, decoded, cycled_decoded
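mctn_model returns raw tensors rather than a Model, so a minimal assembly sketch follows. Since the cycle phase re-encodes the decoded output with the same encoder, output_dim is chosen equal to the input feature dimension; all numbers here are assumptions.

# Sketch: assemble a trainable Model from mctn_model's returned tensors.
from keras.models import Model

inputs, encoded, decoded, cycled_decoded = mctn_model(
    output_dim=32,            # matches the input feature dim for the cycle
    output_length=1,
    input_shape=(20, 32),
    hidden_dim=64)
outputs = [decoded] if cycled_decoded is None else [decoded, cycled_decoded]
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='mse')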
Example 17
def paired_trimodal_model(output_dim,
                          output_length,
                          batch_input_shape=None,
                          batch_size=None,
                          input_shape=None,
                          input_length=None,
                          input_dim=None,
                          hidden_dim=None,
                          depth=1,
                          bidirectional=True,
                          unroll=False,
                          stateful=False,
                          dropout=0.0):
    """
  One modal translates into two other modalities, no cycle involved 
  The model has 1 encoder and 2 decoders 
  """
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        # TODO Proper error message
        raise TypeError

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # encoder phase
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    # encoder phase
    encoder_2 = RecurrentSequential(unroll=unroll,
                                    stateful=stateful,
                                    return_sequences=True)
    encoder_2.add(
        LSTMCell(hidden_dim, batch_input_shape=(shape[0], output_dim)))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

        encoder_2.add(Dropout(dropout))
        encoder_2.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

        encoder_2 = Bidirectional(encoder_2, merge_mode='sum')
        # encoder_2 consumes the first decoder's output, so build it against
        # the decoded sequence's shape rather than the raw input shape
        encoder_2.forward_layer.build((shape[0], output_length, output_dim))
        encoder_2.backward_layer.build((shape[0], output_length, output_dim))
        # patch
        encoder_2.layer = encoder_2.forward_layer

    encoded_one = encoder(_input)

    # decoder phase
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    decoder_2 = RecurrentSequential(decode=True,
                                    output_length=input_length,
                                    unroll=unroll,
                                    stateful=stateful)
    decoder_2.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    if depth[1] == 1:
        decoder_2.add(
            AttentionDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))
    else:
        decoder_2.add(
            AttentionDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder_2.add(Dropout(dropout))
            decoder_2.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder_2.add(Dropout(dropout))
        decoder_2.add(
            LSTMDecoderCell(output_dim=input_dim, hidden_dim=hidden_dim))

    inputs = [_input]
    decoded_one = decoder(encoded_one)

    encoded_two = encoder_2(decoded_one)
    decoded_two = decoder_2(encoded_two)

    return inputs, encoded_one, encoded_two, decoded_one, decoded_two
Example 18
def Seq2Seq(output_dim, output_length, lookup_matrix, hidden_dim=None, depth=1, broadcast_state=True, inner_broadcast_state=True, teacher_force=False, peek=False, dropout=0., **kwargs):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1]-based model and the [2]-based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); where s is the hidden state of the LSTM
        (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
                      should be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be
                            propagated throughout the LSTM stack in deep
                            models.
    peek : Specifies if the decoder should be able to peek at the context
           vector at every timestep.
    dropout : Dropout probability in between layers.


    '''
    if isinstance(depth, int):
        depth = [depth, depth]
    if 'batch_input_shape' in kwargs:
        shape = kwargs['batch_input_shape']
        del kwargs['batch_input_shape']
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs['input_shape'])
        del kwargs['input_shape']
    elif 'input_dim' in kwargs:
        if 'input_length' in kwargs:
            shape = (None, kwargs['input_length'], kwargs['input_dim'])
            del kwargs['input_length']
        else:
            shape = (None, None, kwargs['input_dim'])
        del kwargs['input_dim']
    if 'unroll' in kwargs:
        unroll = kwargs['unroll']
        del kwargs['unroll']
    else:
        unroll = False
    if 'stateful' in kwargs:
        stateful = kwargs['stateful']
        del kwargs['stateful']
    else:
        stateful = False
    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True, state_sync=inner_broadcast_state, input_length=shape[1], unroll=unroll, stateful=stateful, return_states=broadcast_state)
    for i in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim), **kwargs))
        encoder.add(Dropout(dropout))
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(readout='add' if peek else 'readout_only', state_sync=inner_broadcast_state, output_length=output_length, unroll=unroll, stateful=stateful, decode=True, input_length=shape[1])
    for i in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], output_dim), **kwargs))

    input = Input(batch_shape=(shape[0], shape[1]))
    print(input.shape)
    embedded_input = Embedding(input_dim=lookup_matrix.shape[0], output_dim=lookup_matrix.shape[1], weights=[lookup_matrix])(input)
    print(embedded_input.shape)
    input._keras_history[0].supports_masking = True

    encoded_seq = dense1(embedded_input)
    # print encoded_seq.shape
    encoded_seq = encoder(encoded_seq)
    print(encoded_seq)

    if broadcast_state:
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = [None] * 2
    encoded_seq = dense2(encoded_seq)
    inputs = [input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
    decoded_seq = decoder({'input': encoded_seq, 'ground_truth': inputs[1] if teacher_force else None, 'initial_readout': encoded_seq, 'states': states})

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder

    print "==========Input========="
    print model.input
    print "==========Input========="
    print model.output

    return model
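For reference, the switches documented above only rewire the decoder, so a call site stays small. A minimal usage sketch, not from the original repo: it assumes this Seq2Seq factory and the legacy recurrentshop/Keras stack it targets, and uses a random lookup_matrix purely as a placeholder.

import numpy as np

lookup_matrix = np.random.randn(1000, 50).astype('float32')  # placeholder vocab x dim

model = Seq2Seq(output_dim=50,
                output_length=8,
                lookup_matrix=lookup_matrix,
                hidden_dim=128,
                depth=(2, 2),        # 2 encoder LSTMs, 2 decoder LSTMs
                peek=True,           # [2]-style: decoder sees the context every step
                input_length=10,
                input_dim=50)
model.compile(loss='mse', optimizer='rmsprop')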
Exemplo n.º 19
0
def Seq2Seq(output_dim,
            output_length,
            hidden_dim=None,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            is_embedding=True,
            embedding_dim=None,
            n_tokens=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=False,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1]-based model and
    the [2]-based model using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
    y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1), C)
    y(0) = LSTM(s0, C, C)
    Where s is the hidden state of the LSTM (h and c), and C is the context vector
    from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.


    '''

    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    elif input_length:
        if is_embedding or n_tokens <= 0:
            raise TypeError('with only input_length given, set is_embedding=False '
                            'and provide a positive n_tokens')

    if hidden_dim is None:
        hidden_dim = output_dim

    if is_embedding:
        _input = Input(batch_shape=shape)
    else:
        i = Input(shape=(input_length, ), name='sentence_input', dtype='int32')
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           mask_zero=True,
                           input_length=input_length)(i)
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(readout=True,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    # dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        #decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        decoder.add(LSTMCell(output_dim))
    else:
        #decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            #decoder.add(LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
            decoder.add(LSTMCell(hidden_dim))
        decoder.add(Dropout(dropout))
        #decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        decoder.add(LSTMCell(output_dim))

    x = encoder(_input)
    if broadcast_state:
        assert type(x) is list
        states = x[-2:]
        x = x[0]
    else:
        states = None
    decoder_outputs = decoder(x, initial_state=states, initial_readout=x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)

    if is_embedding:
        return Model(_input, output)
    else:
        return Model(i, output)
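Because this variant can build its own Embedding when is_embedding=False, the caller can feed raw token ids. Another minimal sketch under the same legacy-stack assumption; vocabulary size, lengths, and dims below are placeholders:

model = Seq2Seq(output_dim=128,
                output_length=12,
                hidden_dim=128,
                is_embedding=False,   # let the model embed n_tokens ids itself
                n_tokens=5000,
                embedding_dim=64,
                input_length=20,
                depth=2,
                peek=True)
model.compile(loss='categorical_crossentropy', optimizer='adam')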
Exemplo n.º 20
0
def SimpleSeq2Seq(output_dim, output_length, latent_dim, batch_size, epsilon_std,
                  lookup_matrix=None, hidden_dim=None, depth=1, dropout=0., **kwargs):
    '''
    Simple model for sequence to sequence learning.
    The encoder encodes the input sequence to a vector (called the context vector).
    The decoder decodes the context vector into a sequence of vectors.
    There is no one-to-one relation between the input and output sequence elements.
    The input sequence and output sequence may differ in length.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3, 
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the 
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    dropout : Dropout probability in between layers.

    '''
    if type(depth) == int:
        depth = [depth, depth]
    if 'batch_input_shape' in kwargs:
        shape = kwargs['batch_input_shape']
        del kwargs['batch_input_shape']
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs['input_shape'])
        del kwargs['input_shape']
    elif 'input_dim' in kwargs:
        if 'input_length' in kwargs:
            shape = (None, kwargs['input_length'], kwargs['input_dim'])
            del kwargs['input_length']
        else:
            shape = (None, None, kwargs['input_dim'])
        del kwargs['input_dim']
    if 'unroll' in kwargs:
        unroll = kwargs['unroll']
        del kwargs['unroll']
    else:
        unroll = False
    if 'stateful' in kwargs:
        stateful = kwargs['stateful']
        del kwargs['stateful']
    else:
        stateful = False
    if not hidden_dim:
        hidden_dim = output_dim


    embedder = Embedding(input_dim=lookup_matrix.shape[0],
                         output_dim=lookup_matrix.shape[1],
                         input_length=output_length,
                         weights=[lookup_matrix])
    encoder = RecurrentContainer(unroll=unroll, stateful=stateful, input_length=shape[1])
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim, **kwargs))

    decoder = RecurrentContainer(unroll=unroll, stateful=stateful, decode=True, output_length=output_length, input_length=shape[1])
    decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim)))

    if depth[1] == 1:
        decoder.add(LSTMCell(output_dim, **kwargs))
    else:
        decoder.add(LSTMCell(hidden_dim, **kwargs))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(LSTMCell(hidden_dim, **kwargs))

        decoder.add(Dropout(dropout))
        decoder.add(LSTMCell(output_dim, **kwargs))


    x = Input(batch_shape=(None, output_length))
    embedded_x = embedder(x)

    h_encoded = encoder(embedded_x)

    def sampling(args):
        z_mean, z_log_var = args         
        epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0., std=epsilon_std)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    z_mean = Dense(latent_dim)(h_encoded)
    z_log_var = Dense(latent_dim)(h_encoded)
    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
    print(z)

    h_0 = Dense(hidden_dim, activation='relu')(z)
    print(h_0)
    model_out = decoder(h_0)
    print(model_out)

    # model_out = decoder(h_encoded)

    y = Input(batch_shape=(None, output_length))
    embedded_y = embedder(y)
    print(embedded_y)

    loss = tf.reduce_sum(mean_squared_error(model_out, embedded_y))

    adam = tf.train.AdamOptimizer()
    optimizer = adam.minimize(loss)

    return optimizer, loss, x, y
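The sampling closure above is the standard VAE reparameterization trick: z = mu + exp(log_var / 2) * eps with eps ~ N(0, I), which keeps the stochastic node differentiable with respect to mu and log_var. A self-contained numpy sketch of the same computation (all shapes are placeholders):

import numpy as np

batch_size, latent_dim, epsilon_std = 4, 16, 1.0
z_mean = np.zeros((batch_size, latent_dim))
z_log_var = np.zeros((batch_size, latent_dim))

epsilon = epsilon_std * np.random.randn(batch_size, latent_dim)
z = z_mean + np.exp(z_log_var / 2.0) * epsilon  # same formula as sampling()
print(z.shape)  # (4, 16)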
Exemplo n.º 21
0
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    is_embedding=True,
    embedding_dim=None,
    n_tokens=1000,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=False,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence elements.
    A bidirectional encoder is used by default. There is no hidden state transfer in this
    model.

    The math:

            Encoder:
            X = Input Sequence of length m.
            H = Bidirection_LSTM(X); Note that here the LSTM has return_sequences = True,
            so H is a sequence of vectors of length m.

            Decoder:
    y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of the LSTM (h and c)
    and v (called the context vector) is a weighted sum over H:

    v(i) =  sigma(j = 0 to m-1)  alpha(i, j) * H(j)

    The weight alpha[i, j] for each hj is computed as follows:
    energy = a(s(i-1), H(j))
    alpha = softmax(energy)
    Where a is a feed forward network.

    '''

    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    elif input_length:
        if is_embedding == False and n_tokens > 0:
            pass
        else:
            raise TypeError

    if hidden_dim is None:
        hidden_dim = output_dim
    if is_embedding:
        _input = Input(batch_shape=shape)
        _input._keras_history[0].supports_masking = True
    else:
        i = Input(shape=(input_length, ), name='sentence_input', dtype='int32')
        i._keras_history[0].supports_masking = True
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           input_length=input_length)(i)
        shape = (batch_size, ) + (input_length, ) + (embedding_dim, )

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))
    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    x = encoder(_input)
    decoder_outputs = decoder(x)
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)
    if is_embedding:
        return Model(_input, output)
    else:
        return Model(i, output)
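The attention math in the docstring is easy to check in isolation: score each encoder state H(j) against the previous decoder state, softmax the scores into alpha, and take the weighted sum as the context vector v. A numpy sketch, with a plain dot product standing in for the learned feed-forward scorer a (that substitution is an assumption made for brevity):

import numpy as np

m, hidden = 6, 8                    # encoder length and state size (placeholders)
H = np.random.randn(m, hidden)      # encoder outputs H(0) .. H(m-1)
s_prev = np.random.randn(hidden)    # previous decoder state s(i-1)

energy = H @ s_prev                             # a(s(i-1), H(j)), here a dot product
alpha = np.exp(energy) / np.exp(energy).sum()   # softmax over j
v = alpha @ H                                   # v(i) = sum_j alpha(i, j) * H(j)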
Exemplo n.º 22
0
        Created : 22nd Oct, 2018
"""

import keras
from keras.layers import Input, Multiply
from keras.layers import LSTM, ConvLSTM2D
# from keras.layers import LSTMCell

from CustomNets import make_from_mobilenet

import code
import numpy as np
import cv2

image_nrows = 240
image_ncols = 320
image_nchnl = 3

input_img = Input(shape=(50, ), name='feature_cube')
att = Input(shape=(50, ), name='attention')

u = Multiply()([input_img, att])

from recurrentshop import LSTMCell
state1_tm1 = Input(shape=(10, ))  # previous h state (placeholder size)
state2_tm1 = Input(shape=(10, ))  # previous c state (placeholder size)
lstm_output, state1_t, state2_t = LSTMCell(10)([u, state1_tm1, state2_tm1])

model = keras.models.Model(inputs=[input_img, att], outputs=u)

model.summary()
keras.utils.plot_model(model, to_file='./model_att.png', show_shapes=True)
Exemplo n.º 23
0
def AttentionSeq2Seq(output_dim, output_length, hidden_dim=None, depth=1, bidirectional=True, dropout=0., **kwargs):
	'''
	This is an attention Seq2seq model based on [3].
	Here, there is a soft alignment between the input and output sequence elements.
	A bidirectional encoder is used by default. There is no hidden state transfer in this
	model.

	The math:

		Encoder:
		X = Input Sequence of length m.
		H = Bidirection_LSTM(X); Note that here the LSTM has return_sequences = True, 
		so H is a sequence of vectors of length m.

		Decoder:
        y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of the LSTM (h and c)
        and v (called the context vector) is a weighted sum over H:

        v(i) =  sigma(j = 0 to m-1)  alpha(i, j) * H(j)

        The weight alpha[i, j] for each hj is computed as follows:
        energy = a(s(i-1), H(j))        
        alpha = softmax(energy)
        Where a is a feed forward network.

	'''
	if type(depth) == int:
		depth = [depth, depth]
	if 'batch_input_shape' in kwargs:
		shape = kwargs['batch_input_shape']
		del kwargs['batch_input_shape']
	elif 'input_shape' in kwargs:
		shape = (None,) + tuple(kwargs['input_shape'])
		del kwargs['input_shape']
	elif 'input_dim' in kwargs:
		if 'input_length' in kwargs:
			shape = (None, kwargs['input_length'], kwargs['input_dim'])
			del kwargs['input_length']
		else:
			shape = (None, None, kwargs['input_dim'])
		del kwargs['input_dim']
	if 'unroll' in kwargs:
		unroll = kwargs['unroll']
		del kwargs['unroll']
	else:
		unroll = False
	if 'stateful' in kwargs:
		stateful = kwargs['stateful']
		del kwargs['stateful']
	else:
		stateful = False
	if not hidden_dim:
		hidden_dim = output_dim
	encoder = RecurrentContainer(unroll=unroll, stateful=stateful, return_sequences=True, input_length=shape[1])
	encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]), **kwargs))
	for _ in range(1, depth[0]):
		encoder.add(Dropout(dropout))
		encoder.add(LSTMCell(hidden_dim, **kwargs))
	input = Input(batch_shape=shape)
	if bidirectional:
		encoder = Bidirectional(encoder, merge_mode='sum')
	encoded = encoder(input)
	decoded = encoded
	for _ in range(1, depth[1]):
		decoder = AttentionDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], shape[1], hidden_dim)).get_layer(decode=True, output_length=output_length, unroll=unroll, stateful=stateful)
		decoded = Dropout(dropout)(decoded)
		decoded = decoder(decoded)
	decoder = AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], output_length if depth[1] > 1 else shape[1], hidden_dim)).get_layer(decode=True, output_length=output_length, unroll=unroll, stateful=stateful)
	decoded = Dropout(dropout)(decoded)
	decoded = decoder(decoded)
	model = Model(input, decoded)
	return model
Exemplo n.º 24
0
    print(out.shape)  #->(1,10)

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    model.fit(x=Dx, y=Dy, batch_size=4, epochs=20)

    quit()

if False:
    from keras.layers import Input, add, Activation
    from recurrentshop import LSTMCell, GRUCell, RecurrentModel
    # RNNCell - Using RNNCells in Functional API
    input = Input(shape=(5, ))
    state1_tm1 = Input((10, ))
    state2_tm1 = Input((10, ))
    state3_tm1 = Input((10, ))

    lstm_output, state1_t, state2_t = LSTMCell(10)(
        [input, state1_tm1, state2_tm1])
    gru_output, state3_t = GRUCell(10)([input, state3_tm1])

    output = add([lstm_output, gru_output])
    output = Activation('tanh')(output)

    rnn = RecurrentModel(input=input,
                         initial_states=[state1_tm1, state2_tm1, state3_tm1],
                         output=output,
                         final_states=[state1_t, state2_t, state3_t])
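Once assembled, a RecurrentModel behaves like any other Keras recurrent layer, so it can be applied straight to a sequence input. A minimal continuation of the snippet above (assuming the `if False:` guard is flipped on so `rnn` exists; the sequence length 7 is a placeholder):

    from keras.models import Model

    seq = Input(shape=(7, 5))   # 7 timesteps of the same 5-d features
    out = rnn(seq)              # runs the fused LSTM+GRU cell over all timesteps
    Model(seq, out).summary()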
Exemplo n.º 25
0
    def __init__(self, config):

        self.model = None
        self.check_list = {
            'text_maxlen', 'sentence_maxnum', 'sentence_maxlen', 'hidden_size',
            'delimiter', 'pad_word', 'unk_word', 'start_sent', 'end_sent',
            'vocab_size', 'embed_size', 'learning_rate'
        }
        self.config = config
        assert self.check(), 'parameter check failed'

        self.size = self.config['hidden_size']

        self.Emb = Embedding(self.config['vocab_size'],
                             self.config['embed_size'],
                             trainable=True)
        self.Splitlayer_keephead = SplitLayer(
            delimiter=self.config['delimiter'],
            output_sentence_len=self.config['sentence_maxlen'],
            output_sentence_num=self.config['sentence_maxnum'],
            pad_word=self.config['pad_word'],
            cut_head=False,
            name='Split_Layer_keep_head')
        self.Splitlayer_cuthead = SplitLayer(
            delimiter=self.config['delimiter'],
            output_sentence_len=self.config['sentence_maxlen'],
            output_sentence_num=self.config['sentence_maxnum'],
            pad_word=self.config['pad_word'],
            cut_head=True,
            name='Split_Layer_cut_head')
        self.Sentence_reshape1D = Reshape((self.config['sentence_maxnum'] *
                                           self.config['sentence_maxlen'], ),
                                          name='Sentence_reshape1D')

        self.Sentence_reshape2D = Reshape((
            self.config['sentence_maxnum'],
            self.config['sentence_maxlen'],
            self.config['embed_size'],
        ),
                                          name='Sentence_reshape2D')
        self.Encoder_word = CuDNNLSTM(units=self.size,
                                      name='Encoder_word',
                                      return_state=True)
        self.Decoder_word_cell = LSTMCell(units=self.size,
                                          name='Decoder_word_cell')

        self.AttentionMapper = Linear(output_size=self.size,
                                      bias=True,
                                      bias_start=0.0,
                                      activation='tanh')
        self.Join = Dense(units=1, use_bias=False,
                          name='Join')  # shape : [attention_vec_size]
        self.Exp = Lambda(lambda x: K.exp(x), name='Exp')
        self.Calcprob = Dense(units=self.config['vocab_size'],
                              activation='softmax',
                              name='Calcprob')
        self.ArgMax = Lambda(lambda x: K.argmax(x, axis=-1), dtype='int32')
        self.Printer = Lambda(lambda x: K.tf.Print(x, [x]))
        self.Identical = Lambda(lambda x: x, name='Identical')

        self.EncoderModel = None
        self.DecoderModel_onestep = None

        self._mask = None
        self._targets = None

        self.optim = optimizers.SGD(config['learning_rate'])
        return
Exemplo n.º 26
0
def Seq2SeqtableQA(row_maxlen,
                   question_maxlen,
                   answer_maxlen,
                   len_dic,
                   hidden_dim,
                   batch_size,
                   depth=(1, 1),
                   dropout=0.0,
                   unroll=False,
                   stateful=False):
    # def Seq2SeqtableQA(output_dim, output_length, hidden_dim=None, input_shape=None,
    # batch_size=None, batch_input_shape=None, input_dim=None,
    # input_length=None, depth=1, dropout=0.0, unroll=False,
    # stateful=False):
    '''
    Based on SimpleSeq2Seq
    from https://github.com/farizrahman4u/seq2seq/blob/master/seq2seq/models.py
    '''

    # input placeholders
    table = Input((row_maxlen, len_dic))
    question = Input((question_maxlen, len_dic))

    # table encoder
    table_encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    # table_encoder.add(Embedding(input_dim=len_dic,
    #                             output_dim=hidden_dim,
    #                             input_length=row_maxlen,
    #                             # weights = [embedding_matrix],
    #                             mask_zero=True,
    #                             trainable=False))
    table_encoder.add(
        LSTMCell(hidden_dim, batch_input_shape=(row_maxlen, len_dic)))

    for _ in range(1, depth[0]):
        table_encoder.add(Dropout(dropout))
        table_encoder.add(LSTMCell(hidden_dim))

    table_encoded = table_encoder(table)

    # question encoder
    question_encoder = RecurrentSequential(unroll=unroll, stateful=stateful)
    question_encoder.add(
        LSTMCell(hidden_dim, batch_input_shape=(question_maxlen, len_dic)))

    for _ in range(1, depth[0]):
        question_encoder.add(Dropout(dropout))
        question_encoder.add(LSTMCell(hidden_dim))

    question_encoded = question_encoder(question)

    # match table and question
    match = dot([table_encoded, question_encoded], axes=(1, 1))
    # match = Activation('softmax')(match)

    # answer decoder
    answer_decoder = RecurrentSequential(unroll=unroll,
                                         stateful=stateful,
                                         decode=True,
                                         output_length=answer_maxlen)
    answer_decoder.add(Dropout(dropout, input_shape=(batch_size, hidden_dim)))

    if depth[1] == 1:
        answer_decoder.add(LSTMCell(len_dic))
    else:
        answer_decoder.add(LSTMCell(hidden_dim))
        for _ in range(depth[1] - 2):
            answer_decoder.add(Dropout(dropout))
            answer_decoder.add(LSTMCell(hidden_dim))
    answer_decoder.add(Dropout(dropout))
    answer_decoder.add(LSTMCell(len_dic))

    answer_decoded = answer_decoder(match)

    return Model(inputs=[table, question], outputs=answer_decoded)
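The match step above reduces the table and the question to single hidden_dim vectors and combines them with dot(axes=(1, 1)), i.e. one inner product per batch element, so the decoder conditions on a 1-d summary of table/question agreement. A numpy sketch of what that merge computes (dimensions are placeholders):

import numpy as np

batch, hidden_dim = 2, 8
table_encoded = np.random.randn(batch, hidden_dim)
question_encoded = np.random.randn(batch, hidden_dim)

# keras.layers.dot(..., axes=(1, 1)) == batched inner product -> shape (batch, 1)
match = np.sum(table_encoded * question_encoded, axis=1, keepdims=True)
print(match.shape)  # (2, 1)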
Exemplo n.º 27
0
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=True,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence elements.
    A bidirectional encoder is used by default. There is no hidden state transfer in this
    model.
    The math:
            Encoder:
            X = Input Sequence of length m.
            H = Bidirection_LSTM(X); Note that here the LSTM has return_sequences = True,
            so H is a sequence of vectors of length m.
            Decoder:
    y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of the LSTM (h and c)
    and v (called the context vector) is a weighted sum over H:
    v(i) =  sigma(j = 0 to m-1)  alpha(i, j) * H(j)
    The weight alpha[i, j] for each hj is computed as follows:
    energy = a(s(i-1), H(j))
    alpha = softmax(energy)
    Where a is a feed forward network.
    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        raise TypeError('could not infer input shape: pass batch_input_shape, '
                        'input_shape, or input_dim')
    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    # encoded = encoder(_input)
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    # inputs = [_input]
    # decoded = decoder(encoded)
    # model = Model(inputs, decoded)
    return encoder, decoder
Exemplo n.º 28
0
def mctn_level2_model(input,
                      output_dim,
                      output_length,
                      batch_input_shape=None,
                      batch_size=None,
                      input_shape=None,
                      input_length=None,
                      input_dim=None,
                      hidden_dim=None,
                      depth=1,
                      bidirectional=True,
                      unroll=False,
                      stateful=False,
                      dropout=0.0):
    """ 
  Level 2 MCTN used for translation from the joint embedding of
  two modalities to the third one. Because there is no ground truth,
  no cycle phase happens.
  """
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        raise TypeError('could not infer input shape: pass batch_input_shape, '
                        'input_shape, or input_dim')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch
        encoder.layer = encoder.forward_layer

    encoded = encoder(input)
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    inputs = [input]
    decoded = decoder(encoded)

    return inputs, encoded, decoded
Exemplo n.º 29
0
class Vae:
    dec_input = Input(shape=(128, ))
    decoder_input = Input(shape=(133, ))
    h_in = Input(shape=(512, ))
    c_in = Input(shape=(512, ))
    readout_in = Input(shape=(133, ))
    enc_1 = Bidirectional(LSTM(256, name='Enc_RNN'), name='BiDir')
    enc_mean = Dense(128, name='mean')
    enc_log_sigma = Dense(128, name='log_sigma')
    h_init = Dense(1024, activation='tanh', name='state_init')
    dec_1 = LSTM(512, return_sequences=True, name='Dec_RNN')
    dec_3 = LSTMCell(512)
    mdn = MixtureDensity(5, 20, name='mdn')
    kl_tolerance = 0.2
    kl_weight_start = 0.01
    kl_weight = 0.5
    learning_rate = 0.001
    decay_rate = 0.9999
    kl_decay_rate = 0.99995
    min_learning_rate = 0.00001

    def __init__(self, max_len=250):
        self.curr_kl_weight = self.kl_weight_start
        self.max_len = max_len
        self.input = Input((max_len, 5), name="stroke_batch")
        self.output = Input((max_len, 5), name="stroke_target")

        self.build_model()

        print(self.model.summary())

    def tile(self, tensor):
        return K.tile(tensor, [1, self.max_len, 1])

    def build_model(self):
        # ====================== ENCODER =============================
        a = self.enc_1(self.output)
        self.mean = self.enc_mean(a)
        self.log_sigma = self.enc_log_sigma(a)

        self.kl_loss = -0.5 * K.mean(
            1 + self.log_sigma - K.square(self.mean) - K.exp(self.log_sigma),
            axis=[0, 1])
        self.kl_loss = K.maximum(self.kl_loss, self.kl_tolerance)

        # encoded = concatenate([mean, log_sigma])
        self.encoder = Model(self.output, self.mean, name='encoder')

        # self.mean = Lambda(lambda x: x[:, :128])(encoded)
        # self.log_sigma = Lambda(lambda x: x[:, 128:])(encoded)
        self.z = Lambda(sampling)([self.mean, self.log_sigma])

        # ====================== VAE ==============================
        _h = self.h_init(self.z)

        _h_1 = Lambda(lambda x: x[:, :512])(_h)
        _c_1 = Lambda(lambda x: x[:, 512:])(_h)

        z_ = Reshape((1, 128))(self.z)
        z_ = Lambda(self.tile)(z_)

        z_ = concatenate([z_, self.input], axis=2)

        out = self.dec_1(z_, initial_state=[_h_1, _c_1])

        out = self.mdn(out)

        self.model = Model([self.output, self.input], out)

        # ====================== DECODER ===========================
        _h_ = self.h_init(self.dec_input)

        _h_1_ = Lambda(lambda x: x[:, :512])(_h_)
        _c_1_ = Lambda(lambda x: x[:, 512:])(_h_)

        _z_ = Reshape((1, 128))(self.dec_input)
        _z_ = Lambda(self.tile)(_z_)

        _z_ = concatenate([_z_, self.input], axis=2)

        out_ = self.dec_1(_z_, initial_state=[_h_1_, _c_1_])

        out_ = self.mdn(out_)

        self.decoder = Model([self.dec_input, self.input], out_)

        def get_mixture_coef(output):
            out_pi = output[:, :20]
            out_mu_x = output[:, 20:40]
            out_mu_y = output[:, 40:60]
            out_sigma_x = output[:, 60:80]
            out_sigma_y = output[:, 80:100]
            out_ro = output[:, 100:120]
            pen_logits = output[:, 120:123]

            # use softmax to normalize pi and q into prob distribution
            # max_pi = K.max(out_pi, axis = 1, keepdims=True)
            # out_pi = out_pi - max_pi
            # out_pi = K.exp(out_pi)
            # normalize_pi = 1 / (K.sum(out_pi, axis = 1, keepdims = True))
            # out_pi = normalize_pi * out_pi
            out_pi = K.softmax(out_pi)

            out_q = K.softmax(pen_logits)
            # max_q = K.max(pen_logits, axis = 1, keepdims = True)
            # out_q = pen_logits - max_q
            # out_q = K.exp(pen_logits)
            # normalize_q = 1 / (K.sum(out_q, axis = 1, keepdims = True))
            # out_q = normalize_q * out_q

            # use tanh to normalize the correlation coefficient
            out_ro = K.tanh(out_ro)

            # use exponential to make sure sigma is positive
            out_sigma_x = K.exp(out_sigma_x)
            out_sigma_y = K.exp(out_sigma_y)

            return out_pi, out_mu_x, out_mu_y, out_sigma_x, out_sigma_y, out_ro, pen_logits, out_q

        def tf_bi_normal(x, y, mu_x, mu_y, sigma_x, sigma_y, ro):
            x_ = K.reshape(x, (-1, 1))
            y_ = K.reshape(y, (-1, 1))
            norm1 = x_ - mu_x
            norm2 = y_ - mu_y
            sigma = sigma_x * sigma_y
            z = (K.square(norm1 / (sigma_x + 1e-8)) +
                 K.square(norm2 / (sigma_y + 1e-8)) -
                 (2 * ro * norm1 * norm2) / (sigma + 1e-8) + 1e-8)
            ro_opp = 1 - K.square(ro)
            result = K.exp(-z / (2 * ro_opp + 1e-8))
            denom = 2 * np.pi * sigma * K.sqrt(ro_opp) + 1e-8
            result = result / denom + 1e-8
            return result

        def get_lossfunc(out_pi, out_mu_x, out_mu_y, out_sigma_x, out_sigma_y,
                         out_ro, out_q, x, y, logits):
            # L_r loss term calculation, L_s part
            result = tf_bi_normal(x, y, out_mu_x, out_mu_y, out_sigma_x,
                                  out_sigma_y, out_ro)
            result = result * out_pi
            result = K.sum(result, axis=1, keepdims=True)
            result = -K.log(result + 1e-8)
            fs = 1.0 - logits[:, 2]
            fs = K.reshape(fs, (-1, 1))
            result = result * fs
            # L_r loss term, L_p part
            result1 = K.categorical_crossentropy(out_q,
                                                 logits,
                                                 from_logits=True)
            result1 = K.reshape(result1, (-1, 1))
            result = result + result1
            return K.mean(result, axis=[0, 1])

        output = K.reshape(out, [-1, 123])

        self.pi, self.mu_x, self.mu_y, self.sigma_x, self.sigma_y, self.ro, \
        self.logits, self.q = get_mixture_coef(output)

        target = K.reshape(self.output, [-1, 5])
        val_x = target[:, 0]
        val_y = target[:, 1]
        pen = target[:, 2:]

        self.rec_loss = get_lossfunc(self.pi, self.mu_x, self.mu_y,
                                     self.sigma_x, self.sigma_y, self.ro,
                                     self.logits, val_x, val_y, pen)

        self.loss = self.rec_loss + self.curr_kl_weight * self.kl_loss

        adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
        self.model.add_loss(self.loss)
        self.model.compile(optimizer=adam, metrics=['accuracy'])

    # def build_decoder(self):
    # dec_out, h, c = self.dec_3([self.decoder_input, self.h_in, self.c_in])

    # dec_out = self.mdn(dec_out)

    # rnn = RecurrentModel(input = self.decoder_input,
    #                      initial_states = [self.h_in, self.c_in],
    #                      output = dec_out, final_states = [h, c],
    #                      return_sequences = True, name = 'Dec_RNN')

    # rnn(z_, initial_state = [_h_1, _c_1])

    def update_params(self, step):
        curr_learning_rate = ((self.learning_rate - self.min_learning_rate) *
                              (self.decay_rate**step) + self.min_learning_rate)
        self.curr_kl_weight = (self.kl_weight -
                               (self.kl_weight - self.kl_weight_start) *
                               (self.kl_decay_rate**step))
        K.set_value(self.model.optimizer.lr, curr_learning_rate)
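tf_bi_normal above evaluates the bivariate Gaussian density that the mixture loss weighs with out_pi. The same formula in plain numpy, handy for sanity-checking one component; the eps guards roughly mirror the 1e-8 stability terms in the class:

import numpy as np

def bi_normal(x, y, mu_x, mu_y, sigma_x, sigma_y, ro, eps=1e-8):
    norm1, norm2 = x - mu_x, y - mu_y
    sigma = sigma_x * sigma_y
    z = ((norm1 / (sigma_x + eps)) ** 2 + (norm2 / (sigma_y + eps)) ** 2
         - 2 * ro * norm1 * norm2 / (sigma + eps))
    ro_opp = 1 - ro ** 2
    return np.exp(-z / (2 * ro_opp + eps)) / (2 * np.pi * sigma * np.sqrt(ro_opp) + eps)

print(bi_normal(0.1, -0.2, 0.0, 0.0, 1.0, 1.0, 0.3))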
Exemplo n.º 30
0
    def _make_model(self):
        """Constructs ContraVAE model.

    Returns:
      Nothing.
    """

        # Load embedding in Embedding layer
        logger.info("Making model...")
        embedding_matrix = self._load_embedding()
        embedding_layer = Embedding(
            self.num_words + 1,
            self.config.embedding_dim,
            weights=[embedding_matrix],
            input_length=self.config.max_sequence_length,
            trainable=False)

        # Q(z|X,c) -- encoder
        # Embedded sequence input
        sequence_inputs = Input(batch_shape=(self.config.batch_size,
                                             self.config.max_sequence_length),
                                dtype='int32')
        embedded_sequence_inputs = embedding_layer(sequence_inputs)
        # Merge with score inputs
        score_inputs = Input(batch_shape=(self.config.batch_size, 1))
        score_inputs_repeated = RepeatVector(
            self.config.max_sequence_length)(score_inputs)
        last_layer = concatenate(
            [embedded_sequence_inputs, score_inputs_repeated], axis=2)
        # LSTM layers
        for dim in self.config.encoder_lstm_dims[:-1]:
            last_layer = LSTM(dim, return_sequences=True)(last_layer)
        last_layer = LSTM(self.config.encoder_lstm_dims[-1],
                          return_sequences=False)(last_layer)
        # Mean and std of z
        z_mean = Dense(self.config.latent_dim, activation='tanh')(last_layer)
        z_log_sigma = Dense(self.config.latent_dim,
                            activation='tanh')(last_layer)

        # Sample z ~ Q(z|X,c)
        def sampling(args):
            z_mean, z_log_sigma = args
            epsilon = K.random_normal_variable(shape=(self.config.batch_size,
                                                      self.config.latent_dim),
                                               mean=0.,
                                               scale=1.)
            return z_mean + K.exp(z_log_sigma / 2.) * epsilon

        z = Lambda(sampling)([z_mean, z_log_sigma])

        # Second score inputs - at training time this is simply equal to
        # score_inputs; at sampling time this could vary.
        score_inputs2 = Input(batch_shape=(self.config.batch_size, 1))
        z_c = concatenate([z, score_inputs2], axis=1)
        # Repeat z_c so every timestep has access to it
        #z_c_repeated = RepeatVector(self.config.max_sequence_length)(z_c)

        # P(X|z,c) -- decoder.
        rnn = RecurrentSequential(
            decode=True, output_length=self.config.max_sequence_length)
        rnn.add(
            LSTMCell(self.config.decoder_lstm_dims[0],
                     input_dim=self.config.latent_dim + 1))
        for dim in self.config.decoder_lstm_dims[1:]:
            rnn.add(LSTMCell(dim))
        decoder_out = TimeDistributed(
            Dense(self.num_words + 1, activation='tanh'))

        # Decoder output
        # x_decoded = rnn(z_c_repeated, ground_truth=sequence_inputs)
        h_decoded = rnn(z_c)
        x_decoded = decoder_out(h_decoded)

        # Construct models
        # VAE
        vae = Model([sequence_inputs, score_inputs, score_inputs2], x_decoded)
        # Encoder
        encoder = Model([sequence_inputs, score_inputs], z_mean)
        # Generator
        generator_z_inputs = Input(batch_shape=(self.config.batch_size,
                                                self.config.latent_dim))
        generator_z_c = concatenate([generator_z_inputs, score_inputs2],
                                    axis=1)
        generator_h_decoded = rnn(generator_z_c)
        generator_x_decoded = decoder_out(generator_h_decoded)
        generator = Model([generator_z_inputs, score_inputs2],
                          generator_x_decoded)

        # Define loss function
        kl_weight = self.config.kl_weight

        def recon_loss(y_true, y_pred):
            """E[log P(X|z,y)].
      """
            recon = K.mean(K.sparse_categorical_crossentropy(output=y_pred,
                                                             target=y_true,
                                                             from_logits=True),
                           axis=1)
            return recon

        def kl_loss(y_true, y_pred):
            """D_KL(Q(z|X,y) || P(z|X)); calculate in closed form as both
      dist. are Gaussian.
      """
            kl = 0.5 * K.mean(
                K.exp(z_log_sigma) + K.square(z_mean) - 1. - z_log_sigma,
                axis=1)
            kl = kl * kl_weight
            return kl

        def vae_loss(y_true, y_pred):
            """Calculate loss = reconstruction loss + KL loss.
      """
            recon = recon_loss(y_true, y_pred)
            kl = kl_loss(y_true, y_pred)
            return recon + kl

        # Compile model
        vae.compile(loss=vae_loss,
                    optimizer=self.config.optimizer,
                    metrics=[recon_loss, kl_loss])

        self.vae = vae
        self.encoder = encoder
        self.generator = generator
        logger.info("Done making model.")