示例#1
0
def encoder_init(input,
                 postcshape,
                 hidden_dim,
                 depth,
                 dropout=0,
                 seq2seq=True,
                 bidirectional=True,
                 unroll=False,
                 stateful=False,
                 Encoder=None,
                 global_name="",
                 return_model=False):
    """Build a stacked LSTM encoder and apply it to ``input``.

    Args:
        input: Tensor the finished encoder is called on.
        postcshape: Shape describing the encoder input. ``postcshape[1:]`` is
            used as the first cell's ``batch_input_shape`` and as the
            ``Input`` shape when ``return_model`` is set; the full value is
            handed to the ``build`` calls of the bidirectional wrapper.
        hidden_dim: Unit count used for every layer that ``Encoder`` does
            not specify.
        depth: ``(encoder_depth, decoder_depth)``; only ``depth[0]`` is read.
            A plain int is also accepted and treated as ``(depth, depth)``,
            matching the other model builders.
        dropout: Rate of the ``Dropout`` inserted between stacked cells.
        seq2seq: Accepted but not used by this function.
        bidirectional: Wrap the stack in ``Bidirectional(merge_mode='sum')``.
        unroll: Forwarded to ``RecurrentSequential``.
        stateful: Forwarded to ``RecurrentSequential``.
        Encoder: Optional list with one unit count per layer. When it has
            fewer than ``depth[0]`` entries, it is padded with ``hidden_dim``.
            The caller's list is never modified.
        global_name: Prefix for the layer name (``global_name + 'encoder'``).
        return_model: When true, the encoder is first wrapped in a ``Model``
            with its own ``Input`` named ``'encoder_input'`` and that model
            is applied to ``input``.

    Returns:
        The result of calling the encoder (or the wrapping model) on
        ``input``.
    """
    if isinstance(depth, int):
        depth = (depth, depth)
    n_layers = depth[0]

    # Identity check: `== None` would invoke a custom __eq__ (e.g. arrays).
    if Encoder is None:
        layer_units = [hidden_dim] * n_layers
    elif len(Encoder) < n_layers:
        layer_units = Encoder + [hidden_dim] * (n_layers - len(Encoder))
    else:
        layer_units = Encoder

    encoder = RecurrentSequential(
        unroll=unroll,
        stateful=stateful,
        # return_states / return_all_states are left off: per the original
        # author, AllStateTransfer needs changes in the tensorflow backend.
        return_sequences=True,
        name=global_name + 'encoder')
    encoder.add(LSTMCell(layer_units[0], batch_input_shape=postcshape[1:]))

    for units in layer_units[1:n_layers]:
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(units))

    if bidirectional:
        encoder = Bidirectional(encoder,
                                merge_mode='sum',
                                name=global_name + 'encoder')
        encoder.forward_layer.build(postcshape)
        encoder.backward_layer.build(postcshape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

    if return_model:
        enc_input = Input(shape=postcshape[1:], name='encoder_input')
        encoder_model = Model(inputs=[enc_input],
                              outputs=[encoder(enc_input)])
        return encoder_model(input)
    return encoder(input)
示例#2
0
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=True,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    Build the encoder and decoder of an attention Seq2seq model.

    Unlike a full model builder, this variant does NOT connect the two
    parts: it returns ``(encoder, decoder)`` and leaves the wiring to the
    caller.

    There is a soft alignment between the input and output sequence
    elements. A bidirectional encoder is used by default. There is no hidden
    state transfer in this model.

    The math:

        Encoder:
            X = Input sequence of length m.
            H = Bidirectional_LSTM(X); the LSTM has return_sequences = True,
            so H is a sequence of vectors of length m.

        Decoder:
            y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of
            the LSTM (h and c) and v (the context vector) is a weighted sum
            over H:

            v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

            The weight alpha[i, j] for each H(j) is computed as follows:
            energy = a(s(i-1), H(j))
            alpha = softmax(energy)
            where a is a feed forward network.

    Input shape resolution (first match wins): ``batch_input_shape``, then
    ``(batch_size,) + input_shape``, then
    ``(batch_size, input_length or None, input_dim)``.

    Args:
        output_dim: Size of the decoder output vectors.
        output_length: Number of decoding steps.
        hidden_dim: Width of the LSTM cells; defaults to ``output_dim``.
        depth: int or ``(encoder_depth, decoder_depth)``.
        bidirectional: Wrap the encoder in ``Bidirectional`` (sum merge).
        unroll, stateful: Forwarded to both ``RecurrentSequential`` stacks.
        dropout: Rate of every ``Dropout`` layer that is inserted.

    Returns:
        Tuple ``(encoder, decoder)``.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        shape = (batch_size, input_length or None, input_dim)
    else:
        raise TypeError(
            'AttentionSeq2Seq needs one of batch_input_shape, input_shape '
            'or input_dim to determine the input shape.')

    if hidden_dim is None:
        hidden_dim = output_dim

    # This Input is not connected to the returned layers; it is kept because
    # the original code creates it (and marks it as supporting masking).
    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # ---- encoder: stack of LSTM cells with dropout in between ----
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

    # ---- decoder: attention cell first, optional LSTM cells after ----
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    return encoder, decoder
示例#3
0
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    is_embedding=True,
    embedding_dim=None,
    n_tokens=1000,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=False,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    Attention Seq2seq model with an optional token-embedding front end and a
    per-step softmax over ``n_tokens`` on the output.

    There is a soft alignment between the input and output sequence
    elements. The encoder is unidirectional by default
    (``bidirectional=False``). There is no hidden state transfer in this
    model.

    The math:

        Encoder:
            X = Input sequence of length m.
            H = LSTM(X) (bidirectional when requested); the LSTM has
            return_sequences = True, so H is a sequence of vectors of
            length m.

        Decoder:
            y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of
            the LSTM (h and c) and v (the context vector) is a weighted sum
            over H:

            v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

            The weight alpha[i, j] for each H(j) is computed as follows:
            energy = a(s(i-1), H(j))
            alpha = softmax(energy)
            where a is a feed forward network.

    Input handling:
        * ``is_embedding=True`` (default): the model input is a tensor of the
          resolved batch shape (``batch_input_shape``, else
          ``(batch_size,) + input_shape``, else
          ``(batch_size, input_length or None, input_dim)``).
        * ``is_embedding=False``: the model input is an int32 tensor of shape
          ``(input_length,)`` named ``'sentence_input'`` that is passed
          through ``Embedding(n_tokens, embedding_dim)``. ``embedding_dim``
          defaults to ``hidden_dim``; any shape resolved from the shape
          arguments is replaced by
          ``(batch_size, input_length, embedding_dim)``.

    Returns:
        A ``Model`` mapping the input to
        ``TimeDistributed(Dense(n_tokens, softmax))`` of the decoder output.

    Raises:
        TypeError: If ``is_embedding`` is true and no input shape can be
            resolved, or if only ``input_length`` is given together with a
            non-positive ``n_tokens``.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    shape = None
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        shape = (batch_size, input_length or None, input_dim)
    elif input_length:
        if is_embedding:
            raise TypeError(
                'AttentionSeq2Seq: input_length alone is not enough when '
                'is_embedding is True; pass batch_input_shape, input_shape '
                'or input_dim as well.')
        if n_tokens <= 0:
            raise TypeError(
                'AttentionSeq2Seq: n_tokens must be positive when the model '
                'embeds its own input (is_embedding=False).')

    # Previously this case fell through and crashed later on the unbound
    # `shape` variable; fail early with a clear message instead.
    if is_embedding and shape is None:
        raise TypeError(
            'AttentionSeq2Seq needs one of batch_input_shape, input_shape '
            'or input_dim to determine the input shape when is_embedding '
            'is True.')

    if hidden_dim is None:
        hidden_dim = output_dim

    if is_embedding:
        model_input = Input(batch_shape=shape)
        model_input._keras_history[0].supports_masking = True
        _input = model_input
    else:
        model_input = Input(shape=(input_length, ),
                            name='sentence_input',
                            dtype='int32')
        model_input._keras_history[0].supports_masking = True
        if embedding_dim is None:
            embedding_dim = hidden_dim
        _input = Embedding(input_dim=n_tokens,
                           output_dim=embedding_dim,
                           input_length=input_length)(model_input)
        # The encoder consumes the embedded sequence, not the raw token ids.
        shape = (batch_size, input_length, embedding_dim)

    # ---- encoder ----
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

    # ---- decoder ----
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    else:
        # In a deep decoder the attention cell emits hidden_dim-sized vectors
        # and only the last LSTM cell projects to output_dim.
        decoder.add(
            AttentionDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    decoder_outputs = decoder(encoder(_input))
    output = TimeDistributed(Dense(n_tokens,
                                   activation='softmax'))(decoder_outputs)
    return Model(model_input, output)
示例#4
0
def paired_trimodal_model(output_dim,
                          output_length,
                          batch_input_shape=None,
                          batch_size=None,
                          input_shape=None,
                          input_length=None,
                          input_dim=None,
                          hidden_dim=None,
                          depth=1,
                          bidirectional=True,
                          unroll=False,
                          stateful=False,
                          dropout=0.0):
    """Build a chained two-stage translation graph (no cycle involved).

    The graph built here is::

        input -> encoder -> decoder -> encoder_2 -> decoder_2

    i.e. two encoders and two attention decoders. The first decoder emits
    ``output_length`` steps of size ``output_dim``; the second decoder is
    configured with ``output_length=input_length`` and
    ``output_dim=input_dim``.

    Input shape resolution (first match wins): ``batch_input_shape``, then
    ``(batch_size,) + input_shape``, then
    ``(batch_size, input_length or None, input_dim)``.

    Args:
        output_dim: Feature size produced by the first decoder.
        output_length: Number of steps produced by the first decoder.
        hidden_dim: Width of all LSTM cells; defaults to ``output_dim``.
        depth: int or ``(encoder_depth, decoder_depth)``, applied to both
            encoder/decoder pairs.
        bidirectional: Wrap both encoders in ``Bidirectional`` (sum merge).
        unroll, stateful: Forwarded to every ``RecurrentSequential``.
        dropout: Rate of every ``Dropout`` layer that is inserted.

    Returns:
        ``(inputs, encoded_one, encoded_two, decoded_one, decoded_two)`` where
        ``inputs`` is a one-element list holding the input tensor.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        shape = (batch_size, input_length or None, input_dim)
    else:
        raise TypeError(
            'paired_trimodal_model needs one of batch_input_shape, '
            'input_shape or input_dim to determine the input shape.')

    if hidden_dim is None:
        hidden_dim = output_dim

    def _add_decoder_cells(stack, final_dim):
        # Attention cell first; deeper stacks get hidden-sized LSTM cells and
        # a final LSTM cell projecting to `final_dim`.
        stack.add(
            AttentionDecoderCell(output_dim=final_dim, hidden_dim=hidden_dim))
        if depth[1] != 1:
            for _ in range(depth[1] - 2):
                stack.add(Dropout(dropout))
                stack.add(
                    LSTMDecoderCell(output_dim=hidden_dim,
                                    hidden_dim=hidden_dim))
            stack.add(Dropout(dropout))
            stack.add(
                LSTMDecoderCell(output_dim=final_dim, hidden_dim=hidden_dim))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # ---- first encoder: consumes the model input ----
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    # ---- second encoder: consumes the first decoder's output ----
    encoder_2 = RecurrentSequential(unroll=unroll,
                                    stateful=stateful,
                                    return_sequences=True)
    encoder_2.add(
        LSTMCell(hidden_dim, batch_input_shape=(shape[0], output_dim)))

    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

        encoder_2.add(Dropout(dropout))
        encoder_2.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

        encoder_2 = Bidirectional(encoder_2, merge_mode='sum')
        # NOTE(review): encoder_2 is built with the *input* shape although it
        # is applied to decoded_one (output_length x output_dim) -- confirm
        # this is intended when input and output dimensions differ.
        encoder_2.forward_layer.build(shape)
        encoder_2.backward_layer.build(shape)
        # patch
        encoder_2.layer = encoder_2.forward_layer

    encoded_one = encoder(_input)

    # ---- decoders ----
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    # NOTE(review): input_length / input_dim are taken from the arguments,
    # not from `shape`; they stay None when only batch_input_shape or
    # input_shape is supplied -- verify against callers.
    decoder_2 = RecurrentSequential(decode=True,
                                    output_length=input_length,
                                    unroll=unroll,
                                    stateful=stateful)
    decoder_2.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    _add_decoder_cells(decoder, output_dim)
    _add_decoder_cells(decoder_2, input_dim)

    inputs = [_input]
    decoded_one = decoder(encoded_one)

    encoded_two = encoder_2(decoded_one)
    decoded_two = decoder_2(encoded_two)

    return inputs, encoded_one, encoded_two, decoded_one, decoded_two
示例#5
0
def mctn_model(output_dim,
               output_length,
               batch_input_shape=None,
               batch_size=None,
               input_shape=None,
               input_length=None,
               input_dim=None,
               hidden_dim=None,
               depth=1,
               bidirectional=True,
               unroll=False,
               stateful=False,
               dropout=0,
               is_cycled=True):
    """Build the MCTN graph (by default with the cycle-consistency branch).

    An LSTM encoder feeds an attention decoder. The decoder is fixed to a
    single output step (``output_length`` is accepted but currently ignored)
    and its output is reshaped to ``(output_dim,)``. When ``is_cycled`` is
    true, the same encoder and decoder are applied once more to the un-reshaped
    decoder output to obtain the cycled reconstruction.

    Input shape resolution (first match wins): ``batch_input_shape``, then
    ``(batch_size,) + input_shape``, then
    ``(batch_size, input_length or None, input_dim)``.

    Args:
        output_dim: Feature size of the decoder output.
        output_length: Unused; the decoder always emits one step.
        hidden_dim: Width of the LSTM cells; defaults to ``output_dim``.
        depth: int or ``(encoder_depth, decoder_depth)``.
        bidirectional: Wrap the encoder in ``Bidirectional`` (sum merge).
        unroll, stateful: Forwarded to both ``RecurrentSequential`` stacks.
        dropout: Rate of every ``Dropout`` layer that is inserted.
        is_cycled: Whether to build the cycle branch.

    Returns:
        ``(inputs, encoded, decoded, cycled_decoded)``; ``inputs`` is a
        one-element list with the input tensor and ``cycled_decoded`` is
        ``None`` when ``is_cycled`` is false.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        shape = (batch_size, input_length or None, input_dim)
    else:
        raise TypeError(
            'mctn_model needs one of batch_input_shape, input_shape or '
            'input_dim to determine the input shape.')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # ---- encoder phase ----
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    # ---- decoder phase ----
    decoder = RecurrentSequential(
        decode=True,
        output_length=1,  # fixed to one step instead of `output_length`
        unroll=unroll,
        stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    inputs = [_input]
    decoded_0 = decoder(encoded)
    decoded = Reshape((output_dim, ))(decoded_0)

    # ---- cycle phase ----
    # NOTE(review): the encoder is reused on the decoder output, which
    # presumably requires output_dim to match the input feature size.
    cycled_decoded = None
    if is_cycled:
        cycled_decoded = decoder(encoder(decoded_0))

    return inputs, encoded, decoded, cycled_decoded
示例#6
0
def mctn_level2_model(input,
                      output_dim,
                      output_length,
                      batch_input_shape=None,
                      batch_size=None,
                      input_shape=None,
                      input_length=None,
                      input_dim=None,
                      hidden_dim=None,
                      depth=1,
                      bidirectional=True,
                      unroll=False,
                      stateful=False,
                      dropout=0.0):
    """Build the level-2 MCTN graph on top of an existing tensor.

    Level 2 MCTN translates the joint embedding of two modalities to the
    third one. Due to the lack of ground truth, no cycle phase happens.

    Unlike the other builders, no ``Input`` layer is created here: the
    encoder is applied directly to ``input``. The shape arguments are still
    required because they configure the cells and the ``build`` calls.

    Input shape resolution (first match wins): ``batch_input_shape``, then
    ``(batch_size,) + input_shape``, then
    ``(batch_size, input_length or None, input_dim)``.

    Args:
        input: Tensor the encoder is called on.
        output_dim: Feature size of the decoder output.
        output_length: Number of decoding steps.
        hidden_dim: Width of the LSTM cells; defaults to ``output_dim``.
        depth: int or ``(encoder_depth, decoder_depth)``.
        bidirectional: Wrap the encoder in ``Bidirectional`` (sum merge).
        unroll, stateful: Forwarded to both ``RecurrentSequential`` stacks.
        dropout: Rate of every ``Dropout`` layer that is inserted.

    Returns:
        ``(inputs, encoded, decoded)`` where ``inputs`` is ``[input]``.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        shape = (batch_size, input_length or None, input_dim)
    else:
        # A bare `raise` here has no active exception and would surface as
        # "RuntimeError: No active exception to reraise".
        raise TypeError(
            'mctn_level2_model needs one of batch_input_shape, input_shape '
            'or input_dim to determine the input shape.')

    if hidden_dim is None:
        hidden_dim = output_dim

    # ---- encoder ----
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

    encoded = encoder(input)

    # ---- decoder ----
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    inputs = [input]
    decoded = decoder(encoded)

    return inputs, encoded, decoded
示例#7
0
def AttentionSeq2Seq(
    output_dim,
    output_length,
    batch_input_shape=None,
    batch_size=None,
    input_shape=None,
    input_length=None,
    input_dim=None,
    hidden_dim=None,
    depth=1,
    bidirectional=True,
    unroll=False,
    stateful=False,
    dropout=0.0,
):
    '''
    [1] Sequence to Sequence Learning with Neural Networks
    [2] Learning Phrase Representations using RNN Encoder–Decoder for Statistical Machine Translation
    [3] Neural Machine Translation by Jointly Learning to Align and Translate
    [4] A Neural Conversational Model

    This is an attention Seq2seq model based on [3].
    Here, there is a soft alignment between the input and output sequence
    elements. A bidirectional encoder is used by default. There is no hidden
    state transfer in this model.

    The math:

        Encoder:
            X = Input sequence of length m.
            H = Bidirectional_LSTM(X); the LSTM has return_sequences = True,
            so H is a sequence of vectors of length m.

        Decoder:
            y(i) = LSTM(s(i-1), y(i-1), v(i)); where s is the hidden state of
            the LSTM (h and c) and v (the context vector) is a weighted sum
            over H:

            v(i) = sigma(j = 0 to m-1) alpha(i, j) * H(j)

            The weight alpha[i, j] for each H(j) is computed as follows:
            energy = a(s(i-1), H(j))
            alpha = softmax(energy)
            where a is a feed forward network.

    Input shape resolution (first match wins): ``batch_input_shape``, then
    ``(batch_size,) + input_shape``, then
    ``(batch_size, input_length or None, input_dim)``.

    Args:
        output_dim: Size of the decoder output vectors.
        output_length: Number of decoding steps.
        hidden_dim: Width of the LSTM cells; defaults to ``output_dim``.
        depth: int or ``(encoder_depth, decoder_depth)``; the number of
            stacked recurrent cells on each side.
        bidirectional: Wrap the encoder in ``Bidirectional`` (sum merge).
        unroll, stateful: Forwarded to both ``RecurrentSequential`` stacks.
        dropout: Rate of every ``Dropout`` layer that is inserted.

    Returns:
        A ``Model`` mapping the input tensor to the decoder output.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        shape = (batch_size, input_length or None, input_dim)
    else:
        raise TypeError(
            'AttentionSeq2Seq needs one of batch_input_shape, input_shape '
            'or input_dim to determine the input shape.')

    if hidden_dim is None:
        hidden_dim = output_dim

    # shape: [batch, max_encoder_length, input_dim]
    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # 1. Define the encoder.
    encoder = RecurrentSequential(unroll=unroll,
                                  stateful=stateful,
                                  return_sequences=True)
    # shape[0]: batch, shape[2]: input_dim
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))

    # `depth` is simply the number of stacked LSTM layers.
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)  # [batch, max_encoder_length, input_dim]
        encoder.backward_layer.build(shape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

    # 2. Encode.
    # _input:  [batch, max_encoder_length, input_dim]
    # encoded: [batch, max_encoder_length, hidden]
    encoded = encoder(_input)

    # 3. Define the decoder.
    decoder = RecurrentSequential(decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))

    # Exactly one attention cell heads the decoder. (It used to be added a
    # second time for depth[1] != 1, yielding depth[1] + 1 recurrent cells.)
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    # 4. Decode and wrap everything in a Model.
    decoded = decoder(encoded)
    return Model(inputs=[_input], outputs=decoded)
示例#8
0
def Pointer(output_dim, output_length, batch_input_shape=None,
                     batch_size=None, input_shape=None, input_length=None,
                     input_dim=None, hidden_dim=None, depth=1,
                     bidirectional=True, unroll=False, stateful=False, dropout=0.0,):
    '''
    Build an encoder / pointer-decoder model.

    An LSTM encoder (bidirectional by default, ``return_sequences=True``)
    produces a sequence H of hidden vectors. The decoder is a
    ``RecurrentSequential`` in decode mode that holds a single
    ``PointerDecoderCell`` and returns sequences. No extra softmax layer is
    added on top of the decoder; per the original author's note the softmax
    lives inside the cell.

    Only ``depth[0]`` (encoder depth) is used; the decoder always has exactly
    one cell, and ``dropout`` is applied between encoder cells only.

    Input shape resolution (first match wins): ``batch_input_shape``, then
    ``(batch_size,) + input_shape``, then
    ``(batch_size, input_length or None, input_dim)``.

    Args:
        output_dim: ``output_dim`` of the pointer decoder cell.
        output_length: Number of decoding steps.
        hidden_dim: Width of the recurrent cells; defaults to ``output_dim``.
        depth: int or ``(encoder_depth, decoder_depth)``.
        bidirectional: Wrap the encoder in ``Bidirectional`` (sum merge).
        unroll: Forwarded to both ``RecurrentSequential`` stacks.
        stateful: Forwarded to the decoder only (see note in the body).
        dropout: Rate of the ``Dropout`` between stacked encoder cells.

    Returns:
        A ``Model`` mapping the input tensor to the decoder output.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        shape = (batch_size, input_length or None, input_dim)
    else:
        raise TypeError(
            'Pointer needs one of batch_input_shape, input_shape or '
            'input_dim to determine the input shape.')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # ---- encoder ----
    # NOTE(review): the encoder is always built with stateful=False, ignoring
    # the `stateful` argument -- confirm this is deliberate.
    encoder = RecurrentSequential(unroll=unroll, stateful=False,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(Dropout(dropout))
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    # ---- decoder: a single pointer cell ----
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    decoder.add(
        PointerDecoderCell(output_dim=output_dim,
                           hidden_dim=hidden_dim,
                           batch_input_shape=(shape[0], shape[1], hidden_dim)))

    # Softmax is inside the cell, so the decoder output is the model output.
    decoded = decoder(encoded)
    return Model([_input], decoded)
示例#9
0
def AttentionSeqtoSeq(output_dim, output_length, batch_input_shape=None,
                     batch_size=None, input_shape=None, input_length=None,
                     input_dim=None, hidden_dim=None, depth=1,
                     bidirectional=True, unroll=False, stateful=False, dropout=0.0,
                     ):
    """Build an attention Seq2seq model with a Dense/softmax head on deep decoders.

    An LSTM encoder (bidirectional by default) feeds an attention decoder.
    Compared with the plain attention builder, no ``Dropout`` is placed
    between stacked cells (only at the decoder input), and when
    ``depth[1] != 1`` two ``Dense`` layers (``output_dim * 2`` units, then
    ``output_dim`` units with softmax) are appended to the decoder stack.

    Input shape resolution (first match wins): ``batch_input_shape``, then
    ``(batch_size,) + input_shape``, then
    ``(batch_size, input_length or None, input_dim)``.

    Args:
        output_dim: Size of the decoder output vectors.
        output_length: Number of decoding steps.
        hidden_dim: Width of the LSTM cells; defaults to ``output_dim``.
        depth: int or ``(encoder_depth, decoder_depth)``.
        bidirectional: Wrap the encoder in ``Bidirectional`` (sum merge).
        unroll, stateful: Forwarded to both ``RecurrentSequential`` stacks.
        dropout: Rate of the single ``Dropout`` at the decoder input.

    Returns:
        A ``Model`` mapping the input tensor to the decoder output.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        shape = (batch_size, input_length or None, input_dim)
    else:
        raise TypeError(
            'AttentionSeqtoSeq needs one of batch_input_shape, input_shape '
            'or input_dim to determine the input shape.')

    if hidden_dim is None:
        hidden_dim = output_dim

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # ---- encoder: stacked LSTM cells, no dropout in between ----
    encoder = RecurrentSequential(unroll=unroll, stateful=stateful,
                                  return_sequences=True)
    encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2])))
    for _ in range(1, depth[0]):
        encoder.add(LSTMCell(hidden_dim))

    if bidirectional:
        encoder = Bidirectional(encoder, merge_mode='sum')
        encoder.forward_layer.build(shape)
        encoder.backward_layer.build(shape)
        # patch: expose the forward layer under the attribute name `layer`
        encoder.layer = encoder.forward_layer

    encoded = encoder(_input)

    # ---- decoder ----
    decoder = RecurrentSequential(decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful)
    decoder.add(
        Dropout(dropout, batch_input_shape=(shape[0], shape[1], hidden_dim)))
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
        # NOTE(review): the Dense/softmax head is only added for deep
        # decoders; a depth-1 decoder returns the raw attention cell output.
        decoder.add(Dense(output_dim * 2))
        decoder.add(Dense(output_dim, activation="softmax"))

    decoded = decoder(encoded)
    return Model([_input], decoded)