from keras import layers
from keras.layers import Dense, Flatten, Input, LSTM, Reshape
from keras.models import Model


def base_attention_lstm(vocabulary_size, time_steps=32, type="after"):
    """

    :param vocabulary_size:
    :param time_steps:
    :param type: before/after
    :return:
    """
    INPUT_DIM = 64
    LSTM_UNITS = 32

    text_input = Input(shape=(time_steps, ), dtype='int32',
                       name='text')  # (batch_size, time_steps)
    inputs = layers.Embedding(vocabulary_size, INPUT_DIM)(
        text_input)  # (batch_size, time_steps, input_dim)

    if type == "before":
        output_attention_mul = attention_3d_block(inputs,
                                                  time_steps=time_steps,
                                                  single_attention_vector=True)
        attention_mul = LSTM(LSTM_UNITS,
                             return_sequences=False)(output_attention_mul)
    else:  # after
        lstm_out = LSTM(LSTM_UNITS, return_sequences=True)(inputs)
        lstm_out = Reshape((time_steps, LSTM_UNITS))(lstm_out)
        attention_mul = attention_3d_block(lstm_out,
                                           time_steps=time_steps,
                                           single_attention_vector=True)
        attention_mul = Flatten()(attention_mul)
        attention_mul = Dense(64)(attention_mul)

    output = Dense(1, activation='sigmoid')(attention_mul)
    model = Model(inputs=text_input, outputs=output)

    return model
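
# attention_3d_block is used above but not defined in these snippets. A minimal
# sketch, assuming the widely used soft-attention-over-time-steps recipe (a Dense
# softmax over the time axis followed by an element-wise re-weighting); the layer
# names and the mean-pooling trick for single_attention_vector are assumptions,
# not the original implementation:
from keras import backend as K
from keras.layers import Dense, Lambda, Multiply, Permute, RepeatVector


def attention_3d_block(inputs, time_steps, single_attention_vector=False):
    """Re-weight `inputs` of shape (batch, time_steps, input_dim) with learned attention scores."""
    input_dim = int(inputs.shape[2])
    a = Permute((2, 1))(inputs)                      # (batch, input_dim, time_steps)
    a = Dense(time_steps, activation='softmax')(a)   # per-feature scores over the time axis
    if single_attention_vector:
        a = Lambda(lambda x: K.mean(x, axis=1))(a)   # collapse to one score vector ...
        a = RepeatVector(input_dim)(a)               # ... shared across all features
    a_probs = Permute((2, 1), name='attention_vec')(a)
    return Multiply()([inputs, a_probs])             # element-wise re-weighting of the inputs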
    def __init__(self, type, model_name):
        self.type = type
        self.model_name = model_name
        self.word2em = load_glove()
        # print(len(self.word2em))
        # print(self.word2em['start'])

        self.target_word2idx = np.load(
            'chatbot_train/models/' + DATA_SET_NAME +
            '/word-glove-target-word2idx.npy').item()
        self.target_idx2word = np.load(
            'chatbot_train/models/' + DATA_SET_NAME +
            '/word-glove-target-idx2word.npy').item()
        context = np.load('chatbot_train/models/' + DATA_SET_NAME +
                          '/word-glove-context.npy').item()
        self.max_encoder_seq_length = context['encoder_max_seq_length']
        self.max_decoder_seq_length = context['decoder_max_seq_length']
        self.num_decoder_tokens = context['num_decoder_tokens']

        # FOR NOW BOTH THE ATTENTION AND NON-ATTENTION VARIANTS SHARE THE SAME
        # ENCODER INPUT SHAPE; A FIXED-LENGTH IDEA IS KEPT COMMENTED OUT
        # encoder_inputs = Input(shape=(None, MAX_INPUT_SEQ_LENGTH, GLOVE_EMBEDDING_SIZE), name='encoder_inputs')
        encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE),
                               name='encoder_inputs')

        if (self.type == 'bidirectional'):
            print('PREDICTING ON BIDIRECTIONAL')

            encoder_lstm = Bidirectional(
                LSTM(units=HIDDEN_UNITS,
                     return_state=True,
                     name='encoder_lstm'))
            encoder_outputs, encoder_state_forward_h, encoder_state_forward_c, encoder_state_backward_h, encoder_state_backward_c = encoder_lstm(
                encoder_inputs)

            # IF BIDIRECTIONAL, NEEDS TO CONCATENATE FORWARD AND BACKWARD STATE
            encoder_state_h = Concatenate()(
                [encoder_state_forward_h, encoder_state_backward_h])
            encoder_state_c = Concatenate()(
                [encoder_state_forward_c, encoder_state_backward_c])
        else:
            encoder_lstm = LSTM(units=HIDDEN_UNITS,
                                return_state=True,
                                name='encoder_lstm')

            # BOTH VARIANTS CURRENTLY UNPACK THE ENCODER THE SAME WAY; THE IDEA
            # OF IGNORING THE 2ND DIMENSION IS KEPT COMMENTED OUT
            # encoder_outputs, _, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs)
            encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(
                encoder_inputs)

        encoder_states = [encoder_state_h, encoder_state_c]

        if (self.type == 'bidirectional'):
            decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE),
                                   name='decoder_inputs')
            decoder_lstm = LSTM(units=HIDDEN_UNITS * 2,
                                return_state=True,
                                return_sequences=True,
                                name='decoder_lstm')
            decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
                decoder_inputs, initial_state=encoder_states)
        else:
            if ('attention' in self.type):
                # HERE, THE GLOVE EMBEDDING SIZE ACTS AS THE INPUT DIMENSION
                # IF USING ATTENTION, WE NEED TO SET SHAPE WITH TIME STEPS, NOT WITH NONE
                # THIS INPUT WILL BE USED WHEN BUILDING ENCODER OUTPUTS

                # decoder_inputs = Input(shape=(None, attention_lstm.TIME_STEPS, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
                # decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
                # decoder_inputs = Input(shape=(MAX_TARGET_SEQ_LENGTH + 2, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
                decoder_inputs = Input(shape=(self.max_decoder_seq_length,
                                              GLOVE_EMBEDDING_SIZE),
                                       name='decoder_inputs')

                if (self.type == 'attention_before'):
                    attention_mul = attention_lstm.attention_3d_block(
                        decoder_inputs, self.max_decoder_seq_length)
            else:
                decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE),
                                       name='decoder_inputs')

            # NOTE THAT DECODER AND ENCODER STATES MUST ALWAYS HAVE THE SAME DIMENSION;
            # IN THIS CASE, BOTH USE HIDDEN_UNITS
            decoder_lstm = LSTM(units=HIDDEN_UNITS,
                                return_state=True,
                                return_sequences=True,
                                name='decoder_lstm')

            # BOTH VARIANTS CURRENTLY SEED THE DECODER WITH THE ENCODER STATES;
            # DROPPING THE INITIAL STATE FOR ATTENTION IS KEPT COMMENTED OUT
            # decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs)
            decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
                decoder_inputs, initial_state=encoder_states)

        if (self.type == 'attention_after'):
            attention_mul = attention_lstm.attention_3d_block(
                decoder_outputs, self.max_decoder_seq_length)
            # SOMEHOW THIS Flatten() CALL CAUSES A PROBLEM, SO THE OUTPUT IS LEFT AS A SEQUENCE
            # attention_mul = Flatten()(attention_mul)

        decoder_dense = Dense(units=self.num_decoder_tokens,
                              activation='softmax',
                              name='decoder_dense')

        if (self.type == 'attention_after' or self.type == 'attention_before'):
            decoder_outputs = decoder_dense(attention_mul)
        else:
            decoder_outputs = decoder_dense(decoder_outputs)

        self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

        # model_json = open('chatbot_train/models/' + DATA_SET_NAME + '/word-glove-architecture.json', 'r').read()
        # self.model = model_from_json(model_json)

        # THE WEIGHTS FILE NAME IS READ FROM AN ENV VARIABLE SO THAT WHICH
        # MODEL (ITERATION) IS USED TO REPLY STAYS CONFIGURABLE
        # self.model.load_weights('chatbot_train/models/' + DATA_SET_NAME + '/word-glove-weights.h5')
        self.model.load_weights('chatbot_train/models/' + DATA_SET_NAME + '/' +
                                os.getenv(self.model_name))
        self.model.compile(optimizer='rmsprop',
                           loss='categorical_crossentropy')

        self.encoder_model = Model(encoder_inputs, encoder_states)

        if (self.type == 'bidirectional'):
            decoder_state_inputs = [
                Input(shape=(HIDDEN_UNITS * 2, )),
                Input(shape=(HIDDEN_UNITS * 2, ))
            ]
        else:
            decoder_state_inputs = [
                Input(shape=(HIDDEN_UNITS, )),
                Input(shape=(HIDDEN_UNITS, ))
            ]

        decoder_outputs, state_h, state_c = decoder_lstm(
            decoder_inputs, initial_state=decoder_state_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)
        self.decoder_model = Model([decoder_inputs] + decoder_state_inputs,
                                   [decoder_outputs] + decoder_states)
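
    # The constructor above only builds the encoder/decoder graphs for inference; the
    # reply loop itself is not part of this snippet. A minimal greedy-decoding sketch
    # for the plain (non-attention) configuration, assuming 'start'/'end' sentinel
    # tokens in the GloVe lookup and zero vectors for unknown words (the method name
    # and these details are assumptions, not the original code):
    def reply(self, input_text):
        # Encode the question as GloVe vectors: (1, max_encoder_seq_length, GLOVE_EMBEDDING_SIZE)
        input_seq = np.zeros((1, self.max_encoder_seq_length, GLOVE_EMBEDDING_SIZE))
        for idx, word in enumerate(input_text.lower().split()):
            if idx >= self.max_encoder_seq_length:
                break
            if word in self.word2em:
                input_seq[0, idx, :] = self.word2em[word]
        states = self.encoder_model.predict(input_seq)

        # Greedy decoding: feed each predicted word's embedding back in, one step at a time
        target_seq = np.zeros((1, 1, GLOVE_EMBEDDING_SIZE))
        target_seq[0, 0, :] = self.word2em['start']
        decoded_words = []
        while len(decoded_words) < self.max_decoder_seq_length:
            output_tokens, h, c = self.decoder_model.predict([target_seq] + states)
            sample_idx = int(np.argmax(output_tokens[0, -1, :]))
            sample_word = self.target_idx2word[sample_idx]
            if sample_word == 'end':
                break
            if sample_word != 'start':
                decoded_words.append(sample_word)
            target_seq = np.zeros((1, 1, GLOVE_EMBEDDING_SIZE))
            if sample_word in self.word2em:
                target_seq[0, 0, :] = self.word2em[sample_word]
            states = [h, c]
        return ' '.join(decoded_words)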
# Example 3
    decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
    decoder_lstm = LSTM(units=HIDDEN_UNITS * 2, return_state=True, return_sequences=True, name='decoder_lstm')
    decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs,
                                                                     initial_state=encoder_states)
else:
    if('attention' in sys.argv[1]):
        # HERE, THE GLOVE EMBEDDING SIZE ACTS AS THE INPUT DIMENSION
        # IF USING ATTENTION, WE NEED TO SET SHAPE WITH TIME STEPS, NOT WITH NONE
        # THIS INPUT WILL BE USED WHEN BUILDING ENCODER OUTPUTS

        # decoder_inputs = Input(shape=(None, attention_lstm.TIME_STEPS, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
        # decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
        decoder_inputs = Input(shape=(decoder_max_seq_length, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')

        if(sys.argv[1] == 'attention_before'):
            attention_mul = attention_lstm.attention_3d_block(decoder_inputs, decoder_max_seq_length)
    else:
        decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')

    # NOTE THAT DECODER AND ENCODER STATES MUST ALWAYS HAVE THE SAME DIMENSION;
    # IN THIS CASE, BOTH USE HIDDEN_UNITS
    decoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, return_sequences=True, name='decoder_lstm')

    # BOTH VARIANTS CURRENTLY SEED THE DECODER WITH THE ENCODER STATES;
    # DROPPING THE INITIAL STATE FOR ATTENTION IS KEPT COMMENTED OUT
    # decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs)
    decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs,
                                                                     initial_state=encoder_states)
    model.add(Dropout(DROPOUT_VAL))
    model.add(Dense(LSTM_DIM))
    model.add(Dropout(DROPOUT_VAL))
    model.add(Dense(len(chars)))
    model.add(Activation("softmax"))
elif network == "LSTM2-ATTENTION":
    # Model 2 - CuDNNLSTM X*2 + attention block
    from keras.models import Model
    from keras.layers import Input, Flatten

    inputs = Input(shape=(maxlen, len(chars),))
    lstm_out = CuDNNLSTM(LSTM_DIM, return_sequences=True)(inputs)
    lstm_out = Dropout(DROPOUT_VAL)(lstm_out)
    lstm_out = CuDNNLSTM(LSTM_DIM, return_sequences=True)(lstm_out)
    lstm_out = Dropout(DROPOUT_VAL)(lstm_out)
    attention_mul = attention_3d_block(lstm_out)
    attention_mul = Flatten()(attention_mul)
    output = Dense(len(chars), activation='softmax')(attention_mul)
    model = Model(inputs=[inputs], outputs=output)
elif network == "LSTM2-ATTENTION-FELIX":
    # Model 2 - CuDNNLSTM X*2 + attention block
    from keras.models import Model
    from keras.layers import Input

    inputs = Input(shape=(maxlen, len(chars),))
    lstm_out = CuDNNLSTM(LSTM_DIM, return_sequences=True)(inputs)
    lstm_out = Dropout(DROPOUT_VAL)(lstm_out)
    lstm_out = CuDNNLSTM(LSTM_DIM, return_sequences=True)(lstm_out)
    lstm_out = Dropout(DROPOUT_VAL)(lstm_out)
    attention_mul = attention_3d_block_felixhao28(lstm_out)
    output = Dense(len(chars), activation='softmax')(attention_mul)
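
# attention_3d_block_felixhao28 is also not defined in these snippets. Unlike
# attention_3d_block, it returns a 2D attention vector, which is why the FELIX
# branch above skips Flatten(). A sketch in the spirit of the felixhao28 variant
# (Luong-style attention that queries the sequence with its last hidden state);
# layer sizes and names here are assumptions:
from keras.layers import Activation, Dense, Lambda, concatenate, dot


def attention_3d_block_felixhao28(hidden_states):
    """Reduce (batch, time_steps, units) to a single (batch, units) attention vector."""
    hidden_size = int(hidden_states.shape[2])
    score_first_part = Dense(hidden_size, use_bias=False)(hidden_states)              # (batch, T, units)
    h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,))(hidden_states)   # last step as query
    score = dot([score_first_part, h_t], axes=[2, 1])                                 # (batch, T)
    attention_weights = Activation('softmax')(score)
    context_vector = dot([hidden_states, attention_weights], axes=[1, 1])             # (batch, units)
    pre_activation = concatenate([context_vector, h_t])                               # (batch, 2 * units)
    return Dense(hidden_size, use_bias=False, activation='tanh')(pre_activation)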