def base_attention_lstm(vocabulary_size, time_steps=32, type="after"):
    """
    :param vocabulary_size: number of distinct tokens in the embedding vocabulary
    :param time_steps: length of the (padded) input sequences
    :param type: 'before'/'after' - apply the attention block before or after the LSTM
    :return: an uncompiled Keras Model
    """
    INPUT_DIM = 64
    LSTM_UNITS = 32

    text_input = Input(shape=(time_steps,), dtype='int32', name='text')  # (batch_size, time_steps)
    inputs = layers.Embedding(vocabulary_size, INPUT_DIM)(text_input)  # (batch_size, time_steps, input_dim)

    if type == "before":
        output_attention_mul = attention_3d_block(inputs, time_steps=time_steps, single_attention_vector=True)
        attention_mul = LSTM(LSTM_UNITS, return_sequences=False)(output_attention_mul)
    else:  # after
        lstm_out = LSTM(LSTM_UNITS, return_sequences=True)(inputs)
        lstm_out = Reshape((time_steps, LSTM_UNITS))(lstm_out)
        attention_mul = attention_3d_block(lstm_out, time_steps=time_steps, single_attention_vector=True)
        attention_mul = Flatten()(attention_mul)

    attention_mul = Dense(64)(attention_mul)
    output = Dense(1, activation='sigmoid')(attention_mul)
    model = Model(inputs=text_input, outputs=output)
    return model
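# attention_3d_block itself is imported from elsewhere in this repo and is not shown in this
# snippet. For reference only, the sketch below follows the widely used
# Permute / Dense-softmax / Multiply pattern (as in the keras-attention-mechanism project)
# that matches the signature used above; the project's actual implementation may differ.
from keras import backend as K
from keras.layers import Permute, Dense, Lambda, RepeatVector, Multiply


def attention_3d_block_sketch(inputs, time_steps, single_attention_vector=False):
    # inputs has shape (batch_size, time_steps, input_dim)
    input_dim = int(inputs.shape[2])
    a = Permute((2, 1))(inputs)                     # (batch_size, input_dim, time_steps)
    a = Dense(time_steps, activation='softmax')(a)  # attention weights over the time axis
    if single_attention_vector:
        a = Lambda(lambda x: K.mean(x, axis=1))(a)  # one shared attention vector for all features
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1))(a)                    # back to (batch_size, time_steps, input_dim)
    return Multiply()([inputs, a_probs])            # element-wise re-weighting of the inputs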
def __init__(self, type, model_name):
    self.type = type
    self.model_name = model_name
    self.word2em = load_glove()
    # print(len(self.word2em))
    # print(self.word2em['start'])

    self.target_word2idx = np.load(
        'chatbot_train/models/' + DATA_SET_NAME + '/word-glove-target-word2idx.npy').item()
    self.target_idx2word = np.load(
        'chatbot_train/models/' + DATA_SET_NAME + '/word-glove-target-idx2word.npy').item()
    context = np.load(
        'chatbot_train/models/' + DATA_SET_NAME + '/word-glove-context.npy').item()
    self.max_encoder_seq_length = context['encoder_max_seq_length']
    self.max_decoder_seq_length = context['decoder_max_seq_length']
    self.num_decoder_tokens = context['num_decoder_tokens']

    if 'attention' in self.type:
        # THIS IS STILL A ROUGH IDEA
        # encoder_inputs = Input(shape=(None, MAX_INPUT_SEQ_LENGTH, GLOVE_EMBEDDING_SIZE), name='encoder_inputs')
        encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='encoder_inputs')
    else:
        encoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='encoder_inputs')

    if self.type == 'bidirectional':
        print('PREDICTING ON BIDIRECTIONAL')
        encoder_lstm = Bidirectional(
            LSTM(units=HIDDEN_UNITS, return_state=True, name='encoder_lstm'))
        encoder_outputs, encoder_state_forward_h, encoder_state_forward_c, \
            encoder_state_backward_h, encoder_state_backward_c = encoder_lstm(encoder_inputs)
        # IF BIDIRECTIONAL, CONCATENATE THE FORWARD AND BACKWARD STATES
        encoder_state_h = Concatenate()([encoder_state_forward_h, encoder_state_backward_h])
        encoder_state_c = Concatenate()([encoder_state_forward_c, encoder_state_backward_c])
    else:
        encoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True, name='encoder_lstm')
        if 'attention' in self.type:
            # THIS IS STILL A ROUGH IDEA: IGNORE THE 2ND DIMENSION
            # encoder_outputs, _, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs)
            encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs)
        else:
            encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs)

    encoder_states = [encoder_state_h, encoder_state_c]

    if self.type == 'bidirectional':
        decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
        decoder_lstm = LSTM(units=HIDDEN_UNITS * 2, return_state=True,
                            return_sequences=True, name='decoder_lstm')
        decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
            decoder_inputs, initial_state=encoder_states)
    else:
        if 'attention' in self.type:
            # HERE, THE GLOVE EMBEDDING SIZE ACTS AS THE INPUT DIMENSION.
            # IF USING ATTENTION, WE NEED TO SET THE SHAPE WITH TIME STEPS, NOT WITH None.
            # THIS INPUT WILL BE USED WHEN BUILDING ENCODER OUTPUTS.
            # decoder_inputs = Input(shape=(None, attention_lstm.TIME_STEPS, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
            # decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
            # decoder_inputs = Input(shape=(MAX_TARGET_SEQ_LENGTH + 2, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
            decoder_inputs = Input(shape=(self.max_decoder_seq_length, GLOVE_EMBEDDING_SIZE),
                                   name='decoder_inputs')
            if self.type == 'attention_before':
                attention_mul = attention_lstm.attention_3d_block(
                    decoder_inputs, self.max_decoder_seq_length)
        else:
            decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')

        # NOTE THAT DECODER AND ENCODER STATES MUST ALWAYS HAVE THE SAME DIMENSION
        decoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True,
                            return_sequences=True, name='decoder_lstm')
        if 'attention' in self.type:
            # REMOVE ENCODER AS INITIAL STATE FOR ATTENTION
            # decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs)
            decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
                decoder_inputs, initial_state=encoder_states)
        else:
            decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
                decoder_inputs, initial_state=encoder_states)

    if self.type == 'attention_after':
        attention_mul = attention_lstm.attention_3d_block(
            decoder_outputs, self.max_decoder_seq_length)
        # SOMEHOW THIS FLATTEN CALL CAUSES A PROBLEM
        # attention_mul = Flatten()(attention_mul)

    decoder_dense = Dense(units=self.num_decoder_tokens, activation='softmax', name='decoder_dense')
    if self.type == 'attention_after' or self.type == 'attention_before':
        decoder_outputs = decoder_dense(attention_mul)
    else:
        decoder_outputs = decoder_dense(decoder_outputs)

    self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # model_json = open('chatbot_train/models/' + DATA_SET_NAME + '/word-glove-architecture.json', 'r').read()
    # self.model = model_from_json(model_json)

    # THE WEIGHTS FILE COMES FROM AN ENV VARIABLE SO THAT THE MODEL (ITERATION)
    # USED TO REPLY IS CONFIGURABLE
    # self.model.load_weights('chatbot_train/models/' + DATA_SET_NAME + '/word-glove-weights.h5')
    self.model.load_weights('chatbot_train/models/' + DATA_SET_NAME + '/' + os.getenv(self.model_name))
    self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

    self.encoder_model = Model(encoder_inputs, encoder_states)

    if self.type == 'bidirectional':
        decoder_state_inputs = [Input(shape=(HIDDEN_UNITS * 2,)), Input(shape=(HIDDEN_UNITS * 2,))]
    else:
        decoder_state_inputs = [Input(shape=(HIDDEN_UNITS,)), Input(shape=(HIDDEN_UNITS,))]

    decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_state_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    self.decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states)
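# The encoder/decoder sub-models built above are typically used for step-by-step greedy
# decoding at inference time. The method below is a minimal, hypothetical sketch of such a
# loop for the plain (non-attention) setup: embed the input with GloVe, encode it once, then
# feed the decoder one token embedding at a time while re-injecting its LSTM states. The
# 'start'/'end' markers and the project's actual reply method may differ; the attention
# variants need a fixed-length decoder input instead of this one-step loop.
def reply_sketch(self, input_text):
    # embed the whitespace-tokenized input with GloVe vectors
    input_seq = np.zeros(shape=(1, self.max_encoder_seq_length, GLOVE_EMBEDDING_SIZE))
    for idx, word in enumerate(input_text.lower().split()[:self.max_encoder_seq_length]):
        if word in self.word2em:
            input_seq[0, idx, :] = self.word2em[word]

    states = self.encoder_model.predict(input_seq)  # [state_h, state_c]

    target_seq = np.zeros((1, 1, GLOVE_EMBEDDING_SIZE))
    target_seq[0, 0, :] = self.word2em['start']  # assumed start-of-sequence marker
    decoded_words = []
    for _ in range(self.max_decoder_seq_length):
        output_tokens, h, c = self.decoder_model.predict([target_seq] + states)
        sampled_idx = np.argmax(output_tokens[0, -1, :])
        sampled_word = self.target_idx2word[sampled_idx]
        if sampled_word == 'end':
            break
        decoded_words.append(sampled_word)
        # feed the embedding of the sampled word back in as the next decoder input
        target_seq = np.zeros((1, 1, GLOVE_EMBEDDING_SIZE))
        if sampled_word in self.word2em:
            target_seq[0, 0, :] = self.word2em[sampled_word]
        states = [h, c]
    return ' '.join(decoded_words)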
    decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
    decoder_lstm = LSTM(units=HIDDEN_UNITS * 2, return_state=True,
                        return_sequences=True, name='decoder_lstm')
    decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
        decoder_inputs, initial_state=encoder_states)
else:
    if 'attention' in sys.argv[1]:
        # HERE, THE GLOVE EMBEDDING SIZE ACTS AS THE INPUT DIMENSION.
        # IF USING ATTENTION, WE NEED TO SET THE SHAPE WITH TIME STEPS, NOT WITH None.
        # THIS INPUT WILL BE USED WHEN BUILDING ENCODER OUTPUTS.
        # decoder_inputs = Input(shape=(None, attention_lstm.TIME_STEPS, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
        # decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
        decoder_inputs = Input(shape=(decoder_max_seq_length, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')
        if sys.argv[1] == 'attention_before':
            attention_mul = attention_lstm.attention_3d_block(decoder_inputs, decoder_max_seq_length)
    else:
        decoder_inputs = Input(shape=(None, GLOVE_EMBEDDING_SIZE), name='decoder_inputs')

    # NOTE THAT DECODER AND ENCODER STATES MUST ALWAYS HAVE THE SAME DIMENSION
    decoder_lstm = LSTM(units=HIDDEN_UNITS, return_state=True,
                        return_sequences=True, name='decoder_lstm')
    if 'attention' in sys.argv[1]:
        # REMOVE ENCODER AS INITIAL STATE FOR ATTENTION
        # decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(decoder_inputs)
        decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
            decoder_inputs, initial_state=encoder_states)
    else:
        decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
            decoder_inputs, initial_state=encoder_states)
    model.add(Dropout(DROPOUT_VAL))
    model.add(Dense(LSTM_DIM))
    model.add(Dropout(DROPOUT_VAL))
    model.add(Dense(len(chars)))
    model.add(Activation("softmax"))
elif network == "LSTM2-ATTENTION":
    # Model 2 - CuDNNLSTM x2 + attention block
    from keras.models import Model
    from keras.layers import Input, Flatten

    inputs = Input(shape=(maxlen, len(chars),))
    lstm_out = CuDNNLSTM(LSTM_DIM, return_sequences=True)(inputs)
    lstm_out = Dropout(DROPOUT_VAL)(lstm_out)
    lstm_out = CuDNNLSTM(LSTM_DIM, return_sequences=True)(lstm_out)
    lstm_out = Dropout(DROPOUT_VAL)(lstm_out)
    attention_mul = attention_3d_block(lstm_out)
    attention_mul = Flatten()(attention_mul)
    output = Dense(len(chars), activation='softmax')(attention_mul)
    model = Model(inputs=[inputs], outputs=output)
elif network == "LSTM2-ATTENTION-FELIX":
    # Model 2 - CuDNNLSTM x2 + attention block (felixhao28 variant)
    from keras.models import Model
    from keras.layers import Input

    inputs = Input(shape=(maxlen, len(chars),))
    lstm_out = CuDNNLSTM(LSTM_DIM, return_sequences=True)(inputs)
    lstm_out = Dropout(DROPOUT_VAL)(lstm_out)
    lstm_out = CuDNNLSTM(LSTM_DIM, return_sequences=True)(lstm_out)
    lstm_out = Dropout(DROPOUT_VAL)(lstm_out)
    attention_mul = attention_3d_block_felixhao28(lstm_out)
    output = Dense(len(chars), activation='softmax')(attention_mul)