class LSTMSequenceEmbedder(SequenceEmbedder):
    """Forward LSTM sequence embedder.

    Also provides attention states.
    """

    def __init__(self, token_embeds, seq_length, align='left',
                 name='LSTMSequenceEmbedder', hidden_size=50):
        self.hidden_size = hidden_size
        super(LSTMSequenceEmbedder, self).__init__(
            token_embeds, align=align, seq_length=seq_length, name=name)

    def embed_sequences(self, embed_sequence_batch):
        self._forward_lstm = LSTM(self.hidden_size, return_sequences=True)

        # Pass the input through the LSTM.
        # Shape: (batch_size, seq_length, hidden_size)
        hidden_state_values = self._forward_lstm(embed_sequence_batch.values,
                                                 embed_sequence_batch.mask)
        self._hidden_states = SequenceBatch(hidden_state_values,
                                            embed_sequence_batch.mask)

        # Take the final hidden state as the sequence embedding.
        # Embedding dimension: (batch_size, hidden_size)
        shape = tf.shape(embed_sequence_batch.values)
        forward_final = tf.slice(hidden_state_values, [0, shape[1] - 1, 0],
                                 [-1, 1, self.hidden_size])
        return tf.squeeze(forward_final, [1])

    @property
    def weights(self):
        return self._forward_lstm.get_weights()

    @weights.setter
    def weights(self, w):
        self._forward_lstm.set_weights(w)

    @property
    def hidden_states(self):
        return self._hidden_states
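# Illustration (stand-alone, not from the codebase above): the final-state
# extraction used in embed_sequences, in isolation. Assumes TF 1.x-style
# graph mode; the placeholder stands in for the LSTM's hidden states.
import tensorflow as tf

hidden_size = 50
hidden_states = tf.placeholder(tf.float32, [None, None, hidden_size])
shape = tf.shape(hidden_states)
# Slice out timestep seq_length - 1 for every batch element, then drop the
# singleton time axis: (batch, 1, hidden) -> (batch, hidden).
final_state = tf.slice(hidden_states, [0, shape[1] - 1, 0],
                       [-1, 1, hidden_size])
final_state = tf.squeeze(final_state, [1])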
class BidiLSTMSequenceEmbedder(SequenceEmbedder):
    """Bidirectional LSTM sequence embedder.

    Also provides attention states.
    """

    def __init__(self, token_embeds, seq_length, align='left',
                 name='BidiLSTMSequenceEmbedder', hidden_size=50):
        self.seq_length = seq_length
        self.hidden_size = hidden_size
        super(BidiLSTMSequenceEmbedder, self).__init__(
            token_embeds, align=align, seq_length=seq_length, name=name)

    def embed_sequences(self, embed_sequence_batch):
        """Return sentence embeddings as a tensor with shape
        [batch_size, hidden_size * 2].
        """
        forward_values = embed_sequence_batch.values
        forward_mask = embed_sequence_batch.mask
        backward_values = tf.reverse(forward_values, [False, True, False])
        backward_mask = tf.reverse(forward_mask, [False, True])

        # Initialize the LSTMs.
        self._forward_lstm = LSTM(self.hidden_size, return_sequences=True)
        self._backward_lstm = LSTM(self.hidden_size, return_sequences=True)

        # Pass the input through the LSTMs.
        # Shape: (batch_size, seq_length, hidden_size)
        forward_seq = self._forward_lstm(forward_values, forward_mask)
        forward_seq.set_shape((None, self.seq_length, self.hidden_size))
        backward_seq = self._backward_lstm(backward_values, backward_mask)
        backward_seq.set_shape((None, self.seq_length, self.hidden_size))

        # Stitch the outputs together --> hidden states (for computing attention).
        # Final dimension: (batch_size, seq_length, hidden_size * 2)
        lstm_states = tf.concat(2, [forward_seq,
                                    tf.reverse(backward_seq, [False, True, False])])
        self._hidden_states = SequenceBatch(lstm_states, forward_mask)

        # Stitch the final outputs together --> sequence embedding.
        # Note: backward_seq ran over the reversed input, so its output at
        # index seq_length - 1 is already its final state; no re-reversal needed.
        # Final dimension: (batch_size, hidden_size * 2)
        seq_length = tf.shape(forward_values)[1]
        forward_final = tf.slice(forward_seq, [0, seq_length - 1, 0],
                                 [-1, 1, self.hidden_size])
        backward_final = tf.slice(backward_seq, [0, seq_length - 1, 0],
                                  [-1, 1, self.hidden_size])
        return tf.squeeze(tf.concat(2, [forward_final, backward_final]), [1])

    @property
    def weights(self):
        return (self._forward_lstm.get_weights(),
                self._backward_lstm.get_weights())

    @weights.setter
    def weights(self, w):
        forward_weights, backward_weights = w
        self._forward_lstm.set_weights(forward_weights)
        self._backward_lstm.set_weights(backward_weights)

    @property
    def hidden_states(self):
        """Return a SequenceBatch whose value has shape
        [batch_size, max_seq_length, hidden_size * 2].
        """
        return self._hidden_states
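# Illustration (stand-alone sketch, matching the TF 0.x-era concat/reverse
# API used above): stitching forward and backward outputs so that position t
# holds both directions' states for token t. The backward LSTM consumed a
# reversed input, so its per-position outputs are reversed back before
# concatenation.
import tensorflow as tf

forward_seq = tf.placeholder(tf.float32, [None, 10, 50])
backward_seq = tf.placeholder(tf.float32, [None, 10, 50])
attention_states = tf.concat(2, [forward_seq,
                                 tf.reverse(backward_seq, [False, True, False])])
# attention_states: (batch_size, 10, 100), usable as attention memory.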
def fit(self, epochs):
    self.NUM_EPOCHS = epochs
    checkpoint = ModelCheckpoint(
        filepath=os.path.join(self.path, 'model/word-weights.h5'),
        save_best_only=True)
    self.model.fit_generator(generator=self.train_gen,
                             steps_per_epoch=self.train_num_batches,
                             epochs=self.NUM_EPOCHS,
                             verbose=1,
                             validation_data=self.test_gen,
                             validation_steps=self.test_num_batches,
                             callbacks=[checkpoint])

    # Save the inference-time encoder: maps inputs to the final states.
    encoder_model = Model(self.encoder_inputs, self.encoder_states)
    encoder_model.save(os.path.join(self.path, 'model/encoder-weights.h5'))

    # Rebuild the decoder as a stateful, batch-size-1 model for step-by-step
    # decoding, then copy in the trained weights.
    new_decoder_inputs = Input(batch_shape=(1, None, self.num_decoder_tokens),
                               name='new_decoder_inputs')
    new_decoder_lstm = LSTM(units=self.HIDDEN_UNITS,
                            return_state=True,
                            return_sequences=True,
                            name='new_decoder_lstm',
                            stateful=True)
    new_decoder_outputs, _, _ = new_decoder_lstm(new_decoder_inputs)
    new_decoder_dense = Dense(units=self.num_decoder_tokens,
                              activation='softmax',
                              name='new_decoder_dense')
    new_decoder_outputs = new_decoder_dense(new_decoder_outputs)

    # The new layers are built (called) above, so set_weights can copy the
    # trained parameters across.
    new_decoder_lstm.set_weights(self.decoder_lstm.get_weights())
    new_decoder_dense.set_weights(self.decoder_dense.get_weights())

    new_decoder_model = Model(new_decoder_inputs, new_decoder_outputs)
    new_decoder_model.save(os.path.join(self.path, 'model/decoder-weights.h5'))
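# Illustration (stand-alone sketch; the layer sizes are placeholders, not the
# model's real dimensions): the rebuild-and-copy pattern used in fit(). A
# Keras layer only allocates its weights when first called on an input, so
# the new stateful decoder must be called before set_weights can copy the
# trained parameters into it.
from keras.layers import Input, LSTM
from keras.models import Model

trained = LSTM(64, return_sequences=True, return_state=True)
trained(Input(shape=(None, 32)))             # build: weights now exist
new_inputs = Input(batch_shape=(1, None, 32))
new_lstm = LSTM(64, return_sequences=True, return_state=True, stateful=True)
new_outputs, _, _ = new_lstm(new_inputs)     # build before copying
new_lstm.set_weights(trained.get_weights())  # weight shapes match 1:1
inference_model = Model(new_inputs, new_outputs)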
print(len(X_test))

train_gen = generate_batch(X_train, y_train)
test_gen = generate_batch(X_test, y_test)

train_num_batches = len(X_train) // BATCH_SIZE
test_num_batches = len(X_test) // BATCH_SIZE

checkpoint = ModelCheckpoint(filepath=WEIGHT_FILE_PATH, save_best_only=True)
model.fit_generator(generator=train_gen,
                    steps_per_epoch=train_num_batches,
                    epochs=NUM_EPOCHS,
                    verbose=1,
                    validation_data=test_gen,
                    validation_steps=test_num_batches,
                    # earlystopping is defined earlier in the script
                    callbacks=[checkpoint, earlystopping])

# Save the inference-time encoder: maps inputs to the final states.
encoder_model = Model(encoder_inputs, encoder_states)
encoder_model.save('model/encoder-weights.h5')

# Rebuild the decoder as a stateful, batch-size-1 model for step-by-step
# decoding, then copy in the trained weights.
new_decoder_inputs = Input(batch_shape=(1, None, num_decoder_tokens),
                           name='new_decoder_inputs')
new_decoder_lstm = LSTM(units=HIDDEN_UNITS,
                        return_state=True,
                        return_sequences=True,
                        name='new_decoder_lstm',
                        stateful=True)
new_decoder_outputs, _, _ = new_decoder_lstm(new_decoder_inputs)
new_decoder_dense = Dense(units=num_decoder_tokens,
                          activation='softmax',
                          name='new_decoder_dense')
new_decoder_outputs = new_decoder_dense(new_decoder_outputs)

# The new layers are built (called) above, so set_weights can copy the
# trained parameters across.
new_decoder_lstm.set_weights(decoder_lstm.get_weights())
new_decoder_dense.set_weights(decoder_dense.get_weights())

new_decoder_model = Model(new_decoder_inputs, new_decoder_outputs)
new_decoder_model.save('model/decoder-weights.h5')
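# generate_batch is referenced above but not shown; a minimal sketch of the
# kind of generator fit_generator expects. The seq2seq pairing
# ([encoder_input, decoder_input], decoder_target) and the teacher-forcing
# shift are assumptions based on the encoder/decoder model above; the real
# batching depends on the tokenization used.
import numpy as np

def generate_batch(X, y, batch_size=BATCH_SIZE):
    while True:  # Keras generators must loop forever
        for start in range(0, len(X) - batch_size + 1, batch_size):
            encoder_input = np.asarray(X[start:start + batch_size])
            decoder_target = np.asarray(y[start:start + batch_size])
            # Teacher forcing: the decoder input is the target shifted right.
            decoder_input = np.zeros_like(decoder_target)
            decoder_input[:, 1:] = decoder_target[:, :-1]
            yield [encoder_input, decoder_input], decoder_target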
class Generator:
    def __init__(self, lstm, lines, tf_session, tokenizer, n_tokens,
                 max_line_length):
        self.tf_session = tf_session
        self.tokenizer = tokenizer
        self.n_tokens = n_tokens
        self.max_line_length = max_line_length
        # Rebuild the trained LSTM for generation, one GeneratorLine per
        # haiku line, then copy the trained weights across.
        self.lstm = LSTM(lstm.units, return_state=True, return_sequences=True,
                         name='generator_lstm')
        self.lines = [
            GeneratorLine('generator_line_%s' % i, lines[i], self.lstm,
                          self.n_tokens)
            for i in range(3)
        ]
        # set_weights must come after GeneratorLine has called the layer.
        self.lstm.set_weights(lstm.get_weights())

    def generate_haiku(self, syllables=[5, 7, 5], temperature=.1,
                       first_char=None):
        output = []
        h = None
        c = None
        if first_char is None:
            first_char = chr(int(np.random.randint(ord('a'), ord('z') + 1)))
        next_char = self.tokenizer.texts_to_sequences(first_char)[0][0]
        for i in range(3):
            line = self.lines[i]
            # Condition the LSTM state on the line's target syllable count.
            s = self.tf_session.run(
                line.syllable_dense_output,
                feed_dict={line.syllable_input: [[syllables[i]]]})
            if h is None:
                h = s
                c = s
            else:
                h = h + s
                c = c + s
            line_output = [next_char]
            end = False
            next_char = None
            for _ in range(self.max_line_length):
                char, h, c = self.tf_session.run(
                    [line.output, line.lstm_h, line.lstm_c],
                    feed_dict={
                        line.char_input: [[
                            np_utils.to_categorical(line_output[-1],
                                                    num_classes=self.n_tokens)
                        ]],
                        line.h_input: h,
                        line.c_input: c
                    })
                char = sample(char[0, 0], temperature)
                # Token 1 marks the end of a line; the first non-end token
                # sampled afterwards seeds the next line.
                if char == 1 and not end:
                    end = True
                if char != 1 and end:
                    next_char = char
                    char = 1
                line_output.append(char)
            # sequences_to_texts joins character tokens with single spaces and
            # marks word boundaries with a triple space; strip the joiners and
            # restore the word boundaries.
            cleaned_text = (self.tokenizer.sequences_to_texts([line_output])[0]
                            .strip()[1:]
                            .replace('   ', '\n')
                            .replace(' ', '')
                            .replace('\n', ' '))
            print(cleaned_text)
            output.append(cleaned_text)
        return output
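# `sample` is used in generate_haiku but not defined in this snippet; the
# temperature-sampling helper it appears to expect looks like the standard
# one from the Keras text-generation examples (an assumption, reproduced
# here for completeness).
import numpy as np

def sample(preds, temperature=1.0):
    # Rescale the predicted distribution by temperature, renormalize, and
    # draw a single token index from it.
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-8) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)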