def train(model, optimizer, X_train, y_train, vocab_size, epoch, n_epoch, word_to_index):
    start_id = word_to_index[data_config['go']]
    end_id = word_to_index[data_config['eos']]
    batch_size = model_config['batch_size']
    decoder_seq_length = model_config['decoder_seq_length']
    n_step = len(X_train) // batch_size
    loss_count = 0
    iter_count = 0

    # training starts
    # set model in training mode
    model.train()
    for X, y in tqdm(tl.iterate.minibatches(inputs=X_train, targets=y_train,
                                            batch_size=batch_size, shuffle=False),
                     total=n_step,
                     desc='Epoch[{}/{}]'.format(epoch + 1, n_epoch),
                     leave=False):
        X = tl.prepro.pad_sequences(X)
        decoder_input = tl.prepro.sequences_add_start_id(y, start_id=start_id, remove_last=False)
        decoder_input = tl.prepro.pad_sequences(decoder_input, maxlen=decoder_seq_length)
        decoder_output = tl.prepro.sequences_add_end_id(y, end_id=end_id)
        decoder_output = tl.prepro.pad_sequences(decoder_output, maxlen=decoder_seq_length)
        decoder_output_mask = tl.prepro.sequences_get_mask(decoder_output)

        with tf.GradientTape() as tape:
            # get outputs of model
            output = model(inputs=[X, decoder_input])
            output = tf.reshape(output, [-1, vocab_size])
            # computing loss
            loss = cross_entropy_seq_with_mask(logits=output,
                                               target_seqs=decoder_output,
                                               input_mask=decoder_output_mask)
            # updating model weights
            gradient = tape.gradient(loss, model.all_weights)
            optimizer.apply_gradients(zip(gradient, model.all_weights))

        loss_count += loss
        iter_count += 1

    return iter_count, loss_count
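# Hypothetical driver loop (a sketch, not from the original listing): it assumes a
# TensorLayer Seq2seq model `seq2seq_model`, the preprocessed `X_train`/`y_train`,
# `vocab_size`, and `word_to_index` were built earlier, with `tf` and `tl` already
# imported. It shows how `train` above could be wired into an epoch loop.
optimizer = tf.optimizers.Adam(learning_rate=0.001)
n_epoch = 50

for epoch in range(n_epoch):
    n_iter, total_loss = train(seq2seq_model, optimizer, X_train, y_train,
                               vocab_size, epoch, n_epoch, word_to_index)
    # report the average per-batch loss and checkpoint the weights each epoch
    print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1, n_epoch, total_loss / n_iter))
    tl.files.save_npz(seq2seq_model.all_weights, name='model.npz')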
def train_model():
    optimizer = tf.optimizers.Adam(learning_rate=0.001)
    for epoch in range(num_epochs):
        # make some predictions before training this epoch
        for seed in seeds:
            print("Q >", seed)
            for i in range(3):
                sentence, unknowns = inference(seed, 3)
                print(f"> {sentence} ({unknowns} unknowns)")

        model_.train()  # puts the model in training mode
        # trainX, trainY = shuffle(trainX, trainY, random_state=0)  # do not shuffle the training data

        # iterate over the data in batches
        total_loss, n_iter = 0, 0
        for X, Y in tqdm(tl.iterate.minibatches(inputs=trainX, targets=trainY,
                                                batch_size=batch_size, shuffle=False),
                         total=n_step,
                         desc='Epoch[{}/{}]'.format(epoch + 1, num_epochs),
                         leave=False):
            X = tl.prepro.pad_sequences(X)
            _target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=end_id)
            _target_seqs = tl.prepro.pad_sequences(_target_seqs, maxlen=decoder_seq_length)
            _decode_seqs = tl.prepro.sequences_add_start_id(Y, start_id=start_id, remove_last=False)
            _decode_seqs = tl.prepro.pad_sequences(_decode_seqs, maxlen=decoder_seq_length)
            _target_mask = tl.prepro.sequences_get_mask(_target_seqs)

            with tf.GradientTape() as tape:
                ## compute outputs
                output = model_(inputs=[X, _decode_seqs])
                output = tf.reshape(output, [-1, vocabulary_size])
                # compute loss
                loss = cross_entropy_seq_with_mask(logits=output,
                                                   target_seqs=_target_seqs,
                                                   input_mask=_target_mask)
                # apply the gradients
                grad = tape.gradient(loss, model_.all_weights)
                optimizer.apply_gradients(zip(grad, model_.all_weights))

            total_loss += loss
            n_iter += 1

        # printing average loss after every epoch
        print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1, num_epochs, total_loss / n_iter))
        # save the weights to the file after every epoch
        tl.files.save_npz(model_.all_weights, name=model_file)
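# `inference` is called above but not defined in this listing. The sketch below is
# a minimal, assumed implementation: it presumes a TensorLayer Seq2seq model whose
# eval-mode forward pass accepts `seq_length`, `start_token`, and `top_n` (as in the
# TensorLayer chatbot example), plus `word_to_index`/`index_to_word` lookups and an
# `unk_id` built during preprocessing; those names are assumptions, not from the source.
def inference(seed, top_n):
    """Decode a single query string, sampling from the top_n logits (sketch)."""
    model_.eval()
    # map query words to ids, counting words that fall back to the unknown token
    seed_ids = [word_to_index.get(w, unk_id) for w in seed.split(' ')]
    unknowns = sum(1 for i in seed_ids if i == unk_id)
    # decode up to decoder_seq_length tokens
    sentence_ids = model_(inputs=[[seed_ids]], seq_length=decoder_seq_length,
                          start_token=start_id, top_n=top_n)
    words = []
    for w_id in sentence_ids[0].numpy():
        if w_id == end_id:
            break
        words.append(index_to_word[w_id])
    return ' '.join(words), unknowns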
# Train tensors
encode_in = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='encode_in')
decode_in = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='decode_in')
target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='target_seqs')
target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='target_mask')
train_net, _ = model(encode_in, decode_in, isTrain=True, reuse=False)

# Test tensors
encode_in_1d = tf.placeholder(dtype=tf.int64, shape=[1, None], name='encode_in')
decode_in_1d = tf.placeholder(dtype=tf.int64, shape=[1, None], name='decode_in')
test_net, seq2seq = model(encode_in_1d, decode_in_1d, isTrain=False, reuse=True)
test_net = tf.nn.softmax(test_net.outputs)

# masked cross-entropy loss over the decoder outputs, and the Adam training op
loss = cross_entropy_seq_with_mask(
    logits=train_net.outputs,
    target_seqs=target_seqs,
    input_mask=target_mask,
    name='loss'
)
train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
tl.layers.initialize_global_variables(sess)

# restore saved weights if available; otherwise optionally initialise the
# embedding layer from pre-trained word2vec parameters
if not tl.files.load_and_assign_npz(sess=sess, name='./model.npz', network=train_net):
    if word2vec:
        emb_layer = embedding(encode_in)
        load_params = tl.files.load_npz(name='word2vec.npz')
        tl.files.assign_params(sess, [load_params], emb_layer)

# Train
for epoch in range(n_epoch):
    epoch_time = step_time = time.time()
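# The body of the epoch loop is not shown in the excerpt above. The following is a
# sketch (under the assumptions of the graph-mode setup above, with `trainX`/`trainY`
# as the preprocessed training pairs) of what one pass over the minibatches could
# look like: pad the encoder/decoder sequences, feed them into the placeholders,
# and run one optimizer step per batch.
for X, Y in tl.iterate.minibatches(inputs=trainX, targets=trainY,
                                   batch_size=batch_size, shuffle=False):
    X = tl.prepro.pad_sequences(X)
    _target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=end_id)
    _target_seqs = tl.prepro.pad_sequences(_target_seqs, maxlen=decoder_seq_length)
    _decode_seqs = tl.prepro.sequences_add_start_id(Y, start_id=start_id, remove_last=False)
    _decode_seqs = tl.prepro.pad_sequences(_decode_seqs, maxlen=decoder_seq_length)
    _target_mask = tl.prepro.sequences_get_mask(_target_seqs)
    # one training step: compute the masked loss and apply the gradients
    _loss, _ = sess.run([loss, train_op],
                        feed_dict={encode_in: X,
                                   decode_in: _decode_seqs,
                                   target_seqs: _target_seqs,
                                   target_mask: _target_mask})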