def train(model, optimizer, X_train, y_train, vocab_size, epoch, n_epoch,
          word_to_index):
    start_id = word_to_index[data_config['go']]
    end_id = word_to_index[data_config['eos']]

    batch_size = model_config['batch_size']
    decoder_seq_length = model_config['decoder_seq_length']

    n_step = len(X_train) // batch_size

    loss_count = 0
    iter_count = 0

    # training starts
    # set model in training mode
    model.train()
    for X, y in tqdm(tl.iterate.minibatches(inputs=X_train,
                                            targets=y_train,
                                            batch_size=batch_size,
                                            shuffle=False),
                     total=n_step,
                     desc='Epoch[{}/{}]'.format(epoch + 1, n_epoch),
                     leave=False):

        X = tl.prepro.pad_sequences(X)
        decoder_input = tl.prepro.sequences_add_start_id(y,
                                                         start_id=start_id,
                                                         remove_last=False)
        decoder_input = tl.prepro.pad_sequences(decoder_input,
                                                maxlen=decoder_seq_length)

        decoder_output = tl.prepro.sequences_add_end_id(y, end_id=end_id)
        decoder_output = tl.prepro.pad_sequences(decoder_output,
                                                 maxlen=decoder_seq_length)
        decoder_output_mask = tl.prepro.sequences_get_mask(decoder_output)

        with tf.GradientTape() as tape:
            # get outputs of model
            output = model(inputs=[X, decoder_input])
            output = tf.reshape(output, [-1, vocab_size])
            # computing loss
            loss = cross_entropy_seq_with_mask(logits=output,
                                               target_seqs=decoder_output,
                                               input_mask=decoder_output_mask)
        # updating model weights (gradient is taken after the tape has recorded the forward pass)
        gradient = tape.gradient(loss, model.all_weights)
        optimizer.apply_gradients(zip(gradient, model.all_weights))

        loss_count += loss
        iter_count += 1

    return iter_count, loss_count
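
A minimal driver loop for the train function above might look like the sketch below; the epoch count, the Adam settings and the averaging of the returned (iter_count, loss_count) pair are illustrative assumptions, not part of the original example.

# Illustrative driver for train() above (a sketch, not the original code).
# Assumes model, X_train, y_train, vocab_size and word_to_index already exist,
# prepared the same way as in the example.
import tensorflow as tf

n_epoch = 50  # assumed value, for illustration only
optimizer = tf.optimizers.Adam(learning_rate=0.001)

for epoch in range(n_epoch):
    n_iter, total_loss = train(model, optimizer, X_train, y_train,
                               vocab_size, epoch, n_epoch, word_to_index)
    # train() returns (iter_count, loss_count), so average the accumulated loss here
    print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1, n_epoch,
                                              float(total_loss) / n_iter))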
Example #2
def train_model():
  optimizer = tf.optimizers.Adam(learning_rate=0.001)

  for epoch in range(num_epochs):

      for seed in seeds: # make some predictions before training this epoch
          print("Q >", seed)
          for i in range(3):
              sentence, unknowns = inference(seed, 3)
              print(f"> {sentence} ({unknowns} unknowns)")

      model_.train() # puts the model in training mode
      #trainX, trainY = shuffle(trainX, trainY, random_state=0) # do not shuffle the training data
      # iterate over the data in batches
      total_loss, n_iter = 0, 0
      for X, Y in tqdm(tl.iterate.minibatches(inputs=trainX, targets=trainY, batch_size=batch_size, shuffle=False), 
                      total=n_step, desc='Epoch[{}/{}]'.format(epoch + 1, num_epochs), leave=False):

          X = tl.prepro.pad_sequences(X)
          _target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=end_id)
          _target_seqs = tl.prepro.pad_sequences(_target_seqs, maxlen=decoder_seq_length)
          _decode_seqs = tl.prepro.sequences_add_start_id(Y, start_id=start_id, remove_last=False)
          _decode_seqs = tl.prepro.pad_sequences(_decode_seqs, maxlen=decoder_seq_length)
          _target_mask = tl.prepro.sequences_get_mask(_target_seqs)

          with tf.GradientTape() as tape:
              ## compute outputs
              output = model_(inputs = [X, _decode_seqs])
              output = tf.reshape(output, [-1, vocabulary_size])
              # compute loss
              loss = cross_entropy_seq_with_mask(logits=output, target_seqs=_target_seqs, input_mask=_target_mask)
          # apply the gradients (computed after the tape has recorded the forward pass)
          grad = tape.gradient(loss, model_.all_weights)
          optimizer.apply_gradients(zip(grad, model_.all_weights))
          total_loss += loss
          n_iter += 1
      # printing average loss after every epoch
      print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1, num_epochs, total_loss / n_iter))
      tl.files.save_npz(model_.all_weights, name=model_file) # save the weights to the file after every epoch
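
The examples on this page all feed their reshaped logits, padded targets and padding mask to TensorLayer's cross_entropy_seq_with_mask. As a rough functional sketch only (not TensorLayer's implementation), the masked sequence loss being computed can be approximated in plain TensorFlow like this:

import tensorflow as tf

def masked_seq_cross_entropy(logits, target_seqs, input_mask):
    # Illustrative approximation of a masked sequence loss:
    # per-token sparse softmax cross-entropy, zeroed on padding positions
    # and averaged over the real (unmasked) tokens.
    # logits: [batch * time, vocab_size]; target_seqs, input_mask: [batch, time]
    targets = tf.reshape(target_seqs, [-1])
    mask = tf.cast(tf.reshape(input_mask, [-1]), tf.float32)
    per_token = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
                                                               logits=logits)
    return tf.reduce_sum(per_token * mask) / tf.reduce_sum(mask)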
Example #3
            _target_seqs = tl.prepro.pad_sequences(_target_seqs,
                                                   maxlen=decoder_seq_length)
            _decode_seqs = tl.prepro.sequences_add_start_id(Y,
                                                            start_id=start_id,
                                                            remove_last=False)
            _decode_seqs = tl.prepro.pad_sequences(_decode_seqs,
                                                   maxlen=decoder_seq_length)
            _target_mask = tl.prepro.sequences_get_mask(_target_seqs)

            with tf.GradientTape() as tape:
                ## compute outputs
                output = model_(inputs=[X, _decode_seqs])

                output = tf.reshape(output, [-1, vocabulary_size])
                ## compute loss and update model
                loss = cross_entropy_seq_with_mask(logits=output,
                                                   target_seqs=_target_seqs,
                                                   input_mask=_target_mask)

            grad = tape.gradient(loss, model_.all_weights)
            optimizer.apply_gradients(zip(grad, model_.all_weights))

            total_loss += loss
            n_iter += 1

        # printing average loss after every epoch
        print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1, num_epochs,
                                                  total_loss / n_iter))

        for seed in seeds:
            print("Query >", seed)
            top_n = 3
Example #4
# Train tensors
encode_in = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='encode_in')
decode_in = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='decode_in')
target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs")
target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask")
train_net, _ = model(encode_in, decode_in, isTrain=True, reuse=False)

# Test tensors
encode_in_1d = tf.placeholder(dtype=tf.int64, shape=[1, None], name='encode_in')
decode_in_1d = tf.placeholder(dtype=tf.int64, shape=[1, None], name='decode_in')
test_net, seq2seq = model(encode_in_1d, decode_in_1d, isTrain=False, reuse=True)
test_net = tf.nn.softmax(test_net.outputs)

loss = cross_entropy_seq_with_mask(
    logits=train_net.outputs, 
    target_seqs=target_seqs,
    input_mask=target_mask,
    name='loss'
)
train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
tl.layers.initialize_global_variables(sess)

if not tl.files.load_and_assign_npz(sess=sess, name='./model.npz', network=train_net):
    if word2vec:
        emb_layer = embedding(encode_in)
        load_params = tl.files.load_npz(name='word2vec.npz')
        tl.files.assign_params(sess, [load_params], emb_layer)
    # Train
    for epoch in range(n_epoch):
        epoch_time = step_time = time.time()
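
# The snippet above is cut off inside its training loop.  The block below is
# only a hedged sketch of how one training step could feed the placeholders
# defined above: the minibatch iteration, the tl.prepro preprocessing and the
# names trainX, trainY, start_id and end_id are assumed to mirror the earlier
# examples on this page and are not part of the original code.
for X, Y in tl.iterate.minibatches(inputs=trainX, targets=trainY,
                                   batch_size=batch_size, shuffle=False):
    X = tl.prepro.pad_sequences(X)
    _decode_seqs = tl.prepro.sequences_add_start_id(Y, start_id=start_id,
                                                    remove_last=False)
    _decode_seqs = tl.prepro.pad_sequences(_decode_seqs)
    _target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=end_id)
    _target_seqs = tl.prepro.pad_sequences(_target_seqs)
    _target_mask = tl.prepro.sequences_get_mask(_target_seqs)

    # one optimization step: run train_op and fetch the masked loss
    _, batch_loss = sess.run([train_op, loss],
                             feed_dict={encode_in: X,
                                        decode_in: _decode_seqs,
                                        target_seqs: _target_seqs,
                                        target_mask: _target_mask})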