Example No. 1
    def train_step(inputs):
      input_ids, input_mask, input_segment_ids, target_ids_, target_mask, target_segment_ids, target_ids, draft_mask, refine_mask, grad_accum_flag = inputs
      with tf.GradientTape() as tape:

        (draft_predictions, draft_attention_weights,
          refine_predictions, refine_attention_weights) = model(
                                                               input_ids, input_mask, input_segment_ids,
                                                               target_ids_, target_mask, target_segment_ids,
                                                               True
                                                                   )
        train_variables = model.trainable_variables
        draft_summary_loss = loss_function(target_ids[:, 1:, :], draft_predictions, draft_mask)
        refine_summary_loss = loss_function(target_ids[:, :-1, :], refine_predictions, refine_mask)
        loss = draft_summary_loss + refine_summary_loss
        loss = tf.reduce_mean(loss)
        #loss = optimizer.get_scaled_loss(loss)
      gradients  = tape.gradient(loss, train_variables)
      #gradients = optimizer.get_unscaled_gradients(gradients)
      # Initialize the shadow variables with the same type as the gradients
      if not accumulators:
        for tv in gradients:
          accumulators.append(tf.Variable(tf.zeros_like(tv), trainable=False))
      # accumulate the gradients into the shadow variables
      for (accumulator, grad) in zip(accumulators, gradients):
        accumulator.assign_add(grad)
      # apply the gradients and reset them to zero if the flag is true

      if grad_accum_flag:
        optimizer.apply_gradients(zip(accumulators, train_variables))
        for accumulator in (accumulators):
            accumulator.assign(tf.zeros_like(accumulator))

        train_loss(loss)
        train_accuracy(target_ids_[:, :-1], refine_predictions)
      return (loss,target_ids_[:, :-1], refine_predictions)
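Example No. 1 above (and Examples No. 2, 5 and 11 below) follow the same gradient-accumulation pattern: per-batch gradients are added into non-trainable "shadow" variables, and only when the accumulation flag is set are they averaged, applied, and reset to zero. Below is a minimal, self-contained sketch of that pattern; the toy model, loss, and accumulation_steps value are illustrative stand-ins, not part of the original code.

import tensorflow as tf

# Stand-in model, optimizer and loss; the originals use a summarization transformer.
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.build(input_shape=(None, 3))
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()
accumulation_steps = 4

# One non-trainable shadow variable per trainable variable.
accumulators = [tf.Variable(tf.zeros_like(v), trainable=False)
                for v in model.trainable_variables]

def train_step(x, y, grad_accum_flag):
    with tf.GradientTape() as tape:
        loss = loss_fn(y, model(x, training=True))
    gradients = tape.gradient(loss, model.trainable_variables)
    # Accumulate this batch's gradients into the shadow variables.
    for accumulator, grad in zip(accumulators, gradients):
        accumulator.assign_add(grad)
    # Apply the averaged gradients and reset the accumulators when the flag is set.
    if grad_accum_flag:
        for accumulator in accumulators:
            accumulator.assign(tf.math.divide(accumulator, accumulation_steps))
        optimizer.apply_gradients(zip(accumulators, model.trainable_variables))
        for accumulator in accumulators:
            accumulator.assign(tf.zeros_like(accumulator))
    return loss

for step in range(1, 2 * accumulation_steps + 1):
    x = tf.random.normal((8, 3))
    y = tf.random.normal((8, 1))
    train_step(x, y, grad_accum_flag=(step % accumulation_steps == 0))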
Example No. 2
def train_step(inp, tar, grad_accum_flag):
    with tf.GradientTape() as tape:
        draft_predictions, draft_attention_weights, draft_dec_output = draft_summary_model(
            inp, tar, training=True)
        refine_predictions, refine_attention_weights, refine_dec_output = refine_summary_model(
            inp, tar, training=True)
        train_variables = draft_summary_model.trainable_variables + refine_summary_model.trainable_variables
        draft_summary_loss = loss_function(tar[0][:, 1:, :], draft_predictions)
        refine_summary_loss = loss_function(tar[0][:, :-1, :],
                                            refine_predictions)
        loss = draft_summary_loss + refine_summary_loss
        scaled_loss = optimizer.get_scaled_loss(loss)
    scaled_gradients = tape.gradient(scaled_loss, train_variables)
    gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    # Initialize the shadow variables with the same type as the gradients
    if not accumulators:
        for tv in gradients:
            accumulators.append(tf.Variable(tf.zeros_like(tv),
                                            trainable=False))
    # accumulate the gradients into the shadow variables
    for (accumulator, grad) in zip(accumulators, gradients):
        accumulator.assign_add(grad)
    # apply the gradients and reset them to zero if the flag is true
    if grad_accum_flag:
        for accumulator in accumulators:
            accumulator.assign(
                tf.math.divide(accumulator, h_parms.accumulation_steps))
        optimizer.apply_gradients(zip(accumulators, train_variables))
        for accumulator in (accumulators):
            accumulator.assign(tf.zeros_like(accumulator))
    train_loss(loss)
    train_accuracy(tar[0][:, 1:, :], draft_predictions)
    train_accuracy(tar[0][:, :-1, :], refine_predictions)
Example No. 3
 def val_step(input_ids,
              input_mask,
              input_segment_ids,
              target_ids_,
              target_mask,
              target_segment_ids,
              target_ids,
              draft_mask,
              refine_mask,
              step,
              create_summ):
   (draft_predictions, draft_attention_weights,
    refine_predictions, refine_attention_weights) = model(
                                                          input_ids, input_mask, input_segment_ids,
                                                          target_ids_, target_mask, target_segment_ids,
                                                          False
                                                          )
   draft_summary_loss = loss_function(target_ids[:, 1:, :], draft_predictions, draft_mask)
   refine_summary_loss = loss_function(target_ids[:, :-1, :], refine_predictions, refine_mask)
   loss = draft_summary_loss + refine_summary_loss
   loss = tf.reduce_mean(loss)
   validation_loss(loss)
   validation_accuracy(target_ids_[:, :-1], refine_predictions)
   if create_summ:
     rouge, bert = tf_write_summary(target_ids_[:, :-1], refine_predictions, step)
   else:
     rouge, bert = (1.0, 1.0)
   return (rouge, bert)
Example No. 4
def val_step(inp, tar, epoch, create_summ):
    target_ids_, target_mask, target_segment_ids = tar
    mask = tf.math.logical_not(tf.math.equal(target_ids_[:, 1:], 0))
    target_ids = label_smoothing(
        tf.one_hot(target_ids_, depth=config.input_vocab_size))
    draft_predictions, draft_attention_weights, draft_dec_output, refine_predictions, refine_attention_weights, refine_dec_output = model(
        inp, tar, training=False)
    draft_summary_loss = loss_function(target_ids[:, 1:, :], draft_predictions,
                                       mask)
    refine_summary_loss = loss_function(target_ids[:, :-1, :],
                                        refine_predictions, mask)
    loss = draft_summary_loss + refine_summary_loss
    validation_loss(loss)
    validation_accuracy(target_ids_[:, 1:], draft_predictions)
    validation_accuracy(target_ids_[:, :-1], refine_predictions)
Example No. 5
def train_step(inp, tar, grad_accum_flag):
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
  with tf.GradientTape() as tape:
    predictions, attention_weights, dec_output = model(
                                                       inp, 
                                                       tar_inp, 
                                                       enc_padding_mask, 
                                                       combined_mask, 
                                                       dec_padding_mask,
                                                       training=True
                                                       )
    train_variables = model.trainable_variables
    loss = loss_function(tar_real, predictions)
    scaled_loss = optimizer.get_scaled_loss(loss)
  scaled_gradients  = tape.gradient(scaled_loss, train_variables)
  gradients = optimizer.get_unscaled_gradients(scaled_gradients)
  # Initialize the shadow variables with the same type as the gradients
  if not accumulators:
    for tv in gradients:
      accumulators.append(tf.Variable(tf.zeros_like(tv), trainable=False))
  # accumulate the gradients into the shadow variables
  for (accumulator, grad) in zip(accumulators, gradients):
    accumulator.assign_add(grad)
  # apply the gradients and reset them to zero if the flag is true
  if grad_accum_flag:
    for accumulator in accumulators:
      accumulator.assign(tf.math.divide(accumulator,h_parms.accumulation_steps))
    optimizer.apply_gradients(zip(accumulators, train_variables))
    for accumulator in (accumulators):
        accumulator.assign(tf.zeros_like(accumulator))
  train_loss(loss)
  train_accuracy(tar_real, predictions)  
Example No. 6
def train_step(inp, tar, inp_shape, tar_shape, batch):
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        inp, tar_inp)
    with tf.GradientTape() as tape:
        predictions, attention_weights, dec_output = transformer(
            inp, tar_inp, True, enc_padding_mask, combined_mask,
            dec_padding_mask)
        train_variables = transformer.trainable_variables
        tf.debugging.check_numerics(predictions,
                                    "Nan's in the transformer predictions")
        if config.copy_gen:
            predictions = pointer_generator(dec_output,
                                            predictions,
                                            attention_weights,
                                            inp,
                                            inp_shape,
                                            tar_shape,
                                            batch,
                                            training=True)
            tf.debugging.check_numerics(
                predictions, "Nan's in the pointer_generator predictions")
        train_variables = train_variables + pointer_generator.trainable_variables
        loss = loss_function(tar_real, predictions)
    gradients = tape.gradient(loss, train_variables)
    optimizer.apply_gradients(zip(gradients, train_variables))
    train_loss(loss)
    train_accuracy(tar_real, predictions)
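Example No. 6 wraps the forward pass in tf.debugging.check_numerics, which raises an InvalidArgumentError as soon as the checked tensor contains a NaN or Inf and attaches the supplied message. A tiny self-contained illustration (the tensor below is made up for demonstration):

import tensorflow as tf

x = tf.constant([1.0, float("nan")])
try:
    tf.debugging.check_numerics(x, "Nan's in the transformer predictions")
except tf.errors.InvalidArgumentError as err:
    print(err.message)  # reports the message above together with the invalid values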
Example No. 7
def val_step(inp, tar, inp_shape, tar_shape, batch):
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  
  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
  predictions, attention_weights, dec_output = transformer(
                                                           inp, 
                                                           tar_inp, 
                                                           False, 
                                                           enc_padding_mask, 
                                                           combined_mask, 
                                                           dec_padding_mask
                                                           )
  if config.copy_gen:
    predictions = pointer_generator(
                            dec_output, 
                            predictions, 
                            attention_weights, 
                            inp, 
                            inp_shape, 
                            tar_shape, 
                            batch, 
                            training=False
                            )
  loss = loss_function(tar_real, predictions)
  validation_loss(loss)
  validation_accuracy(tar_real, predictions)
Example No. 8
def val_step(inp, tar, epoch, inp_shape, tar_shape, batch, create_summ):
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        inp, tar_inp)
    predictions, attention_weights, dec_output = transformer(
        inp, tar_inp, False, enc_padding_mask, combined_mask, dec_padding_mask)
    if config.copy_gen:
        predictions = pointer_generator(dec_output,
                                        predictions,
                                        attention_weights,
                                        inp,
                                        inp_shape,
                                        tar_shape,
                                        batch,
                                        training=False)
    loss = loss_function(tar_real, predictions)
    validation_loss(loss)
    validation_accuracy(tar_real, predictions)
    if create_summ:
        rouge, bert = tf_write_summary(tar_real, predictions, inp[:, 1:],
                                       epoch)
    else:
        rouge, bert = (1.0, 1.0)
    return (rouge, bert)
Example No. 9
def val_step(inp, tar, epoch, create_summ):

    draft_predictions, draft_attention_weights, draft_dec_output = draft_summary_model(
        inp, tar, training=False)
    refine_predictions, refine_attention_weights, refine_dec_output = refine_summary_model(
        inp, tar, training=False)
    draft_summary_loss = loss_function(tar[0][:, 1:, :], draft_predictions)
    refine_summary_loss = loss_function(tar[0][:, :-1, :], refine_predictions)
    loss = draft_summary_loss + refine_summary_loss
    validation_loss(loss)
    validation_accuracy(tar[0][:, 1:, :], draft_predictions)
    validation_accuracy(tar[0][:, :-1, :], refine_predictions)
    if create_summ:
        rouge, bert = tf_write_summary(tar[0][:, :-1, :], refine_predictions,
                                       inp[:, 1:], epoch)
    else:
        rouge, bert = (1.0, 1.0)
    return (rouge, bert)
Example No. 10
def train_step(inp, tar_inp, tar_real):
    enc_padding_mask, combined_mask, dec_padding_mask = utils.create_masks(
        inp, tar_inp)
    # shape(inp) = (batch_size, pad_size)
    # shape(predictions) = (batch_size, pad_size, tar_vocab_size)
    with tf.GradientTape() as tape:
        predictions, _ = transformer(inp, tar_inp, True, enc_padding_mask,
                                     combined_mask, dec_padding_mask)
        loss = metrics.loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
    train_loss(loss)
Example No. 11
def train_step(input_ids, input_mask, input_segment_ids, target_ids,
               target_mask, target_segment_ids, grad_accum_flag):
    inp = input_ids, input_mask, input_segment_ids
    tar = target_ids, target_mask, target_segment_ids
    target_ids_ = target_ids
    mask = tf.math.logical_not(tf.math.equal(target_ids_[:, 1:], 0))
    target_ids = label_smoothing(
        tf.one_hot(target_ids_, depth=config.input_vocab_size))
    with tf.GradientTape() as tape:
        draft_predictions, draft_attention_weights, draft_dec_output, refine_predictions, refine_attention_weights, refine_dec_output = model(
            inp, tar, True)
        train_variables = model.trainable_variables
        draft_summary_loss = loss_function(target_ids[:, 1:, :],
                                           draft_predictions, mask)
        refine_summary_loss = loss_function(target_ids[:, :-1, :],
                                            refine_predictions, mask)
        loss = draft_summary_loss + refine_summary_loss
        #scaled_loss = optimizer.get_scaled_loss(loss)
    gradients = tape.gradient(loss, train_variables)
    #gradients = optimizer.get_unscaled_gradients(scaled_gradients)
    # Initialize the shadow variables with the same type as the gradients
    if not accumulators:
        for tv in gradients:
            accumulators.append(tf.Variable(tf.zeros_like(tv),
                                            trainable=False))
    # accumulate the gradients into the shadow variables
    for (accumulator, grad) in zip(accumulators, gradients):
        accumulator.assign_add(grad)
    # apply the gradients and reset them to zero if the flag is true
    if grad_accum_flag:
        for accumulator in accumulators:
            accumulator.assign(
                tf.math.divide(accumulator, h_parms.accumulation_steps))
        optimizer.apply_gradients(zip(accumulators, train_variables))
        for accumulator in (accumulators):
            accumulator.assign(tf.zeros_like(accumulator))
    train_loss(loss)
    train_accuracy(target_ids_[:, 1:], draft_predictions)
    train_accuracy(target_ids_[:, :-1], refine_predictions)
Example No. 12
def train_step(inp, tar, grad_accum_flag):
  accumulators = []
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  
  translated_output_temp, tape = beam_search_eval(inp, tar_real, h_parms.train_beam_size)

  predictions = translated_output_temp[-1][:,0,:] 
  #print(tar_real.shape, predictions.shape)
  train_variables = model.trainable_variables
  loss = loss_function(tar_real, predictions)
  scaled_loss = optimizer.get_scaled_loss(loss)
  scaled_gradients = tape.gradient(scaled_loss, train_variables)
  gradients = optimizer.get_unscaled_gradients(scaled_gradients)
  optimizer.apply_gradients(zip(gradients, train_variables))
  train_loss(loss)
  train_accuracy(tar_real, predictions)  
Example No. 13
def train_step(images, word_target):  # word_target shape(bs, max_txt_length, vocab_size)
    loss = 0

    hidden = tf.zeros((BATCH_SIZE, decode_units))
    word_one_hot = word_target[:, 0, :]  # corresponding the word 'START'
    with tf.GradientTape() as tape:
        # Teacher forcing - feeding the target as the next input
        for i in range(1, word_target.shape[1]):
            y_pred, hidden = model(word_one_hot, hidden, images)
            word_one_hot = word_target[:, i, :]

            loss += loss_function(word_target[:, i, :], y_pred)

    batch_loss = loss / int(word_target.shape[1])
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss
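Example No. 13 trains the decoder with teacher forcing: at every step the ground-truth token from the previous position is fed in as the next input instead of the model's own prediction, and the per-step losses are summed before a single gradient update. The sketch below isolates that loop with a toy GRU decoder; BATCH_SIZE, the vocabulary size, and the decoder/projection layers are illustrative assumptions, not the original image-captioning model.

import tensorflow as tf

BATCH_SIZE, MAX_LEN, VOCAB, UNITS = 2, 5, 10, 16
decoder = tf.keras.layers.GRUCell(UNITS)
proj = tf.keras.layers.Dense(VOCAB)
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

# word_target has shape (batch_size, max_txt_length, vocab_size), as in the example.
word_target = tf.one_hot(
    tf.random.uniform((BATCH_SIZE, MAX_LEN), maxval=VOCAB, dtype=tf.int32), VOCAB)

hidden = tf.zeros((BATCH_SIZE, UNITS))
word_one_hot = word_target[:, 0, :]   # corresponds to the 'START' token
loss = 0.0
with tf.GradientTape() as tape:
    for i in range(1, MAX_LEN):
        out, [hidden] = decoder(word_one_hot, [hidden])
        y_pred = proj(out)
        loss += loss_fn(word_target[:, i, :], y_pred)
        # Teacher forcing: feed the ground-truth token, not the prediction.
        word_one_hot = word_target[:, i, :]

batch_loss = loss / int(word_target.shape[1])
variables = decoder.trainable_variables + proj.trainable_variables
gradients = tape.gradient(loss, variables)
optimizer.apply_gradients(zip(gradients, variables))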
Example No. 14
def val_step(inp, tar, epoch, create_summ):
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
  predictions, attention_weights, dec_output = model(
                                                     inp, 
                                                     tar_inp, 
                                                     enc_padding_mask, 
                                                     combined_mask, 
                                                     dec_padding_mask,
                                                     training=False
                                                     )
  loss = loss_function(tar_real, predictions)
  validation_loss(loss)
  validation_accuracy(tar_real, predictions)
  if create_summ: 
    rouge, bert = tf_write_summary(tar_real, predictions, inp[:, 1:], epoch)  
  else: 
    rouge, bert = (1.0, 1.0)  
  return (rouge, bert)
Example No. 15
def train(X, Y, act_fun, act_fun_back, architecture, loss_metric,
          learning_rate, epochs, metrics_period):
    layers = len(architecture)
    params = init_params(architecture)

    iterations = 0

    for epoch in range(epochs):
        for example_idx in range(len(X)):
            x = algebra.Vector(X[example_idx])
            y = algebra.Vector(Y[example_idx])

            y_hat, layer_outputs = propagation.net_forward_prop(
                layers, x, params, act_fun)

            output_gradient = propagation.output_gradient(y, y_hat)

            param_gradients = propagation.net_back_prop(
                layers, layer_outputs, output_gradient, params, act_fun_back)

            update_params(layers, params, param_gradients, learning_rate)

            iterations += 1

            # Metrics
            if iterations % metrics_period == 0:
                m_y_hat_list = []
                for m_idx in range(len(X)):
                    m_x = algebra.Vector(X[m_idx])
                    m_y_hat, _ = propagation.net_forward_prop(
                        layers, m_x, params, act_fun)
                    m_y_hat_list.append(m_y_hat.vector)
                loss = metrics.loss_function(m_y_hat_list, Y, loss_metric)
                accuracy = metrics.accuracy(m_y_hat_list, Y)
                print(
                    'Epoch: {}\tIter: {}k\t\tLoss: {}\t\tAccuracy: {}'.format(
                        epoch + 1, iterations / 1000, loss, accuracy))

    memory['layers'] = layers
    memory['params'] = params
    memory['act_fun'] = act_fun
Example No. 16
def train_step(inp, tar):
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]
  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
  with tf.GradientTape() as tape:
    predictions, attention_weights, dec_output = transformer(
                                                             inp, 
                                                             tar_inp, 
                                                             enc_padding_mask, 
                                                             combined_mask, 
                                                             dec_padding_mask,
                                                             training=True
                                                             )
    train_variables = transformer.trainable_variables
    loss = loss_function(tar_real, predictions)
    scaled_loss = optimizer.get_scaled_loss(loss)
  scaled_gradients = tape.gradient(scaled_loss, train_variables)
  gradients = optimizer.get_unscaled_gradients(scaled_gradients)
  optimizer.apply_gradients(zip(gradients, train_variables))
  train_loss(loss)
  train_accuracy(tar_real, predictions)  
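Examples No. 2, 5, 12 and 16 pair tape.gradient with optimizer.get_scaled_loss / optimizer.get_unscaled_gradients, which are methods of the TF 2.x tf.keras.mixed_precision.LossScaleOptimizer: the loss is multiplied by a scale factor before differentiation so small float16 gradients do not underflow, and the gradients are divided by the same factor before the update. A minimal sketch, assuming that wrapper and a stand-in model:

import tensorflow as tf

tf.keras.mixed_precision.set_global_policy("mixed_float16")
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(tf.keras.optimizers.Adam())
loss_fn = tf.keras.losses.MeanSquaredError()

x = tf.random.normal((4, 3))
y = tf.random.normal((4, 1))
with tf.GradientTape() as tape:
    loss = loss_fn(y, model(x, training=True))
    # Scale the loss so that small float16 gradients do not flush to zero.
    scaled_loss = optimizer.get_scaled_loss(loss)
scaled_gradients = tape.gradient(scaled_loss, model.trainable_variables)
# Undo the scaling before applying the update.
gradients = optimizer.get_unscaled_gradients(scaled_gradients)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))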
Example No. 17
def val_step(inp, tar, epoch, create_summ):
    target_ids_, target_mask, target_segment_ids = tar
    mask = tf.math.logical_not(tf.math.equal(target_ids_[:, 1:], 0))
    #target_ids = tf.one_hot(target_ids, config.input_vocab_size)
    target_ids = label_smoothing(
        tf.one_hot(target_ids_, depth=config.input_vocab_size))
    draft_predictions, draft_attention_weights, draft_dec_output = draft_summary_model(
        inp, tar, training=False)
    # refine_predictions, refine_attention_weights, refine_dec_output = refine_summary_model(
    #                                                                                        inp,
    #                                                                                        tar,
    #                                                                                        training=False
    #                                                                                        )
    draft_summary_loss = loss_function(target_ids[:, 1:, :], draft_predictions,
                                       mask)
    #refine_summary_loss = loss_function(target_ids[:, :-1, :], refine_predictions)
    loss = draft_summary_loss  #+ refine_summary_loss
    validation_loss(loss)
    validation_accuracy(target_ids_[:, 1:], draft_predictions)
    if create_summ:
        rouge, bert = tf_write_summary(target_ids_[:, 1:], draft_predictions,
                                       inp[0][:, 1:], epoch)
    else:
        rouge, bert = (1.0, 1.0)
    return (rouge, bert)
Example No. 18
    for n, data in enumerate(train_loader):

        im, gt_points, gt_normals = data
        if use_cuda:
            im = im.cuda()
            gt_points = gt_points.cuda()
            gt_normals = gt_normals.cuda()

        # Forward
        graph.reset()
        optimizer.zero_grad()
        pool = FeaturePooling(im)
        pred_points = model_gcn(graph, pool)

        # Loss
        loss = loss_function(pred_points, gt_points.squeeze(),
                                          gt_normals.squeeze(), graph)

        # Backward
        loss.backward()
        optimizer.step()

        curr_loss += loss.item()  # use the Python float so the autograd graph is not kept alive

        # Log
        if (n+1)%log_step == 0:
            print("Epoch", epoch)
            print("Batch", n+1)
            print(" Loss:", curr_loss.data.item()/log_step)
            curr_loss = 0

        # Save
Example No. 19
        if use_cuda:
            ims = ims.cuda()
            gt_points_list = gt_points_list.cuda()
            gt_normals_list = gt_normals_list.cuda()

        # Forward
        graph.reset()
        optimizer.zero_grad()
        pools = []
        for i in range(5):
            pools.append(FeaturePooling(ims[i]))

        pred_points = model_gcn(graph, pools)

        # Loss
        loss = loss_function(pred_points, gt_points_list[0].squeeze(),
                                          gt_normals_list[0].squeeze(), graph)

        # Backward
        loss.backward()
        optimizer.step()

        curr_loss += loss.item()  # use the Python float so the autograd graph is not kept alive

        # Log
        if (n+1)%log_step == 0:
            print("Epoch", epoch, flush=True)
            print("Batch", n+1, flush=True)
            print(" Loss:", curr_loss.data.item()/log_step, flush=True)
            curr_loss = 0

        # Save