Пример #1
0
    def model_fn(features, labels, mode, params):
        """Build the TPUEstimatorSpec for the encoder-decoder transformer.

        Runs ``transformer.TransformerEncoderDecoderModel`` on
        ``features['input_ids']`` / ``features['target_ids']``, optionally
        warm-starts the trainable variables from ``init_checkpoint`` and
        returns a spec for the requested mode.

        Names that are not defined in this function (``vocab_size``,
        ``hidden_size``, ``learning_rate``, ``num_train_steps``,
        ``num_warmup_steps``, ``init_checkpoint``, ``use_tpu``, ...) are
        resolved from the surrounding scope.

        Args:
            features: Mapping of feature name to tensor. Must contain
                'input_ids' and 'target_ids'; every entry is logged.
            labels: Not used by this function.
            mode: One of ``tf.estimator.ModeKeys``; only TRAIN and EVAL are
                handled.
            params: Not used by this function.

        Returns:
            A ``tf.contrib.tpu.TPUEstimatorSpec`` carrying the model loss and,
            in TRAIN mode, the training op.

        Raises:
            ValueError: If ``mode`` is neither TRAIN nor EVAL.
        """
        tf.logging.info('*** Features ***')
        for name in sorted(features.keys()):
            tf.logging.info('  name = %s, shape = %s', name,
                            features[name].shape)

        input_ids = features['input_ids']
        target_ids = features['target_ids']

        is_training = mode == tf.estimator.ModeKeys.TRAIN

        model = transformer.TransformerEncoderDecoderModel(
            vocab_size,
            hidden_size,
            filter_size,
            num_heads,
            num_encoder_layers,
            num_decoder_layers,
            label_smoothing,
            dropout,
        )

        # The model call returns (loss, outputs); only the loss is consumed.
        total_loss, _ = model(
            {
                'inputs': input_ids,
                'targets': target_ids,
            },
            training=is_training,
        )

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (
                assignment_map,
                initialized_variable_names,
            ) = get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:
                # On TPU the checkpoint restore is deferred into the scaffold
                # function handed to the TPUEstimatorSpec.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info('**** Trainable Variables ****')
        for var in tvars:
            init_string = ''
            if var.name in initialized_variable_names:
                init_string = ', *INIT_FROM_CKPT*'
            tf.logging.info('  name = %s, shape = %s%s', var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # NOTE(review): an Adafactor-based train_op used to be built here
            # and was then immediately overwritten by the call below, so it
            # never ran. Only the effective optimizer is kept; confirm that
            # `optimization.create_optimizer` is the intended one.
            train_op = optimization.create_optimizer(
                total_loss,
                learning_rate,
                num_train_steps,
                num_warmup_steps,
                use_tpu,
            )
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn,
            )

        if mode == tf.estimator.ModeKeys.EVAL:
            # TODO(review): masked-LM accuracy/loss metrics need per-example
            # losses and log-probs from a masked-LM head. This graph does not
            # build one, and the old metric_fn referenced undefined tensors
            # (NameError in EVAL). Report the model loss only until such a
            # head exists.
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                scaffold_fn=scaffold_fn,
            )

        raise ValueError('Only TRAIN and EVAL modes are supported: %s' %
                         (mode))
Пример #2
0
    def model_fn(features, labels, mode, params):
        """Construct the encoder-decoder graph and wrap it in a TPUEstimatorSpec.

        Feeds ``features['input_ids']`` and ``features['target_ids']`` to
        ``transformer.TransformerEncoderDecoderModel``, optionally initialises
        trainable variables from ``init_checkpoint`` and, in TRAIN mode,
        minimises the loss with Adafactor.

        Names not defined here (``vocab_size``, ``learning_rate``,
        ``init_checkpoint``, ``use_tpu``, ...) come from the surrounding
        scope.

        Args:
            features: Mapping of feature name to tensor; must contain
                'input_ids' and 'target_ids'.
            labels: Not used by this function.
            mode: A ``tf.estimator.ModeKeys`` value; TRAIN and EVAL only.
            params: Not used by this function.

        Returns:
            A ``tf.contrib.tpu.TPUEstimatorSpec`` for the requested mode.

        Raises:
            ValueError: If ``mode`` is neither TRAIN nor EVAL.
        """
        tf.logging.info('*** Features ***')
        for feature_name in sorted(features):
            tf.logging.info('  name = %s, shape = %s', feature_name,
                            features[feature_name].shape)

        batch = {
            'inputs': features['input_ids'],
            'targets': features['target_ids'],
        }
        training = mode == tf.estimator.ModeKeys.TRAIN

        seq2seq = transformer.TransformerEncoderDecoderModel(
            vocab_size,
            hidden_size,
            filter_size,
            num_heads,
            num_encoder_layers,
            num_decoder_layers,
            label_smoothing,
            dropout,
        )
        # The second element of the model's return value is not needed here.
        total_loss, _ = seq2seq(batch, training=training)

        trainable_vars = tf.trainable_variables()

        restored_names = {}
        scaffold_fn = None
        if init_checkpoint:
            ckpt_map, restored_names = get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint)
            if not use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
            else:
                # On TPU the restore is wrapped in the scaffold function that
                # is passed to the TPUEstimatorSpec.
                def restore_in_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
                    return tf.train.Scaffold()

                scaffold_fn = restore_in_scaffold

        tf.logging.info('**** Trainable Variables ****')
        for variable in trainable_vars:
            suffix = (', *INIT_FROM_CKPT*'
                      if variable.name in restored_names else '')
            tf.logging.info('  name = %s, shape = %s%s', variable.name,
                            variable.shape, suffix)

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, loss=total_loss, scaffold_fn=scaffold_fn)

        if mode != tf.estimator.ModeKeys.TRAIN:
            raise ValueError('Only TRAIN and EVAL modes are supported: %s' %
                             (mode))

        # Learning rate equals `learning_rate` while step <= 10000
        # (rsqrt(10000) == 0.01), then decays with 1/sqrt(step).
        step = tf.train.get_global_step()
        clipped_step = tf.maximum(tf.to_float(step), 10000)
        decayed_lr = learning_rate / 0.01 * tf.rsqrt(clipped_step)

        opt = adafactor.AdafactorOptimizer(
            learning_rate=decayed_lr,
            decay_rate=adafactor.adafactor_decay_rate_pow(0.8),
            beta1=0.0,
        )
        if use_tpu:
            opt = tf.contrib.tpu.CrossShardOptimizer(opt)

        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=opt.minimize(total_loss, global_step=step),
            scaffold_fn=scaffold_fn,
        )