Example no. 1
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    with tf.variable_scope("model"):
        inputs, targets = features, labels

        # Create model and get output logits.
        model = transformer.Transformer(params,
                                        mode == tf.estimator.ModeKeys.TRAIN)

        logits = model(inputs, targets)

        # When in prediction mode, the labels/targets are None, and the model
        # output is the prediction.
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(
                tf.estimator.ModeKeys.PREDICT,
                predictions=logits,
                export_outputs={
                    "translate": tf.estimator.export.PredictOutput(logits)
                })

        # Explicitly set the shape of the logits for XLA (TPU). This is needed
        # because the logits are passed back to the host VM CPU for metric
        # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
        # it is known from Transformer that the first two dimensions of logits
        # are the dimensions of targets. Note that the ambiguous shape of logits is
        # not a problem when computing xentropy, because padded_cross_entropy_loss
        # resolves the shape on the TPU.
        logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

        # Calculate model loss.
        # xentropy contains the cross entropy loss of every nonpadding token in the
        # targets.
        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits, targets, params["label_smoothing"],
            params["targets_vocab_size"])
        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        # Save loss as named tensor that will be logged with the logging hook.
        tf.identity(loss, "cross_entropy")

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                predictions={"predictions": logits},
                eval_metric_ops=metrics.get_eval_metrics(
                    logits, labels, params))
        else:
            train_op, metric_dict = get_train_op_and_metrics(loss, params)
            # Epochs can be quite long. This gives some intermediate information
            # in TensorBoard.
            metric_dict["minibatch_loss"] = loss
            record_scalars(metric_dict)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)
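As a usage note, this snippet only defines the model_fn; it is normally handed to a tf.estimator.Estimator together with a params dict. The wiring below is a hypothetical sketch: the hyperparameter values, the model_dir path and the train_input_fn name are illustrative, not part of the original example.

import tensorflow as tf

# Hypothetical params; the keys mirror the ones read inside model_fn above,
# plus whatever get_train_op_and_metrics expects (learning rate, schedule, ...).
params = {
    "label_smoothing": 0.1,
    "targets_vocab_size": 32000,
}

estimator = tf.estimator.Estimator(
    model_fn=model_fn, model_dir="/tmp/transformer", params=params)

# train_input_fn must yield (features, labels) == (inputs, targets) batches.
# estimator.train(input_fn=train_input_fn, max_steps=100000)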
Example no. 2
def model_fn_transformer(
    batch_sequence,
    is_training=False,
    transformer_params=model_params,
    NEG_INF=-1e9,
    soft_attention=True,
    component_wise=False,
):

    transformer = transformer_main.Transformer(transformer_params,
                                               is_training)
    # Mask which indicates which elements were zero-padded and should be ignored.
    zero_mask = tf.reduce_all(tf.equal(batch_sequence, 0), axis=-1)
    zero_mask_f = tf.cast(zero_mask, tf.float32) * NEG_INF

    # Transformer_PART
    with tf.name_scope("Transformer_encoder"):

        output = transformer(batch_sequence)

    # DNN_PART
    with tf.name_scope("Framewise_aggregator"):
        units = 512 if component_wise else 1
        weights = tf.layers.dense(output, units=units, reuse=tf.AUTO_REUSE)

        weights += zero_mask_f[..., None]

        dist_function = (partial(tf.nn.softmax, axis=1)
                         if soft_attention else tf.contrib.sparsemax.sparsemax)

        if not component_wise:
            weights = tf.squeeze(weights, axis=-1)

        if not component_wise or soft_attention:
            normalized_across_components = dist_function(weights)
        else:
            normalized_across_components = tf.transpose(
                tf.map_fn(dist_function, tf.transpose(weights, (0, 2, 1))),
                (0, 2, 1))
        if not component_wise:
            normalized_across_components = normalized_across_components[...,
                                                                        None]

        aggregated_embeddings = normalized_across_components * batch_sequence
        aggregated_embeddings = tf.reduce_sum(aggregated_embeddings, axis=1)

        if component_wise:
            normalized_across_components = tf.linalg.norm(
                normalized_across_components, axis=-1)[..., None]

    return aggregated_embeddings, normalized_across_components
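A minimal, hypothetical call site for the aggregator above, assuming frame embeddings of width 512 (implied by the units=512 branch) that are zero-padded along the time axis; the placeholder name is illustrative.

# frames: [batch, num_frames, 512] float features; all-zero rows mark padding.
frames = tf.placeholder(tf.float32, shape=(None, None, 512))
embeddings, attention = model_fn_transformer(frames, is_training=False)
# embeddings: [batch, 512] attention-pooled sequence embedding
# attention:  [batch, num_frames, 1] normalized per-frame weights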
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model.
      labels: liste de 4 tensors shape [batch_size, seq_len, 1], un tenseur pour chaque ligne
    """
    with tf.variable_scope("model"):
        inputs, targets = features, labels

        # Create model and get output logits.
        conv = cnn_layer.CNNNetwork(4, 2, [5, 5], 0.4,
                                    mode == tf.estimator.ModeKeys.TRAIN)
        model = transformer.Transformer(params,
                                        mode == tf.estimator.ModeKeys.TRAIN)

        conv_output = conv(inputs)
        log = [
            model(conv_output[0], targets["line1"]),
            model(conv_output[1], targets["line2"]),
            model(conv_output[2], targets["line3"]),
            model(conv_output[3], targets["line4"])
        ]

        # When in prediction mode, the labels/targets are None, and the model
        # output is the prediction.
        if mode == tf.estimator.ModeKeys.PREDICT:
            if params["use_tpu"]:
                raise NotImplementedError(
                    "Prediction is not yet supported on TPUs.")
            return tf.estimator.EstimatorSpec(
                tf.estimator.ModeKeys.PREDICT,
                predictions=log,
                # export_outputs={
                # "translate": tf.estimator.export.PredictOutput(logits)
                # })
            )

        # Explicitly set the shape of the logits for XLA (TPU). This is needed
        # because the logits are passed back to the host VM CPU for metric
        # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
        # it is known from Transformer that the first two dimensions of logits
        # are the dimensions of targets. Note that the ambiguous shape of logits is
        # not a problem when computing xentropy, because padded_cross_entropy_loss
        # resolves the shape on the TPU.
        for line, logits in enumerate(log):
            # Target token ids for this text line.
            target = targets["line%d" % (line + 1)]

            logits.set_shape(target.shape.as_list() +
                             logits.shape.as_list()[2:])

            # Calculate model loss.
            # xentropy contains the cross entropy loss of every nonpadding
            # token in the targets.
            xentropy, weights = metrics.padded_cross_entropy_loss(
                logits, target, params["label_smoothing"],
                params["vocab_size"])
            loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

            if mode == tf.estimator.ModeKeys.EVAL:
                if params["use_tpu"]:
                    raise NotImplementedError(
                        "Evaluation is not yet supported on TPUs.")
                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=loss,
                    predictions={"predictions": logits},
                    eval_metric_ops=metrics.get_eval_metrics(
                        logits, target, params))
            else:
                train_op, metric_dict = get_train_op_and_metrics(loss, params)

                record_scalars(metric_dict)
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=loss,
                                                  train_op=train_op)
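To make the docstring above concrete, here is a hypothetical features/labels pair this variant expects; the placeholder names and the image shape are illustrative only, and the per-line target shape follows the docstring.

# features: page images fed to cnn_layer.CNNNetwork, e.g. [batch, height, width, channels].
features = tf.placeholder(tf.float32, shape=(None, 64, 512, 1), name="page_image")
# labels: one int tensor of token ids per text line, keyed "line1" .. "line4".
labels = {
    "line%d" % i: tf.placeholder(tf.int32, shape=(None, None, 1))
    for i in range(1, 5)
}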
Example no. 4
    def __init__(self,
                 init_features=16,
                 data_shape=(32, 800),
                 gp_scale=10,
                 batch_size=16,
                 max_labels=100,
                 num_chars=276,
                 color_lambda=0,
                 ctc_lambda=1):

        self.num_chars = num_chars
        self.max_labels = max_labels
        self.batch_size = batch_size
        self.init_features = init_features
        self.scale = gp_scale
        self.ctc_lambda = ctc_lambda
        self.color_lambda = color_lambda
        self.final_stage = np.ceil((np.log2(data_shape[0]) - 2)).astype(
            np.uint8
        )  # number of maxpooling steps to achieve tensor of size 4 or less
        self.params = tf_params.MEDIUM_PARAMS

        self.graph = tf.Graph()
        tf.reset_default_graph()
        self.initialized_vars = []
        with self.graph.as_default():
            with tf.variable_scope("DIS"):
                self.dis_transformer = tft.Transformer(self.params, train=True)
            with tf.variable_scope("GEN"):
                self.gen_transformer = tft.Transformer(self.params, train=True)

            # Set up the input/output placeholders.
            self.input_images = tf.placeholder(tf.float32,
                                               shape=(batch_size,
                                                      data_shape[0],
                                                      data_shape[1], 3),
                                               name='input_content')
            self.reference_images = tf.placeholder(tf.float32,
                                                   shape=(batch_size,
                                                          data_shape[0],
                                                          data_shape[1], 3))
            self.transcriptions = tf.placeholder(tf.int32,
                                                 shape=(batch_size,
                                                        max_labels),
                                                 name='input_transcriptions')
            self.reference_transcriptions = tf.placeholder(tf.int32,
                                                           shape=(batch_size,
                                                                  max_labels))

            self.ctc_targets = tf.sparse_placeholder(tf.int32,
                                                     name='ctc_targets')
            self.ctc_seq_len = tf.placeholder(tf.int32, [None],
                                              name='ctc_seq_len')

            self.inference_content = tf.placeholder(tf.float32,
                                                    shape=(1, data_shape[0],
                                                           data_shape[1], 3),
                                                    name='inference_content')
            self.inference_transcriptions = tf.placeholder(
                tf.int32,
                shape=(1, max_labels),
                name='inference_transcriptions')
            self.inference_op = self.generator(self.inference_content,
                                               self.inference_transcriptions,
                                               train=False)
            self.inference_op = tf.identity(self.inference_op,
                                            name='inference_op')
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    with tf.variable_scope("model"):
        inputs = features[0]
        segments = features[1]
        masks = features[2]
        targets = labels

        # Create model and get output logits.
        model = transformer.Transformer(params,
                                        mode == tf.estimator.ModeKeys.TRAIN)

        logits = model(inputs, segments, masks, targets)

        # When in prediction mode, the labels/targets are None, and the model
        # output is the prediction.
        if mode == tf.estimator.ModeKeys.PREDICT:
            if params["use_tpu"]:
                raise NotImplementedError(
                    "Prediction is not yet supported on TPUs.")
            return tf.estimator.EstimatorSpec(
                tf.estimator.ModeKeys.PREDICT,
                predictions=logits,
                export_outputs={
                    "translate": tf.estimator.export.PredictOutput(logits)
                })

        # Explicitly set the shape of the logits for XLA (TPU). This is needed
        # because the logits are passed back to the host VM CPU for metric
        # evaluation, and the shape of [?, ?, vocab_size] is too vague. However
        # it is known from Transformer that the first two dimensions of logits
        # are the dimensions of targets. Note that the ambiguous shape of logits is
        # not a problem when computing xentropy, because padded_cross_entropy_loss
        # resolves the shape on the TPU.
        logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

        # Calculate model loss.
        # xentropy contains the cross entropy loss of every nonpadding token in the
        # targets.
        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits, targets, params["label_smoothing"], params["vocab_size"])
        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        # Save loss as named tensor that will be logged with the logging hook.
        tf.identity(loss, "cross_entropy")

        if mode == tf.estimator.ModeKeys.EVAL:
            if params["use_tpu"]:
                # host call functions should only have tensors as arguments.
                # This lambda pre-populates params so that metric_fn is
                # TPUEstimator compliant.
                metric_fn = lambda logits, labels: (metrics.get_eval_metrics(
                    logits, labels, params=params))
                eval_metrics = (metric_fn, [logits, targets])
                return tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    predictions={"predictions": logits},
                    eval_metrics=eval_metrics)
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                predictions={"predictions": logits},
                eval_metric_ops=metrics.get_eval_metrics(
                    logits, targets, params))
        else:
            train_op, metric_dict = get_train_op_and_metrics(loss, params)

            # Epochs can be quite long. This gives some intermediate information
            # in TensorBoard.
            metric_dict["minibatch_loss"] = loss
            if params["use_tpu"]:
                return tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    host_call=tpu_util.construct_scalar_host_call(
                        metric_dict=metric_dict,
                        model_dir=params["model_dir"],
                        prefix="training/"))
            record_scalars(metric_dict)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)
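This last variant branches on params["use_tpu"], so it can be driven either by a plain Estimator or by a TPUEstimator. The sketch below shows a hypothetical construction of the TPU path under the TF 1.x contrib APIs; tpu_grpc_url, the batch size and the iterations_per_loop value are placeholders, not taken from the original example.

import tensorflow as tf

run_config = tf.contrib.tpu.RunConfig(
    master=tpu_grpc_url,  # resolved TPU worker address, assumed defined elsewhere
    model_dir=params["model_dir"],
    tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=100))

estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=model_fn,
    config=run_config,
    use_tpu=params["use_tpu"],
    train_batch_size=2048,  # TPUEstimator derives params["batch_size"] from this
    params=params)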