Example #1
 def build(self, input_shape=None):
     if self.share_parameters:
         # When sharing parameters, build the first leaf inputter, then assign
         # its parameter-holding attributes to the other inputters.
         leaves = self.get_leaf_inputters()
         first, others = leaves[0], leaves[1:]
         # When the first leaf is itself a ParallelInputter that shares
         # parameters, build its first leaf inputter and assign the
         # parameter-holding attributes to the remaining inputters.
         if isinstance(first, ParallelInputter):
             if first.share_parameters:
                 first.built = True
                 first_leaves = first.get_leaf_inputters()
                 others.extend(first_leaves[1:])
                 first = first_leaves[0]
         with compat.tf_compat(v1="variable_scope")(
                 self._get_shared_name()):
             first.build(input_shape)
         for name, attr in six.iteritems(first.__dict__):
             if (isinstance(attr, tf.Variable)
                     or (isinstance(attr, tf.keras.layers.Layer)
                         and attr.variables)):
                 for inputter in others:
                     setattr(inputter, name, attr)
                     inputter.built = True
     else:
         for inputter, scope in zip(self.inputters, self._get_names()):
             with compat.tf_compat(v1="variable_scope")(scope):
                 inputter.build(input_shape)
     super(ParallelInputter, self).build(input_shape)
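Every snippet in this section resolves TF1/TF2 API differences through
compat.tf_compat. The real helper lives in opennmt.utils.compat; the
following is only a minimal sketch of the idea (the name tf_compat_sketch
and its lookup order are assumptions, not the library's code):

import tensorflow as tf

def tf_compat_sketch(v2=None, v1=None):
    # Hypothetical stand-in for opennmt.utils.compat.tf_compat: resolve a
    # dotted attribute path against tf, falling back to tf.compat.v1.
    candidates = []
    if v2 is not None:
        candidates.append((tf, v2))
    if v1 is not None:
        candidates.append((tf, v1))
        if hasattr(tf.compat, "v1"):
            candidates.append((tf.compat.v1, v1))
    for root, path in candidates:
        obj = root
        try:
            for name in path.split("."):
                obj = getattr(obj, name)
            return obj
        except AttributeError:
            continue
    raise ValueError("No compatible symbol for v2=%r, v1=%r" % (v2, v1))

For example, tf_compat_sketch(v2="io.VarLenFeature", v1="VarLenFeature")
would return tf.io.VarLenFeature under TF2 and tf.VarLenFeature under TF1.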
Example #2
    def make_features(self, element=None, features=None, training=None):
        if features is None:
            features = {}
        if self.input_features[0].name in features:
            return features
        if element is None:
            raise RuntimeError("make_features was called with None element")
        tf_parse_example = compat.tf_compat(v2="io.parse_single_example",
                                            v1="parse_single_example")
        tf_var_len_feature = compat.tf_compat(v2="io.VarLenFeature",
                                              v1="VarLenFeature")
        features_dict = {}
        if self.has_word():
            features_dict["numWords"] = tf_var_len_feature(tf.int64)

        for feature in self.input_features:
            features_dict[feature.name] = tf_var_len_feature(tf.float32)

        example = tf_parse_example(element, features=features_dict)

        if self.has_word():
            features["numWords"] = tf.cast(example["numWords"].values,
                                           tf.int32)[0]

        for feature in self.input_features:
            features[feature.name] = tf.reshape(example[feature.name].values,
                                                feature.shape)

        return features
Example #3
def _lower_triangle_mask(sequence_length, maximum_length=None, dtype=tf.float32):
  """Builds a batch of lower triangular (causal) masks from sequence lengths."""
  batch_size = tf.shape(sequence_length)[0]
  if maximum_length is None:
    maximum_length = tf.reduce_max(sequence_length)
  mask = tf.ones([batch_size, maximum_length, maximum_length], dtype=dtype)
  mask = compat.tf_compat(v2="linalg.band_part", v1="matrix_band_part")(mask, -1, 0)
  return mask
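For intuition, tf.linalg.band_part(x, -1, 0) keeps everything on and below
the main diagonal, which is the standard causal mask shape. A quick TF2
eager-mode check (illustration only, not part of the library):

import tensorflow as tf

mask = tf.linalg.band_part(tf.ones([3, 3]), -1, 0)
print(mask.numpy())
# [[1. 0. 0.]
#  [1. 1. 0.]
#  [1. 1. 1.]]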
Example #4
 def make_dataset(self, data_file, training=None):
     first_record = next(
         compat.tf_compat(v1="python_io.tf_record_iterator")(data_file))
     first_record = tf.train.Example.FromString(first_record)
     shape = first_record.features.feature["shape"].int64_list.value
     self.input_depth = shape[-1]
     return tf.data.TFRecordDataset(data_file)
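The TF1-only python_io.tf_record_iterator can be replaced in TF2 by
iterating a tf.data.TFRecordDataset eagerly. A minimal sketch, assuming
eager execution (peek_input_depth is a hypothetical name):

import tensorflow as tf

def peek_input_depth(data_file):
    # Read the first serialized record and inspect its stored "shape" feature.
    first_record = next(iter(tf.data.TFRecordDataset(data_file)))
    example = tf.train.Example.FromString(first_record.numpy())
    return example.features.feature["shape"].int64_list.value[-1]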
Example #5
 def make_inputs(self, features, training=None):
   transformed = []
   for i, inputter in enumerate(self.inputters):
     with compat.tf_compat(v1="variable_scope")("inputter_{}".format(i)):
       transformed.append(inputter.make_inputs(features, training=training))
   outputs = self.reducer(transformed)
   outputs = tf.layers.dropout(outputs, rate=self.dropout, training=training)
   return outputs
Example #6
    def encode(self,
               inputs,
               sequence_length=None,
               mode=tf.estimator.ModeKeys.TRAIN):
        all_outputs = []
        all_states = []
        all_sequence_lengths = []
        parallel_inputs = isinstance(inputs, (list, tuple))
        parallel_encoders = isinstance(self.encoders, (list, tuple))

        if parallel_encoders and parallel_inputs and len(inputs) != len(
                self.encoders):
            raise ValueError(
                "ParallelEncoder expects as many inputs as parallel encoders")
        if parallel_encoders:
            encoders = self.encoders
        else:
            encoders = itertools.repeat(self.encoders,
                                        len(inputs) if parallel_inputs else 1)

        for i, encoder in enumerate(encoders):
            scope_name = ("parallel_encoder" if self.share_parameters
                          else "encoder_{}".format(i))
            reuse = self.share_parameters and i > 0
            with compat.tf_compat(v1="variable_scope")(scope_name,
                                                       reuse=reuse):
                if parallel_inputs:
                    encoder_inputs = inputs[i]
                    length = sequence_length[i]
                else:
                    encoder_inputs = inputs
                    length = sequence_length

                outputs, state, length = encoder.encode(encoder_inputs,
                                                        sequence_length=length,
                                                        mode=mode)

                if self.outputs_layer_fn is not None:
                    if isinstance(self.outputs_layer_fn, list):
                        outputs = self.outputs_layer_fn[i](outputs)
                    else:
                        outputs = self.outputs_layer_fn(outputs)

                all_outputs.append(outputs)
                all_states.append(state)
                all_sequence_lengths.append(length)

        outputs, sequence_length = self.outputs_reducer(
            all_outputs, sequence_length=all_sequence_lengths)

        if self.combined_output_layer_fn is not None:
            outputs = self.combined_output_layer_fn(outputs)

        return (outputs, self.states_reducer(all_states), sequence_length)
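Note the itertools.repeat(self.encoders, n) call above: when a single
encoder is shared across inputs, the same object is yielded once per input,
so every iteration runs the same encoder (and, with the reuse flag in the
TF1 variable scope, the same variables). A tiny standalone illustration:

import itertools

shared = object()
assert all(e is shared for e in itertools.repeat(shared, 3))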
Example #7
 def make_features(self, element=None, features=None, training=None):
     if features is None:
         features = {}
     if "tensor" in features:
         return features
     tf_parse_example = compat.tf_compat(v2="io.parse_single_example",
                                         v1="parse_single_example")
     tf_var_len_feature = compat.tf_compat(v2="io.VarLenFeature",
                                           v1="VarLenFeature")
     example = tf_parse_example(element,
                                features={
                                    "shape": tf_var_len_feature(tf.int64),
                                    "values": tf_var_len_feature(tf.float32)
                                })
     values = example["values"].values
     shape = tf.cast(example["shape"].values, tf.int32)
     tensor = tf.reshape(values, shape)
     tensor.set_shape([None, self.input_depth])
     features["length"] = tf.shape(tensor)[0]
     features["tensor"] = tf.cast(tensor, self.dtype)
     return features
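The parser above expects each record to hold a flattened "values" float
list plus a "shape" int64 list. The matching writer in the library is
record_inputter.write_sequence_record (used in Example #10); the sketch
below is an assumption reconstructed from the parsing code, not the
library's implementation:

import tensorflow as tf

def write_sequence_record_sketch(vector, writer):
    # vector: a 2-D numpy array of shape [time, depth].
    example = tf.train.Example(features=tf.train.Features(feature={
        "shape": tf.train.Feature(
            int64_list=tf.train.Int64List(value=list(vector.shape))),
        "values": tf.train.Feature(
            float_list=tf.train.FloatList(value=vector.flatten().tolist())),
    }))
    writer.write(example.SerializeToString())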
Example #8
 def make_inputs(self, features, training=None):
   if not self.built:
     self.build()
   transformed = []
   for i, (inputter, scope) in enumerate(zip(self.inputters, self._get_scopes())):
     with compat.tf_compat(v1="variable_scope")(scope):
       if self.combine_features:
         sub_features = extract_prefixed_keys(features, "inputter_{}_".format(i))
       else:
         sub_features = features[i]
       transformed.append(inputter.make_inputs(sub_features, training=training))
   if self.reducer is not None:
     transformed = self.reducer(transformed)
   return transformed
Example #9
def alignment_matrix_from_pharaoh(alignment_line,
                                  source_length,
                                  target_length,
                                  dtype=tf.float32):
    """Parse Pharaoh alignments into an alignment matrix.

    Args:
      alignment_line: A string ``tf.Tensor`` in the Pharaoh format.
      source_length: The length of the source sentence, without special symbols.
      target_length: The length of the target sentence, without special symbols.
      dtype: The output matrix dtype. Defaults to ``tf.float32`` for convenience
        when computing the guided alignment loss.

    Returns:
      The alignment matrix as a 2-D ``tf.Tensor`` of type :obj:`dtype` and shape
      ``[target_length, source_length]``, where ``[i, j] = 1`` if the ``i`` th
      target word is aligned with the ``j`` th source word.
    """
    if compat.tf_supports("strings.split"):
        align_pairs_str = tf.strings.split([alignment_line]).values
        align_pairs_flat_str = tf.strings.split(align_pairs_str,
                                                sep="-").values
    else:
        align_pairs_str = tf.string_split([alignment_line],
                                          delimiter=" ").values
        align_pairs_flat_str = tf.string_split(align_pairs_str,
                                               delimiter="-").values
    align_pairs_flat = compat.tf_compat(v2="strings.to_number",
                                        v1="string_to_number")(
                                            align_pairs_flat_str,
                                            out_type=tf.int64)
    sparse_indices = tf.reshape(align_pairs_flat, [-1, 2])
    sparse_values = tf.ones([tf.shape(sparse_indices)[0]], dtype=dtype)
    source_length = tf.cast(source_length, tf.int64)
    target_length = tf.cast(target_length, tf.int64)
    if compat.tf_supports("sparse.to_dense"):
        alignment_matrix_sparse = tf.sparse.SparseTensor(
            sparse_indices, sparse_values, [source_length, target_length])
        alignment_matrix = tf.sparse.to_dense(alignment_matrix_sparse,
                                              validate_indices=False)
    else:
        alignment_matrix = tf.sparse_to_dense(sparse_indices,
                                              [source_length, target_length],
                                              sparse_values,
                                              validate_indices=False)
    return tf.transpose(alignment_matrix)
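As a worked example, the Pharaoh line "0-0 1-2 2-1" (each pair is
source-target) with source_length=3 and target_length=3 produces, after the
final transpose, a [target_length, source_length] matrix. The TF2-only
snippet below re-traces the same steps for illustration:

import tensorflow as tf

pairs = tf.strings.to_number(
    tf.strings.split(tf.strings.split(["0-0 1-2 2-1"]).values, sep="-").values,
    out_type=tf.int64)
indices = tf.reshape(pairs, [-1, 2])  # (source, target) index pairs
matrix = tf.scatter_nd(indices, tf.ones([3]), [3, 3])
print(tf.transpose(matrix).numpy())
# [[1. 0. 0.]   target 0 aligned with source 0
#  [0. 0. 1.]   target 1 aligned with source 2
#  [0. 1. 0.]]  target 2 aligned with source 1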
Example #10
  def testSequenceRecord(self):
    vector = np.array([[0.2, 0.3], [0.4, 0.5]], dtype=np.float32)

    record_file = os.path.join(self.get_temp_dir(), "data.records")
    writer = compat.tf_compat(
        v2="io.TFRecordWriter", v1="python_io.TFRecordWriter")(record_file)
    record_inputter.write_sequence_record(vector, writer)
    writer.close()

    inputter = record_inputter.SequenceRecordInputter()
    features, transformed = self._makeDataset(
        inputter,
        record_file,
        shapes={"tensor": [None, None, 2], "length": [None]})

    self.assertEqual([2], features["length"])
    self.assertAllEqual([vector], features["tensor"])
    self.assertAllEqual([vector], transformed)
Example #11
    def encode(self,
               inputs,
               sequence_length=None,
               mode=tf.estimator.ModeKeys.TRAIN):
        encoder_state = []

        for i, encoder in enumerate(self.encoders):
            with compat.tf_compat(v1="variable_scope")("encoder_{}".format(i)):
                if i > 0 and self.transition_layer_fn is not None:
                    if isinstance(self.transition_layer_fn, list):
                        inputs = self.transition_layer_fn[i - 1](inputs)
                    else:
                        inputs = self.transition_layer_fn(inputs)
                inputs, state, sequence_length = encoder.encode(
                    inputs, sequence_length=sequence_length, mode=mode)
                encoder_state.append(state)

        return (inputs, self.states_reducer(encoder_state), sequence_length)
Example #12
 def _detokenize_tensor(self, tokens):
     reduce_join = compat.tf_compat(v2="strings.reduce_join",
                                    v1="reduce_join")
     return reduce_join(tokens, axis=0, separator=" ")
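Under TF2 the resolved function is tf.strings.reduce_join; for example
(illustration only):

import tensorflow as tf

tokens = tf.constant(["Hello", "world", "!"])
print(tf.strings.reduce_join(tokens, axis=0, separator=" ").numpy())
# b'Hello world !'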
Example #13
# pylint: disable=missing-docstring

"""Custom hooks."""

from __future__ import print_function

import io
import time
import six

import tensorflow as tf

from opennmt.utils import compat, misc

_SESSION_RUN_HOOK = compat.tf_compat(v2="estimator.SessionRunHook", v1="train.SessionRunHook")


class LogParametersCountHook(_SESSION_RUN_HOOK):
  """Simple hook that logs the number of trainable parameters."""

  def begin(self):
    tf.logging.info("Number of trainable parameters: %d", misc.count_parameters())


_DEFAULT_COUNTERS_COLLECTION = "counters"


def add_counter(name, tensor):
  """Registers a new counter.

  Args:
Example #14
 def get_dataset_size(self, data_file):
     return sum(1 for _ in compat.tf_compat(
         v1="python_io.tf_record_iterator")(data_file))
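In TF2 the same count can be computed without the private python_io module;
a minimal eager-mode sketch (get_dataset_size_v2 is a hypothetical name):

import tensorflow as tf

def get_dataset_size_v2(data_file):
    # Count records by iterating the dataset eagerly.
    return sum(1 for _ in tf.data.TFRecordDataset(data_file))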