Example No. 1
    def preprocessing_fn(inputs):
        """Preprocess input columns into transformed columns."""
        # Since we are modifying some features and leaving others unchanged, we
        # start by setting `outputs` to a copy of `inputs`.
        outputs = inputs.copy()

        # Scale numeric columns to have range [0, 1].
        for key in NUMERIC_FEATURE_KEYS:
            outputs[key] = tft.scale_to_0_1(outputs[key])

        # For all categorical columns except the label column, we generate a
        # vocabulary but do not modify the feature.  This vocabulary is instead
        # used in the trainer, by means of a feature column, to convert the feature
        # from a string to an integer id.
        for key in CATEGORICAL_FEATURE_KEYS:
            tft.vocabulary(inputs[key], vocab_filename=key)

        # For the label column we provide the mapping from string to index.
        def convert_label(label):
            table = tf.contrib.lookup.index_table_from_tensor(
                ['>50K', '<=50K'])
            return table.lookup(label)

        outputs[LABEL_KEY] = tft.apply_function(convert_label,
                                                outputs[LABEL_KEY])

        return outputs
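As the comments above note, the vocabulary files written by tft.vocabulary are consumed later in the trainer through feature columns. A minimal sketch of that wiring, assuming working_dir points at the Transform output and that 'education' is one of the CATEGORICAL_FEATURE_KEYS:

    import tensorflow as tf
    import tensorflow_transform as tft

    # working_dir and the 'education' key are assumptions for illustration.
    tf_transform_output = tft.TFTransformOutput(working_dir)
    education = tf.feature_column.categorical_column_with_vocabulary_file(
        key='education',
        vocabulary_file=tf_transform_output.vocabulary_file_by_name('education'))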
Example No. 2
    def preprocessing_fn(inputs):
        """Preprocess input columns into transformed columns."""
        # Since we are modifying some features and leaving others unchanged, we
        # start by setting `outputs` to a copy of `inputs`.
        outputs = inputs.copy()

        # Scale numeric columns to have range [0, 1].
        for key in NUMERIC_FEATURE_KEYS:
            outputs[key] = tft.scale_to_0_1(outputs[key])

        for key in OPTIONAL_NUMERIC_FEATURE_KEYS:
            # This is a SparseTensor because it is optional. Here we fill in a default
            # value when it is missing.
            dense = tf.sparse_to_dense(outputs[key].indices,
                                       [outputs[key].dense_shape[0], 1],
                                       outputs[key].values,
                                       default_value=0.)
            # Reshaping from a batch of vectors of size 1 to a batch of scalars.
            dense = tf.squeeze(dense, axis=1)
            outputs[key] = tft.scale_to_0_1(dense)

        # For all categorical columns except the label column, we generate a
        # vocabulary but do not modify the feature.  This vocabulary is instead
        # used in the trainer, by means of a feature column, to convert the feature
        # from a string to an integer id.
        for key in CATEGORICAL_FEATURE_KEYS:
            tft.vocabulary(inputs[key], vocab_filename=key)

        # For the label column we provide the mapping from string to index.
        table = tf.contrib.lookup.index_table_from_tensor(['>50K', '<=50K'])
        outputs[LABEL_KEY] = table.lookup(outputs[LABEL_KEY])

        return outputs
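tf.sparse_to_dense used above is deprecated in current TensorFlow. A sketch of the same fill-in-missing step written against the tf.sparse API, assuming (as above) a SparseTensor of dense_shape [batch_size, 1] holding optional scalars:

    import tensorflow as tf

    def fill_in_missing(x, default_value=0.0):
        # x is a SparseTensor of dense_shape [batch_size, 1]; the result is a
        # dense [batch_size] tensor with missing entries set to default_value.
        dense = tf.sparse.to_dense(
            tf.SparseTensor(x.indices, x.values, [x.dense_shape[0], 1]),
            default_value=default_value)
        return tf.squeeze(dense, axis=1)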
Example No. 3
    def preprocessing_fn(inputs):
        """tf.transform's callback function for preprocessing inputs.
        Args:
          inputs: map from feature keys to raw not-yet-transformed features.
        Returns:
          Map from string feature key to transformed feature operations.
        """
        outputs = {}
        DENSE_FLOAT_FEATURE_KEYS = []
        VOCAB_FEATURE_KEYS = []
        _CSV_COLUMNS_NAMES, _CSV_COLUMN_DEFAULTS, _CSV_COLUMN_types, _UNUSED = setcolumn_list_original(
        )
        for i in range(len(_CSV_COLUMNS_NAMES)):
            if _CSV_COLUMN_types[i] is tf.string:
                VOCAB_FEATURE_KEYS.append(_CSV_COLUMNS_NAMES[i])

        outputs['gci'] = tf.expand_dims(_fill_in_missing(inputs['gci']), 1)
        for key in VOCAB_FEATURE_KEYS:
            if key in _UNUSED:
                continue
            if 'gci' in key:
                appendlist = tf.expand_dims(_fill_in_missing(inputs[key]), 1)
                outputs['gci'] = tf.concat([appendlist, outputs['gci']], 0)
        transform.vocabulary(outputs['gci'], vocab_filename='gci')
        transform.vocabulary(inputs['LAT_LON_10'], vocab_filename='label')
        return outputs
Example No. 4
def preprocessing_fn(inputs):
    """Tftransform processing function"""
    tft.vocabulary(inputs["example_categ"], vocab_filename="example_categ")
    return {
        "context_num": tft.scale_to_0_1(inputs["context_num"]),
        "example_categ": inputs["example_categ"],
        "example_num": tft.scale_to_0_1(inputs["example_num"]),
        "label": inputs["label"]
    }
Example No. 5
    def transform_to_tfrecord(self, inputs):
        """Preprocess raw input columns into transformed columns."""
        outputs = inputs.copy()

        for key in self.data_formatter.number_features:
            outputs[key] = tft.scale_to_z_score(outputs[key])

        for key in self.data_formatter.vocabulary_features:
            tft.vocabulary(inputs[key], vocab_filename=key)

        return outputs
Example No. 6
def preprocess_fn(inputs):
    """TensorFlow transform preprocessing function.

    Args:
        inputs: Dict of key to Tensor.
    Returns:
        Dict of key to transformed Tensor.
    """
    outputs = inputs.copy()
    for key in CATEGORICAL_COLUMNS:
        tft.vocabulary(inputs[key], vocab_filename=key)
    return outputs
Example No. 7
            def preprocessing_fn_train(inputs):
                """Preprocess input columns into transformed columns."""
                context = inputs['Context']
                utterance = inputs['Utterance']
                vocab = tf.concat([context, utterance], 0)

                context_tokens = tf.compat.v1.string_split(context, DELIMITERS)
                utterance_tokens = tf.compat.v1.string_split(
                    utterance, DELIMITERS)
                vocab_tokens = tf.compat.v1.string_split(vocab, DELIMITERS)

                vocab_mapping_file_path = tft.vocabulary(
                    vocab_tokens, vocab_filename='anantvir_train_vocab')

                mapped_context = tft.apply_vocabulary(
                    context_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                print(mapped_context)

                mapped_utterance = tft.apply_vocabulary(
                    utterance_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)

                return {
                    'Context': mapped_context,
                    'Utterance': mapped_utterance,
                }
Example No. 8
    def preprocessing_fn(inputs):
        """Preprocess input columns into transformed columns."""
        outputs = {}

        for key in numerical_feats:
            outputs[key] = tf.cast(tft.bucketize(inputs[key], 20),
                                   tf.float32) / 20.0 - 0.5

        outputs["campaignCost_mod"] = inputs["campaignCost"] / 100.0

        inputs["game_zone"] = tf.string_join(
            [inputs["sourceGameId"], inputs["zone"]], separator="_")
        inputs["game_campaignId"] = tf.string_join(
            [inputs["sourceGameId"], inputs["campaignId"]], separator="_")

        for key in categorical_feats + ["game_zone", "game_campaignId"]:
            vocab = tft.vocabulary(inputs[key],
                                   vocab_filename=key,
                                   frequency_threshold=100)
            outputs[key] = tft.apply_vocabulary(inputs[key],
                                                vocab,
                                                default_value=0)

        outputs["label"] = inputs["label"]
        outputs["key"] = inputs["key"]

        return outputs
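The numeric transform above leans on tft.bucketize returning integer bucket indices in [0, num_buckets), so casting to float, dividing by the bucket count, and shifting by 0.5 puts the feature in [-0.5, 0.45]. A standalone sketch of the idiom, with NUM_BUCKETS standing in for the literal 20:

    import tensorflow as tf
    import tensorflow_transform as tft

    NUM_BUCKETS = 20  # assumed, matching the literal above

    def bucketize_to_centered_float(x):
        # tft.bucketize yields int64 indices in [0, NUM_BUCKETS), so the
        # result lies in [-0.5, 0.45] in steps of 1 / NUM_BUCKETS.
        buckets = tft.bucketize(x, NUM_BUCKETS)
        return tf.cast(buckets, tf.float32) / NUM_BUCKETS - 0.5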
Example No. 9
def preprocessing_fn(inputs):
    """Callback function for preprocessing inputs.

  Args:
    inputs: map from feature keys to raw not-yet-transformed features.

  Returns:
    Map from string feature key to transformed feature operations.
  """
    outputs = inputs.copy()

    # Compute a vocabulary based on the TOP-K current pages and labels seen in
    # the dataset.
    vocab = tft.vocabulary(tf.concat(
        [inputs[_CUR_PAGE_FEATURE_KEY], inputs[_LABEL_KEY]], axis=0),
                           top_k=_TOP_K,
                           vocab_filename=_VOCAB_FILENAME)

    # Apply the vocabulary to both the current page feature and the label,
    # converting the strings into integers.
    for k in [_CUR_PAGE_FEATURE_KEY, _LABEL_KEY]:
        # Out-of-vocab strings will be assigned the _TOP_K value.
        outputs[k] = tft.apply_vocabulary(inputs[k],
                                          vocab,
                                          default_value=_TOP_K)
    return outputs
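Because out-of-vocab strings are assigned default_value=_TOP_K, the integerized feature and label take values in [0, _TOP_K], i.e. _TOP_K + 1 distinct ids. A hypothetical downstream sketch of why that matters when sizing the model:

    import tensorflow as tf

    # One row per vocabulary entry plus one shared out-of-vocab bucket; the
    # output dimension of 16 is an arbitrary choice for illustration.
    embedding = tf.keras.layers.Embedding(input_dim=_TOP_K + 1, output_dim=16)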
Example No. 10
def preprocess_fn(inputs):
    """TensorFlow transform preprocessing function.

    Args:
        inputs: Dict of key to Tensor.
    Returns:
        Dict of key to transformed Tensor.
    """
    outputs = inputs.copy()
    # For all categorical columns except the label column, we generate a
    # vocabulary but do not modify the feature.  This vocabulary is instead
    # used in the trainer, by means of a feature column, to convert the feature
    # from a string to an integer id.
    for key in CATEGORICAL_COLUMNS:
        tft.vocabulary(inputs[key], vocab_filename=key)
    return outputs
Example No. 11
    def preprocessing_fn(
            inputs: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
        """tf.transform's callback function for preprocessing inputs.

    Parameters
    ----------
      inputs: map from feature keys to raw not-yet-transformed features.

    Returns
    -------
      Map from string feature key to transformed feature operations.

    """
        outputs = {}
        for key in categorical_feature_keys + [label_key]:
            outputs[_transformed_name(key)] = _fill_in_missing(inputs[key])

            vocab_file_tensor = tft.vocabulary(outputs[_transformed_name(key)],
                                               vocab_filename=key)

            outputs[_transformed_name(key)] = tft.apply_vocabulary(
                outputs[_transformed_name(key)], vocab_file_tensor)

        # NOTE: This won't be correct in the incremental case since it's only using
        # the new examples to get the mean and variance.
        for key in numerical_feature_keys:
            outputs[_transformed_name(key)] = tf.expand_dims(
                tft.scale_to_z_score(_fill_in_missing(inputs[key])), axis=1)

        return outputs
Example No. 12
    def preprocessing_fn(
            inputs: Dict[Text, tf.Tensor]) -> Dict[Text, tf.Tensor]:
        """tf.transform's callback function for preprocessing inputs.

    Parameters
    ----------
      inputs: map from feature keys to raw not-yet-transformed features.

    Returns
    -------
      Map from string feature key to transformed feature operations.

    """
        outputs = {}
        outputs[_transformed_name(label_key)] = _fill_in_missing(
            inputs[label_key])

        vocab_file_tensor = tft.vocabulary(
            outputs[_transformed_name(label_key)], vocab_filename=label_key)

        outputs[_transformed_name(label_key)] = tft.apply_vocabulary(
            outputs[_transformed_name(label_key)], vocab_file_tensor)

        outputs[_transformed_name(pixel_key)] = tf.concat(
            [_fill_in_missing(inputs[str(i + 1)]) for i in range(num_pixels)],
            axis=1)

        # NOTE: This won't be correct in the incremental case since it's only using
        # the new examples to get the mean and variance.
        outputs[_transformed_name('pixels')] = tft.scale_to_0_1(
            outputs[_transformed_name('pixels')])

        return outputs
Example No. 13
def _preprocessing_fn_for_common_optimize_traversal(inputs):
    _ = tft.vocabulary(inputs['s'])
    x = inputs['x']
    x_mean = tft.mean(x, name='x')
    x_square_deviations = tf.square(x - x_mean)
    x_var = tft.mean(x_square_deviations, name='x_square_deviations')
    x_normalized = (x - x_mean) / tf.sqrt(x_var)
    return {'x_normalized': x_normalized}
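The fixture above builds a z-score by hand from two tft.mean analyzer passes, which is the point of the traversal test. Outside such tests the same normalization can be written with the built-in analyzers; a minimal sketch:

    import tensorflow as tf
    import tensorflow_transform as tft

    def normalize(x):
        # tft.var computes the mean squared deviation, and
        # tft.scale_to_z_score(x) wraps this entire computation in one call.
        return (x - tft.mean(x)) / tf.sqrt(tft.var(x))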
Example No. 14
def preprocessing_fn(inputs):
    """Transform preprocessing_fn."""

    # generate a shared vocabulary.
    _ = tft.vocabulary(
        tf.concat([
            inputs[features.QUERY_TOKENS].flat_values,
            inputs[features.DOCUMENT_TOKENS].flat_values
        ], axis=0),
        vocab_filename='shared_vocab')
    return inputs
Example No. 15
    def transform_to_tfrecord(self, inputs):
        """Preprocess raw input columns into transformed columns."""
        outputs = inputs.copy()

        for key in enabled_number_features:
            outputs[key] = tft.scale_to_z_score(outputs[key])

        # for key in OPTIONAL_NUMERIC_FEATURE_KEYS:
        #     # This is a SparseTensor because it is optional. Here we fill in a default
        #     # value when it is missing.
        #     dense = tf.sparse_to_dense(outputs[key].indices,
        #                                [outputs[key].dense_shape[0], 1],
        #                                outputs[key].values, default_value=0.)
        #     # Reshaping from a batch of vectors of size 1 to a batch to scalars.
        #     dense = tf.squeeze(dense, axis=1)
        #     outputs[key] = tft.scale_to_0_1(dense)

        for key in enabled_vocabulary_features:
            tft.vocabulary(inputs[key], vocab_filename=key)

        return outputs
Example No. 16
def _preprocessing_fn_with_table(inputs):
    x = inputs['x']
    x_vocab = tft.vocabulary(x, name='x')
    initializer = tf.lookup.TextFileInitializer(
        x_vocab,
        key_dtype=tf.string,
        key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
        value_dtype=tf.int64,
        value_index=tf.lookup.TextFileIndex.LINE_NUMBER)
    table = tf.lookup.StaticHashTable(initializer, default_value=-1)
    x_integerized = table.lookup(x)
    return {'x_integerized': x_integerized}
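The hand-built StaticHashTable makes the deferred-filename mechanics explicit. The same integerization is available in one call through tft.apply_vocabulary; a sketch of the equivalent function:

    import tensorflow_transform as tft

    def _preprocessing_fn_with_apply_vocabulary(inputs):
        x = inputs['x']
        x_vocab = tft.vocabulary(x, name='x')
        # apply_vocabulary builds the lookup from the deferred vocabulary file
        # and maps out-of-vocabulary strings to default_value.
        return {'x_integerized': tft.apply_vocabulary(x, x_vocab,
                                                      default_value=-1)}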
Example No. 17
        def preprocessing_fn(inputs):
            _ = tft.vocabulary(inputs['s'],
                               labels=inputs['label'],
                               store_frequency=True,
                               vocab_filename=mi_vocab_name,
                               min_diff_from_avg=0.1,
                               use_adjusted_mutual_info=False)

            _ = tft.vocabulary(inputs['s'],
                               labels=inputs['label'],
                               store_frequency=True,
                               vocab_filename=adjusted_mi_vocab_name,
                               min_diff_from_avg=1.0,
                               use_adjusted_mutual_info=True)

            _ = tft.vocabulary(inputs['s'],
                               weights=inputs['weight'],
                               store_frequency=True,
                               vocab_filename=weighted_frequency_vocab_name,
                               use_adjusted_mutual_info=False)
            return inputs
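With store_frequency=True, each line of the written vocabulary file carries the score (frequency or mutual information) before the token, which is how the three variants above can be compared offline. A hedged reader sketch, assuming working_dir points at the Transform output and mi_vocab_name is the filename used above:

    import tensorflow as tf
    import tensorflow_transform as tft

    tf_transform_output = tft.TFTransformOutput(working_dir)  # assumed
    vocab_path = tf_transform_output.vocabulary_file_by_name(mi_vocab_name)
    with tf.io.gfile.GFile(vocab_path) as f:
        for line in f:
            # Each line is "<score> <token>".
            score, token = line.rstrip('\n').split(' ', 1)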
Example No. 18
        def preprocessing_fn(inputs):

            _ = tft.vocabulary(inputs['s'])

            _ = tft.bucketize(inputs['x'], 2, name='bucketize')

            return {
                'x_min':
                tft.min(inputs['x'], name='x') + tf.zeros_like(inputs['x']),
                'x_mean':
                tft.mean(inputs['x'], name='x') + tf.zeros_like(inputs['x']),
                'y_min':
                tft.min(inputs['y'], name='y') + tf.zeros_like(inputs['y']),
                'y_mean':
                tft.mean(inputs['y'], name='y') + tf.zeros_like(inputs['y']),
            }
Example No. 19
def _preprocess_tft(raw_data, user_freq, item_freq):
    """Creates vocabularies for users and items and maps their ids to ints.

  Args:
    raw_data: a dict of shape {$user_key: tensor, $item_key: tensor, ...}.
    user_freq: minimum frequency of a user to include it in the user vocab.
    item_freq: minimum frequency of an item to include it in the item vocab.

  Returns:
    A dict containing int ids corresponding to a user_id and item_id and other
      features: {$user_key: $user_id, $item_key: $item_id, ...}.
  """
    features = {
        feature: raw_data[feature]
        for feature in constants.BQ_FEATURES
    }
    item_vocab = tft.vocabulary(raw_data[constants.ITEM_KEY],
                                vocab_filename=constants.ITEM_VOCAB_NAME,
                                frequency_threshold=item_freq)
    tft_features = {
        constants.TFT_USER_KEY:
        tft.compute_and_apply_vocabulary(
            raw_data[constants.USER_KEY],
            vocab_filename=constants.USER_VOCAB_NAME,
            frequency_threshold=user_freq,
            default_value=constants.TFT_DEFAULT_ID),
        constants.TFT_ITEM_KEY:
        tft.apply_vocabulary(raw_data[constants.ITEM_KEY],
                             item_vocab,
                             default_value=constants.TFT_DEFAULT_ID),
        constants.TFT_ARTIST_KEY:
        tft.compute_and_apply_vocabulary(
            raw_data[constants.ARTIST_KEY],
            vocab_filename=constants.ARTIST_VOCAB_NAME,
            default_value=constants.TFT_DEFAULT_ID),
        constants.TFT_TAGS_KEY:
        tft.compute_and_apply_vocabulary(
            raw_data[constants.TAGS_KEY],
            vocab_filename=constants.TAG_VOCAB_NAME,
            default_value=constants.TFT_DEFAULT_ID),
        constants.TFT_TOP_10_KEY:
        tft.apply_vocabulary(raw_data[constants.TOP_10_KEY],
                             item_vocab,
                             default_value=constants.TFT_DEFAULT_ID),
    }
    features.update(tft_features)
    return features
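Note the split above: item_vocab is computed once with tft.vocabulary because it is applied to two columns (ITEM_KEY and TOP_10_KEY), while single-use columns go through tft.compute_and_apply_vocabulary, which is shorthand for the two-step pattern. A sketch of the equivalence, using hypothetical string tensors x and y that share one vocabulary:

    # One-step form, when the vocabulary is used by a single column:
    x_ids = tft.compute_and_apply_vocabulary(x, default_value=-1)

    # Two-step form, when the same vocabulary is applied to several columns,
    # as with item_vocab above:
    shared_vocab = tft.vocabulary(x)
    x_ids = tft.apply_vocabulary(x, shared_vocab, default_value=-1)
    y_ids = tft.apply_vocabulary(y, shared_vocab, default_value=-1)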
Example No. 20
def preprocessing_fn(inputs):
    logging.info("Running preprocessing function")

    config = ClassificationConfig.from_env()

    outputs = dict()
    read_image_blob = lambda x: read_tensor_from_image_file(
        x, input_height=config.image_height, input_width=config.image_width)
    # image tensor
    outputs[config.image_key] = tf.compat.v2.map_fn(
        read_image_blob, inputs[config.raw_image_key].values, dtype=tf.float32)
    # Label tensor: pass the raw label through to the output and create a
    # vocabulary to be used later on.
    _ = tft.vocabulary(inputs['label'], vocab_filename="label_encoder")
    outputs[config.label_key] = inputs[config.raw_label_key]

    return outputs
Example No. 21
def _preprocessing_fn_with_packable_analyzer_single_phase(inputs):
  x, y = inputs['x'], inputs['y']
  x_mean = tft.mean(x, name='x')
  x_centered = x - x_mean
  y_mean = tft.mean(y, name='y')
  y_centered = y - y_mean
  z = inputs['z']
  z_vocab = tft.vocabulary(z, name='z')
  initializer = tf.lookup.TextFileInitializer(
      z_vocab,
      key_dtype=tf.string,
      key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
      value_dtype=tf.int64,
      value_index=tf.lookup.TextFileIndex.LINE_NUMBER)
  table = tf.lookup.StaticHashTable(initializer, default_value=-1)
  z_integerized = table.lookup(z)
  return {'x_centered': x_centered, 'y_centered': y_centered,
          'z_integerized': z_integerized}
Example No. 22
def _default_preprocessing_fn(inputs, input_features):
    outputs = {}

    for key in input_features["numerical_default_encoding"]:
        outputs[key] = tf.cast(tft.bucketize(inputs[key], 20),
                               tf.float32) / 20.0 - 0.5

    for key in input_features["categorical_default_encoding"]:
        vocab = tft.vocabulary(inputs[key],
                               vocab_filename=key,
                               frequency_threshold=100)
        outputs[key] = tft.apply_vocabulary(inputs[key],
                                            vocab,
                                            default_value=0)

    if "label" in input_features:
        outputs["label"] = inputs[input_features["label"]]

    return outputs
Example No. 23
    def preprocessing_fn(inputs):

      _ = tft.vocabulary(inputs['s'], vocab_filename='vocab1')

      _ = tft.bucketize(inputs['x'], 2, name='bucketize')

      return {
          'x_min':
              tft.min(inputs['x'], name='x') + tf.zeros_like(inputs['x']),
          'x_mean':
              tft.mean(inputs['x'], name='x') + tf.zeros_like(inputs['x']),
          'y_min':
              tft.min(inputs['y'], name='y') + tf.zeros_like(inputs['y']),
          'y_mean':
              tft.mean(inputs['y'], name='y') + tf.zeros_like(inputs['y']),
          's_integerized':
              tft.compute_and_apply_vocabulary(
                  inputs['s'],
                  labels=inputs['label'],
                  use_adjusted_mutual_info=True),
      }
Example No. 24
def preprocessing_fn(inputs):
    """
    Preprocess data inputs.
    This is a callback function for tfx.components.Transform.

    Parameters
    ----------
    inputs : dict, tensorflow_transform data
        Data being fed into tfx.components.Transform.
        Map from feature keys to raw not-yet-transformed features.

    Returns
    -------
    output: dict
        Map from string feature key to transformed feature operations.
    """
    # String to integer indexing
    content = inputs["InSeasonSeries_Id"]
    token = inputs["token"]
    token_count = inputs["token_count"]
    vocab_uri = tft.vocabulary(
        tf.concat([content, token], axis=0),
        vocab_filename="node_vocab.txt",
        name="node_vocab",
    )
    # Logging
    logging.info(f"graph vocabulary uri: {vocab_uri}")

    # output as a dict
    output = {}
    output["InSeasonSeries_Id"] = tft.apply_vocabulary(
        content, deferred_vocab_filename_tensor=vocab_uri, default_value=-1)
    output["token"] = tft.apply_vocabulary(
        token, deferred_vocab_filename_tensor=vocab_uri, default_value=-1)
    output["weight"] = tf.constant([1.0], dtype="float32") / tf.cast(
        token_count, "float32")

    return output
Example No. 25
def preprocessing_fn(inputs, input_features):
    """Preprocess input columns into transformed columns."""

    outputs = _default_preprocessing_fn(inputs, input_features)

    outputs["campaignCost_mod"] = inputs["campaignCost"] / 100.0

    inputs["game_zone"] = tf.string_join(
        [inputs["sourceGameId"], inputs["zone"]], separator="_")
    inputs["game_campaignId"] = tf.string_join(
        [inputs["sourceGameId"], inputs["campaignId"]], separator="_")

    for key in ["game_zone", "game_campaignId"]:
        vocab = tft.vocabulary(inputs[key],
                               vocab_filename=key,
                               frequency_threshold=100)
        outputs[key] = tft.apply_vocabulary(inputs[key],
                                            vocab,
                                            default_value=0)

    outputs["key"] = inputs["key"]

    return outputs
Example No. 26
            def preprocessing_fn_test(inputs):
                """Preprocess input columns into transformed columns."""
                context = inputs['Context']
                ground_truth_utterance = inputs['Ground Truth Utterance']
                distractor_0 = inputs['Distractor_0']
                distractor_1 = inputs['Distractor_1']
                distractor_2 = inputs['Distractor_2']
                distractor_3 = inputs['Distractor_3']
                distractor_4 = inputs['Distractor_4']
                distractor_5 = inputs['Distractor_5']
                distractor_6 = inputs['Distractor_6']
                distractor_7 = inputs['Distractor_7']
                distractor_8 = inputs['Distractor_8']
                vocab = tf.concat([
                    context, ground_truth_utterance, distractor_0,
                    distractor_1, distractor_2, distractor_3, distractor_4,
                    distractor_5, distractor_6, distractor_7, distractor_8
                ], 0)

                context_tokens = tf.compat.v1.string_split(context, DELIMITERS)
                ground_truth_utterance_tokens = tf.compat.v1.string_split(
                    ground_truth_utterance, DELIMITERS)
                distractor_0_tokens = tf.compat.v1.string_split(
                    distractor_0, DELIMITERS)
                distractor_1_tokens = tf.compat.v1.string_split(
                    distractor_1, DELIMITERS)
                distractor_2_tokens = tf.compat.v1.string_split(
                    distractor_2, DELIMITERS)
                distractor_3_tokens = tf.compat.v1.string_split(
                    distractor_3, DELIMITERS)
                distractor_4_tokens = tf.compat.v1.string_split(
                    distractor_4, DELIMITERS)
                distractor_5_tokens = tf.compat.v1.string_split(
                    distractor_5, DELIMITERS)
                distractor_6_tokens = tf.compat.v1.string_split(
                    distractor_6, DELIMITERS)
                distractor_7_tokens = tf.compat.v1.string_split(
                    distractor_7, DELIMITERS)
                distractor_8_tokens = tf.compat.v1.string_split(
                    distractor_8, DELIMITERS)

                vocab_tokens = tf.compat.v1.string_split(vocab, DELIMITERS)

                vocab_mapping_file_path = tft.vocabulary(
                    vocab_tokens, vocab_filename='anantvir_test_vocab')

                mapped_context = tft.apply_vocabulary(
                    context_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_ground_truth_utterance = tft.apply_vocabulary(
                    ground_truth_utterance_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_0 = tft.apply_vocabulary(
                    distractor_0_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_1 = tft.apply_vocabulary(
                    distractor_1_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_2 = tft.apply_vocabulary(
                    distractor_2_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_3 = tft.apply_vocabulary(
                    distractor_3_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_4 = tft.apply_vocabulary(
                    distractor_4_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_5 = tft.apply_vocabulary(
                    distractor_5_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_6 = tft.apply_vocabulary(
                    distractor_6_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_7 = tft.apply_vocabulary(
                    distractor_7_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)
                mapped_distractor_8 = tft.apply_vocabulary(
                    distractor_8_tokens,
                    deferred_vocab_filename_tensor=vocab_mapping_file_path)

                return {
                    'Context': mapped_context,
                    'Ground Truth Utterance': mapped_ground_truth_utterance,
                    'Distractor_0': mapped_distractor_0,
                    'Distractor_1': mapped_distractor_1,
                    'Distractor_2': mapped_distractor_2,
                    'Distractor_3': mapped_distractor_3,
                    'Distractor_4': mapped_distractor_4,
                    'Distractor_5': mapped_distractor_5,
                    'Distractor_6': mapped_distractor_6,
                    'Distractor_7': mapped_distractor_7,
                    'Distractor_8': mapped_distractor_8,
                }
Example No. 27
def _preprocessing_fn_with_table(inputs):
    x = inputs['x']
    x_vocab = tft.vocabulary(x, name='x')
    table = tf.contrib.lookup.index_table_from_file(x_vocab)
    x_integerized = table.lookup(x)
    return {'x_integerized': x_integerized}
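This variant predates TF 2.x, where tf.contrib no longer exists. The core-API replacement is exactly the table construction shown in Example No. 16 above; restated briefly against the same x and x_vocab:

    # Build the lookup table from the deferred vocabulary file with the core
    # tf.lookup API instead of tf.contrib.lookup.index_table_from_file.
    table = tf.lookup.StaticHashTable(
        tf.lookup.TextFileInitializer(
            x_vocab,
            key_dtype=tf.string,
            key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
            value_dtype=tf.int64,
            value_index=tf.lookup.TextFileIndex.LINE_NUMBER),
        default_value=-1)
    x_integerized = table.lookup(x)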