def _get_estimator(self,
                     train_distribute,
                     eval_distribute,
                     remote_cluster=None):
    input_dimension = LABEL_DIMENSION
    linear_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]

    return dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=LABEL_DIMENSION,
        model_dir=self._model_dir,
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config_lib.RunConfig(
            experimental_distribute=DistributeConfig(
                train_distribute=train_distribute,
                eval_distribute=eval_distribute,
                remote_cluster=remote_cluster)))
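For orientation, a minimal usage sketch of the helper above; the strategy object and both input functions are assumptions imagined inside the same test class, not part of the original snippet.

# Hypothetical usage; `strategy`, `train_input_fn` and `eval_input_fn` are
# assumed to be provided by the surrounding test case.
est = self._get_estimator(train_distribute=strategy, eval_distribute=strategy)
est.train(input_fn=train_input_fn, steps=10)
est.evaluate(input_fn=eval_input_fn, steps=1)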
Example #2
  def test_parse_features(self):
    """Tests the various behaviours of kmeans._parse_features_if_necessary."""

    # No-op if a tensor is passed in.
    features = constant_op.constant(self.points)
    parsed_features = kmeans_lib._parse_features_if_necessary(features, None)
    self.assertAllEqual(features, parsed_features)

    # All values from a feature dict are transformed into a tensor.
    feature_dict = {
        'x': [[point[0]] for point in self.points],
        'y': [[point[1]] for point in self.points]
    }
    parsed_feature_dict = kmeans_lib._parse_features_if_necessary(
        feature_dict, None)
    self._parse_feature_dict_helper(features, parsed_feature_dict)

    # Only the feature_columns of a feature dict are transformed into a tensor.
    feature_dict_with_extras = {
        'foo': 'bar',
        'x': [[point[0]] for point in self.points],
        'baz': {'fizz': 'buzz'},
        'y': [[point[1]] for point in self.points]
    }
    feature_columns = [fc.numeric_column(key='x'), fc.numeric_column(key='y')]
    parsed_feature_dict = kmeans_lib._parse_features_if_necessary(
        feature_dict_with_extras, feature_columns)
    self._parse_feature_dict_helper(features, parsed_feature_dict)
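A self-contained plain-numpy sketch of the dict-to-tensor equivalence this test checks; the data below is hypothetical.

import numpy as np

# Hypothetical points; a feature dict with one column per key carries the
# same values as the stacked points tensor.
points = np.array([[1., 2.], [3., 4.]], dtype=np.float32)
feature_dict = {'x': points[:, :1], 'y': points[:, 1:]}
stacked = np.concatenate([feature_dict['x'], feature_dict['y']], axis=1)
assert np.array_equal(stacked, points)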
Example #3
  def testTrainEvaluateInferDoesNotThrowErrorWithNoDnnInput(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 3
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    est = estimator.CoreDNNBoostedTreeCombinedEstimator(
        head=head_fn,
        dnn_hidden_units=[1],
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=False,
        tree_feature_columns=[core_feature_column.numeric_column("x")])

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    # 10 steps for the DNN, 3 for one tree of depth 3, + 1 after the tree finished
    self._assert_checkpoint(est.model_dir, global_step=15)
    res = est.evaluate(input_fn=_eval_input_fn, steps=1)
    self.assertLess(0.5, res["auc"])
    est.predict(input_fn=_eval_input_fn)
Example #4
  def testRankingDontThrowExceptionForForEstimator(self):
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    est = estimator.CoreGradientBoostedDecisionTreeRanker(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=[
            core_feature_column.numeric_column("f1"),
            core_feature_column.numeric_column("f2")
        ],
        ranking_model_pair_keys=("a", "b"))

    # Train for a few steps.
    est.train(input_fn=_ranking_train_input_fn, steps=1000)
    est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
    est.predict(input_fn=_infer_ranking_train_input_fn)
Example #5
  def testWithFeatureColumns(self):
    head_fn = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
        n_classes=3, loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    hparams = tensor_forest.ForestHParams(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)

    est = random_forest.CoreTensorForestEstimator(
        hparams.fill(),
        head=head_fn,
        feature_columns=[core_feature_column.numeric_column('x')])

    iris = base.load_iris()
    data = {'x': iris.data.astype(np.float32)}
    labels = iris.target.astype(np.int32)

    input_fn = numpy_io.numpy_input_fn(
        x=data, y=labels, batch_size=150, num_epochs=None, shuffle=False)

    est.train(input_fn=input_fn, steps=100)
    res = est.evaluate(input_fn=input_fn, steps=1)

    self.assertEqual(1.0, res['accuracy'])
    self.assertAllClose(0.55144483, res['loss'])
Example #6
  def testFitAndEvaluateMultiClassFullDontThrowException(self):
    n_classes = 3
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = n_classes
    learner_config.constraints.max_tree_depth = 1
    learner_config.multi_class_strategy = (
        learner_pb2.LearnerConfig.FULL_HESSIAN)

    head_fn = estimator.core_multiclass_head(n_classes=n_classes)

    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
        learner_config=learner_config,
        head=head_fn,
        num_trees=1,
        center_bias=False,
        examples_per_layer=7,
        model_dir=model_dir,
        config=config,
        feature_columns=[core_feature_column.numeric_column("x")])

    classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
    classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
    classifier.predict(input_fn=_eval_input_fn)
Example #7
  def test_linear_model_numpy_input_fn(self):
    price = fc.numeric_column('price')
    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,])
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([-1., 2., 13., 104.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.linear_model(features, [price_buckets, body_style])
    # Unlike input_layer, linear_model returns a single prediction column per
    # example, so there is no 1 + 3 + 5 feature-width assertion to make here.
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

      bias = self._get_linear_model_bias()
      price_buckets_var = self._get_linear_model_column_var(price_buckets)
      body_style_var = self._get_linear_model_column_var(body_style)

      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
      sess.run(bias.assign([5.]))

      # Batch 1: price=-1. -> bucket 0 (10.), 'sedan' -> -1000.; batch 2:
      # price=2. -> bucket 1 (100.), 'hardtop' -> -10.; bias adds 5. to each.
      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))

      coord.request_stop()
      coord.join(threads)
Example #8
 def test_ar_lstm_regressor(self):
   dtype = dtypes.float32
   model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
   exogenous_feature_columns = (
       feature_column.numeric_column("exogenous"),
   )
   estimator = estimators.LSTMAutoRegressor(
       periodicities=10,
       input_window_size=10,
       output_window_size=6,
       model_dir=model_dir,
       num_features=1,
       extra_feature_columns=exogenous_feature_columns,
       num_units=10,
       config=_SeedRunConfig())
   times = numpy.arange(20, dtype=numpy.int64)
   values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
   exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
   features = {
       feature_keys.TrainEvalFeatures.TIMES: times,
       feature_keys.TrainEvalFeatures.VALUES: values,
       "exogenous": exogenous
   }
   train_input_fn = input_pipeline.RandomWindowInputFn(
       input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1,
       batch_size=16, window_size=16)
   eval_input_fn = input_pipeline.RandomWindowInputFn(
       input_pipeline.NumpyReader(features), shuffle_seed=3, num_threads=1,
       batch_size=16, window_size=16)
   estimator.train(input_fn=train_input_fn, steps=1)
   evaluation = estimator.evaluate(
       input_fn=eval_input_fn, steps=1)
   self.assertAllEqual(evaluation["loss"], evaluation["average_loss"])
   self.assertAllEqual([], evaluation["loss"].shape)
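A hedged follow-up sketch, not in the original test: predictions can be continued from the evaluation output with `input_pipeline.predict_continuation_input_fn`, as the fit/restore example later in this listing does.

# Sketch only: 10 hypothetical exogenous values, one per predicted step,
# shaped [batch=1, steps, 1]; `estimator`, `evaluation` and `dtype` are the
# names from the test above.
predict_input_fn = input_pipeline.predict_continuation_input_fn(
    evaluation=evaluation,
    steps=10,
    exogenous_features={
        "exogenous": numpy.arange(10, dtype=dtype.as_numpy_dtype)[None, :, None]
    })
(predictions,) = tuple(estimator.predict(input_fn=predict_input_fn))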
Example #9
  def testFitAndEvaluateDontThrowExceptionWithCore(self):
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    # Use core head
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

    classifier = estimator.DNNBoostedTreeCombinedEstimator(
        head=head_fn,
        dnn_hidden_units=[1],
        # Use core feature columns
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=True,
        tree_feature_columns=[],
        use_core_versions=True)

    classifier.fit(input_fn=_train_input_fn, steps=15)
    classifier.evaluate(input_fn=_eval_input_fn, steps=1)
Example #10
 def _serving_input_receiver_fn():
   """A receiver function to be passed to export_savedmodel."""
   times_column = feature_column.numeric_column(
       key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64)
   values_column = feature_column.numeric_column(
       key=feature_keys.TrainEvalFeatures.VALUES, dtype=values_input_dtype,
       shape=(self._model.num_features,))
   parsed_features_no_sequence = (
       feature_column.make_parse_example_spec(
           list(self._model.exogenous_feature_columns)
           + [times_column, values_column]))
   parsed_features = {}
   for key, feature_spec in parsed_features_no_sequence.items():
     if isinstance(feature_spec, parsing_ops.FixedLenFeature):
       if key == feature_keys.TrainEvalFeatures.VALUES:
         parsed_features[key] = feature_spec._replace(
             shape=((values_proto_length,)
                    + feature_spec.shape))
       else:
         parsed_features[key] = feature_spec._replace(
             shape=((filtering_length + prediction_length,)
                    + feature_spec.shape))
     elif feature_spec.dtype == dtypes.string:
       parsed_features[key] = parsing_ops.FixedLenFeature(
           shape=(filtering_length + prediction_length,),
           dtype=dtypes.string)
     else:  # VarLenFeature
       raise ValueError("VarLenFeatures not supported, got %s for key %s"
                        % (feature_spec, key))
   tfexamples = array_ops.placeholder(
       shape=[default_batch_size], dtype=dtypes.string, name="input")
   features = parsing_ops.parse_example(
       serialized=tfexamples,
       features=parsed_features)
   features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze(
       features[feature_keys.TrainEvalFeatures.TIMES], axis=-1)
   features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast(
       features[feature_keys.TrainEvalFeatures.VALUES],
       dtype=self._model.dtype)[:, :filtering_length]
   features.update(
       self._model_start_state_placeholders(
           batch_size_tensor=array_ops.shape(
               features[feature_keys.TrainEvalFeatures.TIMES])[0],
           static_batch_size=default_batch_size))
   return export_lib.ServingInputReceiver(
       features, {"examples": tfexamples})
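As the docstring says, a receiver function like this is passed to the export call; a hedged sketch follows, where `estimator` and `export_dir_base` are assumed names.

# Hypothetical: export a SavedModel using the receiver function above.
export_dir = estimator.export_savedmodel(export_dir_base,
                                         _serving_input_receiver_fn)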
Example #11
  def test_subclassed_model_with_feature_columns(self):
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    dnn_model = TestDNNModel([col_a, col_b], 20)

    dnn_model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    x = {'a': np.random.random((10, 1)), 'b': np.random.random((10, 1))}
    y = np.random.randint(20, size=(10, 1))
    y = keras.utils.to_categorical(y, num_classes=20)
    dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
    dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
    dnn_model.evaluate(x=x, y=y, batch_size=5)
    dnn_model.predict(x=x, batch_size=5)
Example #12
  def test_subclassed_model_with_feature_columns_with_ds_input(self):
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    dnn_model = TestDNNModel([col_a, col_b], 20)

    dnn_model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    y = np.random.randint(20, size=(100, 1))
    y = keras.utils.to_categorical(y, num_classes=20)
    x = {'a': np.random.random((100, 1)), 'b': np.random.random((100, 1))}
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
    dnn_model.fit(ds, steps_per_epoch=1)
    dnn_model.fit(ds, steps_per_epoch=1)
    dnn_model.evaluate(ds, steps=1)
    dnn_model.predict(ds, steps=1)
Example #13
  def DISABLED_test_function_model_feature_layer_input(self):
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')

    feature_layer = fc.DenseFeatures([col_a, col_b], name='fc')
    dense = keras.layers.Dense(4)

    # This seems problematic.... We probably need something for DenseFeatures
    # the way Input is for InputLayer.
    output = dense(feature_layer)

    model = keras.models.Model([feature_layer], [output])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(
        optimizer,
        loss,
        metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
        loss_weights=loss_weights)

    data = ({'a': np.arange(10), 'b': np.arange(10)}, np.arange(10, 20))
    print(model.fit(*data, epochs=1))
Example #14
  def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 3
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    est = estimator.CoreDNNBoostedTreeCombinedEstimator(
        head=head_fn,
        dnn_hidden_units=[1],
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=True,
        predict_with_tree_only=True,
        dnn_to_tree_distillation_param=(0.5, None),
        tree_feature_columns=[])

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    res = est.evaluate(input_fn=_eval_input_fn, steps=1)
    self.assertLess(0.5, res["auc"])
    est.predict(input_fn=_eval_input_fn)
    serving_input_fn = (
        export.build_parsing_serving_input_receiver_fn(
            feature_spec={"x": parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32)}))
    base_exporter = exporter.FinalExporter(
        name="Servo",
        serving_input_receiver_fn=serving_input_fn,
        assets_extra=None)
    export_path = os.path.join(model_dir, "export")
    base_exporter.export(
        est,
        export_path=export_path,
        checkpoint_path=None,
        eval_result={},
        is_the_final_export=True)
Example #15
  def test_functional_input_layer_with_numpy_input_fn(self):
    embedding_values = (
        (1., 2., 3., 4., 5.),  # id 0
        (6., 7., 8., 9., 10.),  # id 1
        (11., 12., 13., 14., 15.)  # id 2
    )
    def _initializer(shape, dtype, partition_info):
      del shape, dtype, partition_info
      return embedding_values

    # price has 1 dimension in input_layer
    price = fc.numeric_column('price')
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
    # one_hot_body_style has 3 dims in input_layer.
    one_hot_body_style = fc.indicator_column(body_style)
    # embedded_body_style has 5 dims in input_layer.
    embedded_body_style = fc.embedding_column(body_style, dimension=5,
                                              initializer=_initializer)

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([11., 12., 13., 14.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.input_layer(features,
                         [price, one_hot_body_style, embedded_body_style])
    self.assertEqual(1 + 3 + 5, net.shape[1])
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

      # Each row is formed by concatenating `embedded_body_style`,
      # `one_hot_body_style`, and `price` in order; input_layer concatenates
      # its columns sorted by name, which yields exactly this ordering.
      self.assertAllEqual(
          [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
           [1., 2., 3., 4., 5., 1., 0., 0., 12]],
          sess.run(net))

      coord.request_stop()
      coord.join(threads)
Example #16
  def testFitAndEvaluateDontThrowExceptionWithCoreForClassifier(self):
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    classifier = estimator.GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=[core_feature_column.numeric_column("x")],
        use_core_libs=True)

    classifier.fit(input_fn=_train_input_fn, steps=15)
    classifier.evaluate(input_fn=_eval_input_fn, steps=1)
    classifier.export(self._export_dir_base)
Example #17
  def test_sequential_model(self):
    columns = [fc.numeric_column('a')]
    model = keras.models.Sequential([
        fc.DenseFeatures(columns),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(20, activation='softmax')
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    x = {'a': np.random.random((10, 1))}
    y = np.random.randint(20, size=(10, 1))
    y = keras.utils.to_categorical(y, num_classes=20)
    model.fit(x, y, epochs=1, batch_size=5)
    model.fit(x, y, epochs=1, batch_size=5)
    model.evaluate(x, y, batch_size=5)
    model.predict(x, batch_size=5)
Example #18
  def test_sequential_model_with_ds_input(self):
    columns = [fc.numeric_column('a')]
    model = keras.models.Sequential([
        fc.DenseFeatures(columns),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(20, activation='softmax')
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly())

    y = np.random.randint(20, size=(100, 1))
    y = keras.utils.to_categorical(y, num_classes=20)
    x = {'a': np.random.random((100, 1))}
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
    model.fit(ds, steps_per_epoch=1)
    model.fit(ds, steps_per_epoch=1)
    model.evaluate(ds, steps=1)
    model.predict(ds, steps=1)
Example #19
  def testTrainEvaluateInferDoesNotThrowError(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    est = estimator.CoreGradientBoostedDecisionTreeEstimator(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=[core_feature_column.numeric_column("x")])

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    est.evaluate(input_fn=_eval_input_fn, steps=1)
    est.predict(input_fn=_eval_input_fn)
Example #20
  def test_sequential_model_with_crossed_column(self):
    feature_columns = []
    age_buckets = fc.bucketized_column(
        fc.numeric_column('age'),
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    feature_columns.append(age_buckets)

    # indicator cols
    thal = fc.categorical_column_with_vocabulary_list(
        'thal', ['fixed', 'normal', 'reversible'])

    crossed_feature = fc.crossed_column([age_buckets, thal],
                                        hash_bucket_size=1000)
    crossed_feature = fc.indicator_column(crossed_feature)
    feature_columns.append(crossed_feature)

    feature_layer = fc.DenseFeatures(feature_columns)

    model = keras.models.Sequential([
        feature_layer,
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])

    age_data = np.random.randint(10, 100, size=100)
    thal_data = np.random.choice(['fixed', 'normal', 'reversible'], size=100)
    inp_x = {'age': age_data, 'thal': thal_data}
    inp_y = np.random.randint(0, 2, size=100)  # 0/1 labels; randint's high bound is exclusive
    ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'],)
    model.fit(ds, epochs=1)
    model.fit(ds, epochs=1)
    model.evaluate(ds)
    model.predict(ds)
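A hedged width check for the feature layer above, assuming eager execution so the layer can be called directly on a dict of tensors: the ten boundaries give eleven age buckets, and the indicator over the crossed column contributes one dimension per hash bucket.

import tensorflow as tf

# Assumption: eager mode; `feature_layer` is the DenseFeatures built above.
dense = feature_layer({'age': tf.constant([[30.]]),
                       'thal': tf.constant([['normal']])})
assert dense.shape[-1] == 11 + 1000  # 11 age buckets + 1000 crossed buckets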
Example #21
    def test_saving_with_dense_features(self):
        cols = [
            feature_column_lib.numeric_column('a'),
            feature_column_lib.indicator_column(
                feature_column_lib.categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a': keras.layers.Input(shape=(1, ), name='a'),
            'b': keras.layers.Input(shape=(1, ), name='b', dtype='string')
        }

        fc_layer = dense_features.DenseFeatures(cols)(input_layers)
        output = keras.layers.Dense(10)(fc_layer)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer='rmsprop',
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        inputs_a = np.arange(10).reshape(10, 1)
        inputs_b = np.arange(10).reshape(10, 1).astype('str')

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not context.executing_eagerly():
                self.evaluate(lookup_ops.tables_initializer())

            self.assertLen(
                loaded_model.predict({
                    'a': inputs_a,
                    'b': inputs_b
                }), 10)
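Worth noting about the round trip above: `to_json`/`model_from_json` carry only the architecture, so the loaded model starts from fresh weights. A hedged sketch of copying them over once the loaded model has been built (for example after the `predict` call above):

# Sketch: transfer trained weights onto the reconstructed architecture.
loaded_model.set_weights(model.get_weights())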
Example #22
    def test_raise_error_with_multi_worker(self):
        tf_config = {
            'cluster': {
                run_config_lib.TaskType.CHIEF: ['host0:0'],
                run_config_lib.TaskType.WORKER:
                ['host3:3', 'host4:4', 'host5:5']
            },
            'task': {
                'type': run_config_lib.TaskType.CHIEF,
                'index': 0
            }
        }
        with test.mock.patch.dict('os.environ',
                                  {'TF_CONFIG': json.dumps(tf_config)}):
            dnn = estimator_lib.DNNClassifier(
                feature_columns=[feature_column_lib.numeric_column('x')],
                hidden_units=[3, 1])

        def eval_input_fn():
            pass

        with self.assertRaisesRegexp(ValueError,
                                     'supports only single machine'):
            hooks_lib.InMemoryEvaluatorHook(dnn, eval_input_fn)
Example #23
    def test_string_input(self):
        x = {
            'age': np.random.random((1024, 1)),
            'cabin': np.array(['a'] * 1024)
        }
        y = np.random.randint(2, size=(1024, 1))
        ds1 = dataset_ops.Dataset.from_tensor_slices(x)
        ds2 = dataset_ops.Dataset.from_tensor_slices(y)
        dataset = dataset_ops.Dataset.zip((ds1, ds2)).batch(4)
        categorical_cols = [
            fc.categorical_column_with_hash_bucket('cabin', 10)
        ]
        feature_cols = ([fc.numeric_column('age')] +
                        [fc.indicator_column(cc) for cc in categorical_cols])
        layers = [
            fc.DenseFeatures(feature_cols),
            keras.layers.Dense(128),
            keras.layers.Dense(1)
        ]

        model = keras.models.Sequential(layers)
        model.compile(keras.optimizers.SGD(0.1),
                      loss=keras.losses.BinaryCrossentropy())
        model.fit(dataset)
Example #24
    def test_forward_in_exported(self):
        def serving_input_fn():
            features_ph = {
                'x': array_ops.placeholder(dtypes.float32, [None]),
                'id': array_ops.placeholder(dtypes.int32, [None])
            }
            features = {
                key: array_ops.expand_dims(tensor, -1)
                for key, tensor in features_ph.items()
            }
            return estimator_lib.export.ServingInputReceiver(
                features, features_ph)

        def input_fn():
            return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]

        # create estimator
        feature_columns = [fc.numeric_column('x')]
        estimator = linear.LinearRegressor(feature_columns)
        estimator.train(input_fn=input_fn, steps=1)
        estimator = extenders.forward_features(estimator, 'id')

        # export saved model
        export_dir, tmpdir = self._export_estimator(estimator,
                                                    serving_input_fn)

        # restore model
        predict_fn = from_saved_model(export_dir, signature_def_key='predict')
        predictions = predict_fn({'x': [3], 'id': [101]})

        # verify that 'id' exists in predictions
        self.assertIn('id', predictions)
        self.assertEqual(101, predictions['id'])

        # Clean up.
        gfile.DeleteRecursively(tmpdir)
Example #25
  def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    # Use core head
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

    model = estimator.GradientBoostedDecisionTreeEstimator(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=[core_feature_column.numeric_column("x")],
        use_core_libs=True)

    model.fit(input_fn=_train_input_fn, steps=15)
    model.evaluate(input_fn=_eval_input_fn, steps=1)
    model.export(self._export_dir_base)
Example #26
  def test_sequential_model_with_ds_input(self):
    columns = [fc.numeric_column('a')]
    model = keras.models.Sequential([
        fc.DenseFeatures(columns),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(20, activation='softmax')
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        run_eagerly=testing_utils.should_run_eagerly(),
        experimental_run_tf_function=testing_utils.should_run_tf_function())

    y = np.random.randint(20, size=(100, 1))
    y = keras.utils.to_categorical(y, num_classes=20)
    x = {'a': np.random.random((100, 1))}
    ds1 = dataset_ops.Dataset.from_tensor_slices(x)
    ds2 = dataset_ops.Dataset.from_tensor_slices(y)
    ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
    model.fit(ds, steps_per_epoch=1)
    model.fit(ds, steps_per_epoch=1)
    model.evaluate(ds, steps=1)
    model.predict(ds, steps=1)
Example #27
    def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        # Use core head
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

        model = estimator.GradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")],
            use_core_libs=True)

        model.fit(input_fn=_train_input_fn, steps=15)
        model.evaluate(input_fn=_eval_input_fn, steps=1)
        model.export(self._export_dir_base)
Example #28
 def test_key_should_be_list_of_string(self):
     estimator = linear.LinearRegressor([fc.numeric_column('x')])
     with self.assertRaisesRegexp(TypeError, 'should be a string'):
         extenders.forward_features(estimator, ['x', estimator])
Example #29
 def test_one_shot_prediction_head_export(self, estimator_factory):
   def _new_temp_dir():
     return os.path.join(test.get_temp_dir(), str(ops.uid()))
   model_dir = _new_temp_dir()
   categorical_column = feature_column.categorical_column_with_hash_bucket(
       key="categorical_exogenous_feature", hash_bucket_size=16)
   exogenous_feature_columns = [
       feature_column.numeric_column(
           "2d_exogenous_feature", shape=(2,)),
       feature_column.embedding_column(
           categorical_column=categorical_column, dimension=10)]
   estimator = estimator_factory(
       model_dir=model_dir,
       exogenous_feature_columns=exogenous_feature_columns,
       head_type=ts_head_lib.OneShotPredictionHead)
   train_features = {
       feature_keys.TrainEvalFeatures.TIMES: numpy.arange(
           20, dtype=numpy.int64),
       feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
           20, dtype=numpy.float32)[:, None], [1, 5]),
       "2d_exogenous_feature": numpy.ones([20, 2]),
       "categorical_exogenous_feature": numpy.array(
           ["strkey"] * 20)[:, None]
   }
   train_input_fn = input_pipeline.RandomWindowInputFn(
       input_pipeline.NumpyReader(train_features), shuffle_seed=2,
       num_threads=1, batch_size=16, window_size=16)
   estimator.train(input_fn=train_input_fn, steps=5)
   result = estimator.evaluate(input_fn=train_input_fn, steps=1)
   self.assertIn("average_loss", result)
   self.assertNotIn(feature_keys.State.STATE_TUPLE, result)
   input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
   export_location = estimator.export_saved_model(_new_temp_dir(),
                                                  input_receiver_fn)
   graph = ops.Graph()
   with graph.as_default():
     with session_lib.Session() as session:
       signatures = loader.load(
           session, [tag_constants.SERVING], export_location)
       self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                        list(signatures.signature_def.keys()))
       predict_signature = signatures.signature_def[
           feature_keys.SavedModelLabels.PREDICT]
       six.assertCountEqual(
           self,
           [feature_keys.FilteringFeatures.TIMES,
            feature_keys.FilteringFeatures.VALUES,
            "2d_exogenous_feature",
            "categorical_exogenous_feature"],
           predict_signature.inputs.keys())
       features = {
           feature_keys.TrainEvalFeatures.TIMES: numpy.tile(
               numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
           feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
               20, dtype=numpy.float32)[None, :, None], [2, 1, 5]),
           "2d_exogenous_feature": numpy.ones([2, 35, 2]),
           "categorical_exogenous_feature": numpy.tile(numpy.array(
               ["strkey"] * 35)[None, :, None], [2, 1, 1])
       }
       feeds = {
           graph.as_graph_element(input_value.name): features[input_key]
           for input_key, input_value in predict_signature.inputs.items()}
       fetches = {output_key: graph.as_graph_element(output_value.name)
                  for output_key, output_value
                  in predict_signature.outputs.items()}
       output = session.run(fetches, feed_dict=feeds)
       self.assertEqual((2, 15, 5), output["mean"].shape)
   # Build a parsing input function, then make a tf.Example for it to parse.
   export_location = estimator.export_saved_model(
       _new_temp_dir(),
       estimator.build_one_shot_parsing_serving_input_receiver_fn(
           filtering_length=20, prediction_length=15))
   graph = ops.Graph()
   with graph.as_default():
     with session_lib.Session() as session:
       example = example_pb2.Example()
       times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES]
       values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES]
       times.int64_list.value.extend(range(35))
       for i in range(20):
         values.float_list.value.extend(
             [float(i) * 2. + feature_number
              for feature_number in range(5)])
       real_feature = example.features.feature["2d_exogenous_feature"]
        categorical_feature = example.features.feature[
            "categorical_exogenous_feature"]
       for i in range(35):
         real_feature.float_list.value.extend([1, 1])
          categorical_feature.bytes_list.value.append(b"strkey")
       # Serialize the tf.Example for feeding to the Session
       examples = [example.SerializeToString()] * 2
       signatures = loader.load(
           session, [tag_constants.SERVING], export_location)
       predict_signature = signatures.signature_def[
           feature_keys.SavedModelLabels.PREDICT]
       ((_, input_value),) = predict_signature.inputs.items()
       feeds = {graph.as_graph_element(input_value.name): examples}
       fetches = {output_key: graph.as_graph_element(output_value.name)
                  for output_key, output_value
                  in predict_signature.outputs.items()}
       output = session.run(fetches, feed_dict=feeds)
       self.assertEqual((2, 15, 5), output["mean"].shape)
Example #30
  def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices))
    eval_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices))
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    session_config = config_pb2.ConfigProto(
        log_device_placement=True, allow_soft_placement=True)
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        dnn_optimizer=adam.Adam(0.001),
        linear_optimizer=adam.Adam(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution,
            eval_distribute=distribution,
            session_config=session_config))

    num_steps = 2
    if use_train_and_evaluate:
      scores, _ = training.train_and_evaluate(
          estimator, training.TrainSpec(train_input_fn, max_steps=num_steps),
          training.EvalSpec(eval_input_fn))
    else:
      estimator.train(train_input_fn, steps=num_steps)
      scores = estimator.evaluate(eval_input_fn)

    self.assertIn('loss', six.iterkeys(scores))

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #31
  def _fit_restore_fit_test_template(self, estimator_fn, dtype):
    """Tests restoring previously fit models."""
    model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    exogenous_feature_columns = (
        feature_column.numeric_column("exogenous"),
    )
    first_estimator = estimator_fn(model_dir, exogenous_feature_columns)
    times = numpy.arange(20, dtype=numpy.int64)
    values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
    exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
    features = {
        feature_keys.TrainEvalFeatures.TIMES: times,
        feature_keys.TrainEvalFeatures.VALUES: values,
        "exogenous": exogenous
    }
    train_input_fn = input_pipeline.RandomWindowInputFn(
        input_pipeline.NumpyReader(features), shuffle_seed=2, num_threads=1,
        batch_size=16, window_size=16)
    eval_input_fn = input_pipeline.RandomWindowInputFn(
        input_pipeline.NumpyReader(features), shuffle_seed=3, num_threads=1,
        batch_size=16, window_size=16)
    first_estimator.train(input_fn=train_input_fn, steps=1)
    first_evaluation = first_estimator.evaluate(
        input_fn=eval_input_fn, steps=1)
    first_loss_before_fit = first_evaluation["loss"]
    self.assertAllEqual(first_loss_before_fit, first_evaluation["average_loss"])
    self.assertAllEqual([], first_loss_before_fit.shape)
    first_estimator.train(input_fn=train_input_fn, steps=1)
    first_loss_after_fit = first_estimator.evaluate(
        input_fn=eval_input_fn, steps=1)["loss"]
    self.assertAllEqual([], first_loss_after_fit.shape)
    second_estimator = estimator_fn(model_dir, exogenous_feature_columns)
    second_estimator.train(input_fn=train_input_fn, steps=1)
    whole_dataset_input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(features))
    whole_dataset_evaluation = second_estimator.evaluate(
        input_fn=whole_dataset_input_fn, steps=1)
    exogenous_values_ten_steps = {
        "exogenous": numpy.arange(
            10, dtype=dtype.as_numpy_dtype)[None, :, None]
    }
    predict_input_fn = input_pipeline.predict_continuation_input_fn(
        evaluation=whole_dataset_evaluation,
        exogenous_features=exogenous_values_ten_steps,
        steps=10)
    # Also tests that limit_epochs in predict_continuation_input_fn prevents
    # infinite iteration
    (estimator_predictions,
    ) = list(second_estimator.predict(input_fn=predict_input_fn))
    self.assertAllEqual([10, 1], estimator_predictions["mean"].shape)
    input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn()
    export_location = first_estimator.export_saved_model(
        self.get_temp_dir(), input_receiver_fn)
    with ops.Graph().as_default():
      with session.Session() as sess:
        signatures = loader.load(sess, [tag_constants.SERVING], export_location)
        # Test that prediction and filtering can continue from evaluation output
        saved_prediction = saved_model_utils.predict_continuation(
            continue_from=whole_dataset_evaluation,
            steps=10,
            exogenous_features=exogenous_values_ten_steps,
            signatures=signatures,
            session=sess)
        # Saved model predictions should be the same as Estimator predictions
        # starting from the same evaluation.
        for prediction_key, prediction_value in estimator_predictions.items():
          self.assertAllClose(prediction_value,
                              numpy.squeeze(
                                  saved_prediction[prediction_key], axis=0))
        first_filtering = saved_model_utils.filter_continuation(
            continue_from=whole_dataset_evaluation,
            features={
                feature_keys.FilteringFeatures.TIMES: times[None, -1] + 2,
                feature_keys.FilteringFeatures.VALUES: values[None, -1] + 2.,
                "exogenous": values[None, -1, None] + 12.
            },
            signatures=signatures,
            session=sess)
        # Test that prediction and filtering can continue from filtering output
        second_saved_prediction = saved_model_utils.predict_continuation(
            continue_from=first_filtering,
            steps=1,
            exogenous_features={
                "exogenous": numpy.arange(
                    1, dtype=dtype.as_numpy_dtype)[None, :, None]
            },
            signatures=signatures,
            session=sess)
        self.assertEqual(
            times[-1] + 3,
            numpy.squeeze(
                second_saved_prediction[feature_keys.PredictionResults.TIMES]))
        saved_model_utils.filter_continuation(
            continue_from=first_filtering,
            features={
                feature_keys.FilteringFeatures.TIMES: times[-1] + 3,
                feature_keys.FilteringFeatures.VALUES: values[-1] + 3.,
                "exogenous": values[-1, None] + 13.
            },
            signatures=signatures,
            session=sess)

        # Test cold starting
        six.assertCountEqual(
            self,
            [feature_keys.FilteringFeatures.TIMES,
             feature_keys.FilteringFeatures.VALUES,
             "exogenous"],
            signatures.signature_def[
                feature_keys.SavedModelLabels.COLD_START_FILTER].inputs.keys())
        batch_numpy_times = numpy.tile(
            numpy.arange(30, dtype=numpy.int64)[None, :], (10, 1))
        batch_numpy_values = numpy.ones([10, 30, 1])
        state = saved_model_utils.cold_start_filter(
            signatures=signatures,
            session=sess,
            features={
                feature_keys.FilteringFeatures.TIMES: batch_numpy_times,
                feature_keys.FilteringFeatures.VALUES: batch_numpy_values,
                "exogenous": 10. + batch_numpy_values
            }
        )
        predict_times = numpy.tile(
            numpy.arange(30, 45, dtype=numpy.int64)[None, :], (10, 1))
        predictions = saved_model_utils.predict_continuation(
            continue_from=state,
            times=predict_times,
            exogenous_features={
                "exogenous": numpy.tile(numpy.arange(
                    15, dtype=dtype.as_numpy_dtype), (10,))[None, :, None]
            },
            signatures=signatures,
            session=sess)
        self.assertAllEqual([10, 15, 1], predictions["mean"].shape)
Example #32
  def DISABLED_test_function_model_multiple_feature_layer_inputs(self):
    col_a = fc.numeric_column('a')
    col_b = fc.numeric_column('b')
    col_c = fc.numeric_column('c')

    fc1 = fc.DenseFeatures([col_a, col_b], name='fc1')
    fc2 = fc.DenseFeatures([col_b, col_c], name='fc2')
    dense = keras.layers.Dense(4)

    # This seems problematic.... We probably need something for DenseFeatures
    # the way Input is for InputLayer.
    output = dense(fc1) + dense(fc2)

    model = keras.models.Model([fc1, fc2], [output])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(
        optimizer,
        loss,
        metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
        loss_weights=loss_weights)

    data_list = ([{
        'a': np.arange(10),
        'b': np.arange(10)
    }, {
        'b': np.arange(10),
        'c': np.arange(10)
    }], np.arange(10, 100))
    print(model.fit(*data_list, epochs=1))

    data_bloated_list = ([{
        'a': np.arange(10),
        'b': np.arange(10),
        'c': np.arange(10)
    }, {
        'a': np.arange(10),
        'b': np.arange(10),
        'c': np.arange(10)
    }], np.arange(10, 100))
    print(model.fit(*data_bloated_list, epochs=1))

    data_dict = ({
        'fc1': {
            'a': np.arange(10),
            'b': np.arange(10)
        },
        'fc2': {
            'b': np.arange(10),
            'c': np.arange(10)
        }
    }, np.arange(10, 100))
    print(model.fit(*data_dict, epochs=1))

    data_bloated_dict = ({
        'fc1': {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        },
        'fc2': {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        }
    }, np.arange(10, 100))
    print(model.fit(*data_bloated_dict, epochs=1))
Example #33
def get_weights_and_check_match_logits(features,
                                       weight_column,
                                       logits,
                                       allow_per_logit_weights=False):
    """Fetches weights from features and checks that the shape matches logits.

    Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape
    can be either:
    * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`.
    * [D0, D1, ... DN, 1]
    * [D0, D1, ... DN]: In this case, weights is reshaped into
      [D0, D1, ... DN, 1] to work with weight broadcasting rules.

    Args:
      features: The features dict that contains weights.
      weight_column: The weight column. If not given, this method returns 1.
      logits: logits Tensor.
      allow_per_logit_weights: Boolean. Whether we allow weights along the
        logits dimension, namely shape `[D0, D1, ... DN, logits_dimension]`.

    Returns:
      Validated and reshaped weights Tensor.

    Raises:
      ValueError: If the weights `Tensor` cannot be cast into float.
    """
    if allow_per_logit_weights:
        err_msg = (
            'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
            '[D0, D1, ... DN, logits_dimension]')
    else:
        err_msg = (
            'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
    with ops.name_scope('weights',
                        values=tuple(six.itervalues(features)) +
                        (logits, )) as scope:
        # Fetch the weights.
        if weight_column is None:
            return 1.
        # TODO(b/117839674): update feature_column
        if isinstance(weight_column, six.string_types):
            weight_column = feature_column_lib.numeric_column(
                key=weight_column, shape=(1, ))
        if not isinstance(weight_column,
                          (feature_column_lib.NumericColumn, _NumericColumn)):
            raise TypeError(
                'Weight column must be either a string or NumericColumn.'
                ' Given type: {}.'.format(type(weight_column)))
        weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
            _LazyBuilder(features))
        if not (weights.dtype.is_floating or weights.dtype.is_integer):
            raise ValueError('Weight column should be castable to float. '
                             'Given dtype: {}'.format(weights.dtype))
        weights = math_ops.to_float(weights, name='weights')
        # Validate the weights shape.
        # Eager mode.
        if context.executing_eagerly():
            weights_shape = weights._shape_tuple()  # pylint: disable=protected-access
            logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
            weights_rank = weights._rank()  # pylint: disable=protected-access
            logits_rank = logits._rank()  # pylint: disable=protected-access
            if (weights_rank is not None and logits_rank is not None
                    and weights_rank == logits_rank - 1):
                if logits_shape[:-1] != weights_shape:
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
                return array_ops.expand_dims(weights, -1, name=scope)
            supported_weights_shape = logits_shape[:-1] + (1, )
            if allow_per_logit_weights:
                if (logits_shape != weights_shape
                        and supported_weights_shape != weights_shape):
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
            else:
                if supported_weights_shape != weights_shape:
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
            return weights

        # Graph mode.
        weights_shape = array_ops.shape(weights, name='weights_shape')
        logits_shape = array_ops.shape(logits, name='logits_shape')
        if (weights.shape.ndims is not None and logits.shape.ndims is not None
                and weights.shape.ndims == logits.shape.ndims - 1):
            assert_dimension = check_ops.assert_equal(logits_shape[:-1],
                                                      weights_shape,
                                                      message=err_msg,
                                                      data=[
                                                          'logits_shape: ',
                                                          logits_shape,
                                                          'weights_shape: ',
                                                          weights_shape
                                                      ])
            with ops.control_dependencies([assert_dimension]):
                return array_ops.expand_dims(weights, -1, name=scope)
        supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]],
                                                   axis=0)
        if allow_per_logit_weights:
            condition = math_ops.reduce_any([
                math_ops.reduce_all(math_ops.equal(logits_shape,
                                                   weights_shape)),
                math_ops.reduce_all(
                    math_ops.equal(supported_weights_shape, weights_shape))
            ])
            assert_dimension = control_flow_ops.Assert(condition=condition,
                                                       data=[
                                                           err_msg,
                                                           'logits_shape: ',
                                                           logits_shape,
                                                           'weights_shape: ',
                                                           weights_shape
                                                       ])
        else:
            assert_dimension = check_ops.assert_equal(supported_weights_shape,
                                                      weights_shape,
                                                      message=err_msg,
                                                      data=[
                                                          'logits_shape: ',
                                                          logits_shape,
                                                          'weights_shape: ',
                                                          weights_shape
                                                      ])
        with ops.control_dependencies([assert_dimension]):
            return array_ops.identity(weights, name=scope)
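A self-contained numpy sketch, with hypothetical shapes, of the reshaping rule the docstring above describes.

import numpy as np

# Hypothetical shapes: logits [D0, D1, logits_dimension], weights [D0, D1].
logits = np.ones((2, 3, 4), dtype=np.float32)
weights = np.arange(6, dtype=np.float32).reshape(2, 3)
weights = weights[..., None]  # reshaped to [D0, D1, 1] for broadcasting
assert (logits * weights).shape == (2, 3, 4)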
Example #34
    def testBiasAndOtherColumns(self):
        """Tests LinearRegressor with LinearSDCA and validates bias weight."""
        def input_fn():
            """Testing the bias weight when there are other features present.

            1/2 of the instances in this input have feature 'a', the rest have
            feature 'b', and we expect the bias to be added to each instance as
            well. 0.4 of all instances that have feature 'a' are positive, and
            0.2 of all instances that have feature 'b' are positive. The labels
            in the dataset are ordered to appear shuffled since SDCA expects
            shuffled data, and converges faster with this pseudo-random
            ordering. If the bias was not regularized we would expect the
            weights to be:
              bias: 0.3
              a: 0.1
              b: -0.1
            But with bias regularization the optimal values are:
              bias: 0.2
              a: 0.2
              b: 0.0

            Returns:
              The test dataset.
            """
            num_examples = 200
            half = int(num_examples / 2)
            return {
                'example_id':
                constant_op.constant([str(x + 1)
                                      for x in range(num_examples)]),
                'a':
                constant_op.constant([[1]] * int(half) + [[0]] * int(half)),
                'b':
                constant_op.constant([[0]] * int(half) + [[1]] * int(half)),
            }, constant_op.constant(
                [[x] for x in [1, 0, 0, 1, 1, 0, 0, 0, 1, 0] * int(half / 10) +
                 [0, 1, 0, 0, 0, 0, 0, 0, 1, 0] * int(half / 10)])

        optimizer = linear.LinearSDCA(example_id_column='example_id',
                                      symmetric_l2_regularization=0.1)
        regressor = linear.LinearRegressorV2(feature_columns=[
            feature_column_lib.numeric_column('a'),
            feature_column_lib.numeric_column('b')
        ],
                                             optimizer=optimizer)

        regressor.train(input_fn=input_fn, steps=200)

        variable_names = regressor.get_variable_names()
        self.assertIn('linear/linear_model/bias_weights', variable_names)
        self.assertIn('linear/linear_model/a/weights', variable_names)
        self.assertIn('linear/linear_model/b/weights', variable_names)
        # TODO(b/29339026): Change the expected results to expect a centered bias.
        self.assertNear(regressor.get_variable_value(
            'linear/linear_model/bias_weights')[0],
                        0.2,
                        err=0.05)
        self.assertNear(
            regressor.get_variable_value('linear/linear_model/a/weights')[0],
            0.2,
            err=0.05)
        self.assertNear(
            regressor.get_variable_value('linear/linear_model/b/weights')[0],
            0.0,
            err=0.05)
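A quick plain-Python check of the arithmetic in the docstring above: the unregularized weights reproduce each group's positive rate, and the bias equals the overall rate.

# Values taken from the docstring's unregularized expectation.
bias, a, b = 0.3, 0.1, -0.1
assert abs((bias + a) - 0.4) < 1e-9  # positive rate of instances with 'a'
assert abs((bias + b) - 0.2) < 1e-9  # positive rate of instances with 'b'
assert abs(bias - (0.4 + 0.2) / 2) < 1e-9  # bias equals the overall rate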
Example #35
import sys

import numpy
import tensorflow as tf
from tensorflow.python.feature_column import feature_column_lib

sys.stderr.write("Using TensorFlow " + tf.__version__ + "\n")

mtcars_input_fn = tf.estimator.inputs.numpy_input_fn(
      x = {
        "disp": numpy.array([160,160,108,258,360,225,360,146.7,140.8,167.6,167.6,275.8,275.8,275.8,472,460,440,78.7,75.7,71.1,120.1,318,304,350,400,79,120.3,95.1,351,145,301,121]),
        "cyl": numpy.array([6,6,4,6,8,6,8,4,4,6,6,8,8,8,8,8,8,4,4,4,4,8,8,8,8,4,4,4,8,6,8,4])
      },
      y = numpy.array([21,21,22.8,21.4,18.7,18.1,14.3,24.4,22.8,19.2,17.8,16.4,17.3,15.2,10.4,10.4,14.7,32.4,30.4,33.9,21.5,15.5,15.2,13.3,19.2,27.3,26,30.4,15.8,19.7,15,21.4]),
      num_epochs = None,
      shuffle = True)

estimator = LinearRegressor(
    feature_columns=[
        feature_column_lib.numeric_column(
            key = "disp", shape = [1], dtype = tf.float32),
        feature_column_lib.numeric_column(
            key = "cyl", shape = [1], dtype = tf.float32)
    ])

sys.stderr.write("Train Start\n")
estimator.train(input_fn = mtcars_input_fn, steps = 2000)
sys.stderr.write("Train End\n")
Example #36
    def _fit_restore_fit_test_template(self, estimator_fn, test_saved_model):
        """Tests restoring previously fit models."""
        temp_dir = self.get_temp_dir()
        model_dir = tempfile.mkdtemp(dir=temp_dir)
        exogenous_feature_columns = (
            feature_column.numeric_column("exogenous"), )
        first_estimator = estimator_fn(model_dir, exogenous_feature_columns)
        train_input_fn = _build_input_fn_with_seed(2)
        eval_input_fn = _build_input_fn_with_seed(3)
        first_estimator.train(input_fn=train_input_fn, steps=1)
        first_evaluation = first_estimator.evaluate(input_fn=eval_input_fn,
                                                    steps=1)
        first_loss_before_fit = first_evaluation["loss"]
        self.assertAllEqual(first_loss_before_fit,
                            first_evaluation["average_loss"])
        self.assertAllEqual([], first_loss_before_fit.shape)
        first_estimator.train(input_fn=train_input_fn, steps=1)
        first_loss_after_fit = first_estimator.evaluate(input_fn=eval_input_fn,
                                                        steps=1)["loss"]
        self.assertAllEqual([], first_loss_after_fit.shape)
        second_estimator = estimator_fn(model_dir, exogenous_feature_columns)
        second_estimator.train(input_fn=train_input_fn, steps=1)
        second_evaluation = second_estimator.evaluate(input_fn=eval_input_fn,
                                                      steps=1)
        exogenous_values_ten_steps = {
            "exogenous": math_ops.range(10, dtype=dtypes.float32)[None, :,
                                                                  None]
        }
        input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn()
        export_location = first_estimator.export_saved_model(
            temp_dir, input_receiver_fn)
        if not test_saved_model:
            return
        with ops.Graph().as_default():
            with session.Session() as sess:
                signatures = loader.load(sess, [tag_constants.SERVING],
                                         export_location)
                # Test that prediction and filtering can continue from evaluation output
                _ = saved_model_utils.predict_continuation(
                    continue_from=second_evaluation,
                    steps=10,
                    exogenous_features=exogenous_values_ten_steps,
                    signatures=signatures,
                    session=sess)
                times, values, _ = _generate_data()
                first_filtering = saved_model_utils.filter_continuation(
                    continue_from=second_evaluation,
                    features={
                        feature_keys.FilteringFeatures.TIMES:
                        times[None, -1] + 2,
                        feature_keys.FilteringFeatures.VALUES:
                        values[None, -1] + 2.,
                        "exogenous": values[None, -1, None] + 12.
                    },
                    signatures=signatures,
                    session=sess)
                # Test that prediction and filtering can continue from filtering output
                second_saved_prediction = saved_model_utils.predict_continuation(
                    continue_from=first_filtering,
                    steps=1,
                    exogenous_features={
                        "exogenous":
                        math_ops.range(1, dtype=dtypes.float32)[None, :, None]
                    },
                    signatures=signatures,
                    session=sess)
                self.assertEqual(
                    times[-1] + 3,
                    array_ops.squeeze(second_saved_prediction[
                        feature_keys.PredictionResults.TIMES]))
                saved_model_utils.filter_continuation(
                    continue_from=first_filtering,
                    features={
                        feature_keys.FilteringFeatures.TIMES: times[-1] + 3,
                        feature_keys.FilteringFeatures.VALUES: values[-1] + 3.,
                        "exogenous": values[-1, None] + 13.
                    },
                    signatures=signatures,
                    session=sess)

                # Test cold starting
                six.assertCountEqual(
                    self, [
                        feature_keys.FilteringFeatures.TIMES,
                        feature_keys.FilteringFeatures.VALUES, "exogenous"
                    ],
                    signatures.signature_def[feature_keys.SavedModelLabels.
                                             COLD_START_FILTER].inputs.keys())
                batched_times = array_ops.tile(
                    math_ops.range(30, dtype=dtypes.int64)[None, :], (10, 1))
                batched_values = array_ops.ones([10, 30, 1])
                state = saved_model_utils.cold_start_filter(
                    signatures=signatures,
                    session=sess,
                    features={
                        feature_keys.FilteringFeatures.TIMES: batched_times,
                        feature_keys.FilteringFeatures.VALUES: batched_values,
                        "exogenous": 10. + batched_values
                    })
                predict_times = math_ops.tile(
                    math_ops.range(30, 45, dtype=dtypes.int64)[None, :],
                    (10, 1))
                predictions = saved_model_utils.predict_continuation(
                    continue_from=state,
                    times=predict_times,
                    exogenous_features={
                        "exogenous":
                        math_ops.tile(math_ops.range(15, dtype=dtypes.float32),
                                      (10, ))[None, :, None]
                    },
                    signatures=signatures,
                    session=sess)
                self.assertAllEqual([10, 15, 1], predictions["mean"].shape)
Example #37
    def testCalibratedEtlMonotonicClassifierTraining(self):
        # Construct the following training pair.
        #
        # Training: (x, y)
        # ([0., 0.], 0.0)
        # ([0., 1.], 1.0)
        # ([1., 0.], 1.0)
        # ([1., 1.], 0.0)
        #
        # which is not a monotonic function. Then check whether forcing
        # monotonicity yields a model that satisfies:
        # f(0, 0) <= f(0, 1), f(0, 0) <= f(1, 0), f(0, 1) <= f(1, 1),
        # f(1, 0) <= f(1, 1).
        x0 = np.array([0.0, 0.0, 1.0, 1.0])
        x1 = np.array([0.0, 1.0, 0.0, 1.0])
        x_samples = {'x0': x0, 'x1': x1}
        training_y = np.array([[False], [True], [True], [False]])

        train_input_fn = numpy_io.numpy_input_fn(x=x_samples,
                                                 y=training_y,
                                                 batch_size=4,
                                                 num_epochs=1000,
                                                 shuffle=False)
        test_input_fn = numpy_io.numpy_input_fn(x=x_samples,
                                                y=None,
                                                shuffle=False)

        # Define monotonic lattice classifier.
        feature_columns = [
            feature_column_lib.numeric_column('x0'),
            feature_column_lib.numeric_column('x1'),
        ]

        def init_fn():
            return keypoints_initialization.uniform_keypoints_for_signal(
                2, 0., 1., 0., 1.)

        hparams = tfl_hparams.CalibratedEtlHParams(num_keypoints=2,
                                                   monotonic_num_lattices=2,
                                                   monotonic_lattice_rank=2,
                                                   monotonic_lattice_size=2)
        hparams.set_param('calibration_monotonic', +1)
        hparams.set_param('lattice_monotonic', True)
        hparams.set_param('learning_rate', 0.1)

        estimator = calibrated_etl.calibrated_etl_classifier(
            feature_columns=feature_columns,
            hparams=hparams,
            keypoints_initializers_fn=init_fn)
        estimator.train(input_fn=train_input_fn)
        predictions = [
            results['logits'][0]
            for results in estimator.predict(input_fn=test_input_fn)
        ]

        self.assertEqual(len(predictions), 4)
        # Check monotonicity. Note that projection has its own precision, so we
        # add a small number.
        self.assertLess(predictions[0], predictions[1] + 1e-6)
        self.assertLess(predictions[0], predictions[2] + 1e-6)
        self.assertLess(predictions[1], predictions[3] + 1e-6)
        self.assertLess(predictions[2], predictions[3] + 1e-6)
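The comment above claims the training data is not jointly monotonic; a small standalone check (illustrative only, independent of the test) enumerates the order constraints that the labels violate:

labels = {(0, 0): 0.0, (0, 1): 1.0, (1, 0): 1.0, (1, 1): 0.0}
# Pairs lo <= hi (componentwise) whose labels decrease: impossible for any
# function that is nondecreasing in both inputs.
violations = [(lo, hi) for lo in labels for hi in labels
              if all(a <= b for a, b in zip(lo, hi)) and labels[lo] > labels[hi]]
print(violations)  # -> [((0, 1), (1, 1)), ((1, 0), (1, 1))]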
Example #38
    def test_complete_flow_with_mode(self, distribution,
                                     use_train_and_evaluate):
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // len(distribution.worker_devices))
        eval_input_fn = self.dataset_input_fn(x={'x': data},
                                              y=data,
                                              batch_size=batch_size //
                                              len(distribution.worker_devices))
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        session_config = config_pb2.ConfigProto(log_device_placement=True,
                                                allow_soft_placement=True)
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            dnn_optimizer=adam.Adam(0.001),
            linear_optimizer=adam.Adam(0.001),
            config=run_config.RunConfig(train_distribute=distribution,
                                        eval_distribute=distribution,
                                        session_config=session_config))

        num_steps = 2
        if use_train_and_evaluate:
            scores, _ = training.train_and_evaluate(
                estimator,
                training.TrainSpec(train_input_fn, max_steps=num_steps),
                training.EvalSpec(eval_input_fn))
        else:
            estimator.train(train_input_fn, steps=num_steps)
            scores = estimator.evaluate(eval_input_fn)

        self.assertIn('loss', six.iterkeys(scores))

        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example #39
    def test_label_key_should_not_be_used_as_feature(self):
        with self.assertRaisesRegexp(ValueError,
                                     'label should not be used as feature'):
            parsing_utils.classifier_parse_example_spec(
                feature_columns=[fc.numeric_column('a')], label_key='a')
Example #40
    def testSparseFeaturesWithL1Reg(self):
        """Tests LinearRegressor with LinearSDCA and sparse features."""
        def input_fn():
            return {
                'example_id':
                constant_op.constant(['1', '2', '3']),
                'price':
                constant_op.constant([[0.4], [0.6], [0.3]]),
                'country':
                sparse_tensor.SparseTensor(values=['IT', 'US', 'GB'],
                                           indices=[[0, 0], [1, 3], [2, 1]],
                                           dense_shape=[3, 5]),
                'weights':
                constant_op.constant([[10.0], [10.0], [10.0]])
            }, constant_op.constant([[1.4], [-0.8], [2.6]])

        price = feature_column_lib.numeric_column('price')
        country = feature_column_lib.categorical_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        # Regressor with no L1 regularization.
        optimizer = linear.LinearSDCA(example_id_column='example_id',
                                      symmetric_l2_regularization=0.1)
        regressor = linear.LinearRegressorV2(feature_columns=[price, country],
                                             weight_column='weights',
                                             optimizer=optimizer)
        regressor.train(input_fn=input_fn, steps=20)
        no_l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
        variable_names = regressor.get_variable_names()
        self.assertIn('linear/linear_model/price/weights', variable_names)
        self.assertIn('linear/linear_model/country/weights', variable_names)
        no_l1_reg_weights = {
            'linear/linear_model/price/weights':
            regressor.get_variable_value('linear/linear_model/price/weights'),
            'linear/linear_model/country/weights':
            regressor.get_variable_value(
                'linear/linear_model/country/weights'),
        }

        # Regressor with L1 regularization.
        optimizer = linear.LinearSDCA(example_id_column='example_id',
                                      symmetric_l1_regularization=1.0,
                                      symmetric_l2_regularization=0.1)
        regressor = linear.LinearRegressorV2(feature_columns=[price, country],
                                             weight_column='weights',
                                             optimizer=optimizer)
        regressor.train(input_fn=input_fn, steps=20)
        l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
        l1_reg_weights = {
            'linear/linear_model/price/weights':
            regressor.get_variable_value('linear/linear_model/price/weights'),
            'linear/linear_model/country/weights':
            regressor.get_variable_value(
                'linear/linear_model/country/weights'),
        }

        # Unregularized loss is lower when there is no L1 regularization.
        self.assertLess(no_l1_reg_loss, l1_reg_loss)
        self.assertLess(no_l1_reg_loss, 0.05)

        # But weights returned by the regressor with L1 regularization have smaller
        # L1 norm.
        l1_reg_weights_norm, no_l1_reg_weights_norm = 0.0, 0.0
        for var_name in sorted(l1_reg_weights):
            l1_reg_weights_norm += sum(
                np.absolute(l1_reg_weights[var_name].flatten()))
            no_l1_reg_weights_norm += sum(
                np.absolute(no_l1_reg_weights[var_name].flatten()))
            print('Var name: %s, value: %s' %
                  (var_name, no_l1_reg_weights[var_name].flatten()))
        self.assertLess(l1_reg_weights_norm, no_l1_reg_weights_norm)
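The final assertion relies on the usual shrinkage effect of L1 regularization. As a loose illustration (this is not SDCA's actual update rule), the proximal operator of an L1 penalty soft-thresholds each weight toward zero:

import numpy as np

def soft_threshold(w, lam):
    # Prox of lam * ||w||_1: shrink each coordinate by lam, clipping at 0.
    return np.sign(w) * np.maximum(np.abs(w) - lam, 0.0)

w = np.array([0.9, -0.2, 0.05])
print(soft_threshold(w, 0.1))  # -> [ 0.8 -0.1  0. ]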
Example #41
  def _complete_flow(self,
                     train_distribute,
                     eval_distribute,
                     remote_cluster=None,
                     use_train_and_evaluate=True):
    estimator = self._get_estimator(train_distribute, eval_distribute,
                                    remote_cluster)

    input_dimension = LABEL_DIMENSION
    train_input_fn = self.dataset_input_fn(
        x={"x": DATA},
        y=DATA,
        batch_size=BATCH_SIZE // train_distribute.num_replicas_in_sync,
        shuffle=True)
    if eval_distribute:
      eval_batch_size = BATCH_SIZE // eval_distribute.num_replicas_in_sync
    else:
      eval_batch_size = BATCH_SIZE
    eval_input_fn = self.dataset_input_fn(
        x={"x": DATA}, y=DATA, batch_size=eval_batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    eval_spec = estimator_training.EvalSpec(
        name=EVAL_NAME,
        input_fn=eval_input_fn,
        steps=None,
        exporters=self._get_exporter(EXPORTER_NAME, feature_columns),
        start_delay_secs=0,
        throttle_secs=1)

    if use_train_and_evaluate:
      estimator_training.train_and_evaluate(
          estimator,
          estimator_training.TrainSpec(train_input_fn, max_steps=MAX_STEPS),
          eval_spec)
    else:
      estimator.train(train_input_fn, max_steps=MAX_STEPS)

      latest_ckpt_path = estimator.latest_checkpoint()
      metrics = estimator.evaluate(eval_input_fn,
                                   checkpoint_path=latest_ckpt_path,
                                   name=EVAL_NAME)

      # Export the eval result to files.
      eval_result = estimator_training._EvalResult(
          status=estimator_training._EvalStatus.EVALUATED,
          metrics=metrics,
          checkpoint_path=latest_ckpt_path)
      evaluator = estimator_training._TrainingExecutor._Evaluator(estimator,
                                                                  eval_spec,
                                                                  None)
      evaluator._export_eval_result(eval_result, True)

    return estimator
Example #42
    def DISABLED_test_function_model_multiple_feature_layer_inputs(self):
        col_a = fc.numeric_column('a')
        col_b = fc.numeric_column('b')
        col_c = fc.numeric_column('c')

        fc1 = df.DenseFeatures([col_a, col_b], name='fc1')
        fc2 = df.DenseFeatures([col_b, col_c], name='fc2')
        dense = keras.layers.Dense(4)

        # This seems problematic... We probably need something for DenseFeatures
        # the way Input is for InputLayer.
        output = dense(fc1) + dense(fc2)

        model = keras.models.Model([fc1, fc2], [output])

        optimizer = 'rmsprop'
        loss = 'mse'
        loss_weights = [1., 0.5]
        model.compile(optimizer,
                      loss,
                      metrics=[metrics_module.CategoricalAccuracy(), 'mae'],
                      loss_weights=loss_weights)

        data_list = ([{
            'a': np.arange(10),
            'b': np.arange(10)
        }, {
            'b': np.arange(10),
            'c': np.arange(10)
        }], np.arange(10, 100))
        model.fit(*data_list, epochs=1)

        data_bloated_list = ([{
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        }, {
            'a': np.arange(10),
            'b': np.arange(10),
            'c': np.arange(10)
        }], np.arange(10, 100))
        model.fit(*data_bloated_list, epochs=1)

        data_dict = ({
            'fc1': {
                'a': np.arange(10),
                'b': np.arange(10)
            },
            'fc2': {
                'b': np.arange(10),
                'c': np.arange(10)
            }
        }, np.arange(10, 100))
        model.fit(*data_dict, epochs=1)

        data_bloated_dict = ({
            'fc1': {
                'a': np.arange(10),
                'b': np.arange(10),
                'c': np.arange(10)
            },
            'fc2': {
                'a': np.arange(10),
                'b': np.arange(10),
                'c': np.arange(10)
            }
        }, np.arange(10, 100))
        model.fit(*data_bloated_dict, epochs=1)
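The comment in the disabled test above points at the missing analogue of Input for DenseFeatures. A sketch of the pattern that does work in TF 2.x (my assumption, not taken from this test): feed a dict of Keras Input tensors through a single DenseFeatures layer, then build the functional model from those Inputs.

import numpy as np
import tensorflow as tf

col_a = tf.feature_column.numeric_column('a')
col_b = tf.feature_column.numeric_column('b')

# One Keras Input per raw feature; DenseFeatures consumes the dict and
# emits a single dense tensor.
inputs = {
    'a': tf.keras.Input(shape=(1,), name='a'),
    'b': tf.keras.Input(shape=(1,), name='b'),
}
features = tf.keras.layers.DenseFeatures([col_a, col_b])(inputs)
output = tf.keras.layers.Dense(1)(tf.keras.layers.Dense(4)(features))
model = tf.keras.Model(inputs=inputs, outputs=output)

model.compile('rmsprop', 'mse')
model.fit({'a': np.arange(10.), 'b': np.arange(10.)}, np.arange(10.), epochs=1)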
Example #43
    def test_complete_flow_with_mode(self, distribution,
                                     use_train_and_evaluate):
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // distribution.num_replicas_in_sync,
            shuffle=True)
        eval_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // distribution.num_replicas_in_sync,
            shuffle=False)
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            # TODO(isaprykin): Work around the colocate_with error.
            dnn_optimizer=adagrad.AdagradOptimizer(0.001),
            linear_optimizer=adagrad.AdagradOptimizer(0.001),
            config=run_config.RunConfig(train_distribute=distribution,
                                        eval_distribute=distribution))

        num_steps = 10
        if use_train_and_evaluate:
            scores, _ = training.train_and_evaluate(
                estimator,
                training.TrainSpec(train_input_fn, max_steps=num_steps),
                training.EvalSpec(eval_input_fn))
        else:
            estimator.train(train_input_fn, steps=num_steps)
            scores = estimator.evaluate(eval_input_fn)

        self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', scores)

        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_saved_model(tempfile.mkdtemp(),
                                                  serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
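For context, the `distribution` parameter threaded through these flows is a tf.distribute strategy injected by the test harness. A minimal standalone configuration sketch under the TF 1.x estimator API (an assumption about the setup, not code from the test):

import tensorflow as tf

# Replicate training across local GPUs; eval reuses the same strategy here.
distribution = tf.contrib.distribute.MirroredStrategy(num_gpus=2)
config = tf.estimator.RunConfig(train_distribute=distribution,
                                eval_distribute=distribution)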
Example #44
    def test_encode_listwise_features_renaming(self):
        """Tests for using different names in feature columns vs features."""
        with tf.Graph().as_default():
            # Batch size = 2, list_size = 2.
            features = {
                "query_length":
                tf.convert_to_tensor(value=[[1], [2]]),
                "utility":
                tf.convert_to_tensor(value=[[[1.0], [0.0]], [[0.0], [1.0]]]),
                "unigrams":
                tf.SparseTensor(indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0],
                                         [1, 1, 0]],
                                values=[
                                    "ranking", "regression", "classification",
                                    "ordinal"
                                ],
                                dense_shape=[2, 2, 1])
            }
            context_feature_columns = {
                "query_length":
                feature_column.numeric_column("query_length",
                                              shape=(1, ),
                                              default_value=0,
                                              dtype=tf.int64)
            }
            example_feature_columns = {
                "utility_renamed":
                feature_column.numeric_column("utility",
                                              shape=(1, ),
                                              default_value=0.0,
                                              dtype=tf.float32),
                "unigrams_renamed":
                feature_column.embedding_column(
                    feature_column.categorical_column_with_vocabulary_list(
                        "unigrams",
                        vocabulary_list=[
                            "ranking", "regression", "classification",
                            "ordinal"
                        ]),
                    dimension=10)
            }

            context_features, example_features = feature_lib.encode_listwise_features(
                features,
                input_size=2,
                context_feature_columns=context_feature_columns,
                example_feature_columns=example_feature_columns)
            self.assertAllEqual(["query_length"], sorted(context_features))
            self.assertAllEqual(["unigrams_renamed", "utility_renamed"],
                                sorted(example_features))
            self.assertAllEqual(
                [2, 2, 10],
                example_features["unigrams_renamed"].get_shape().as_list())
            with tf.compat.v1.Session() as sess:
                sess.run(tf.compat.v1.global_variables_initializer())
                sess.run(tf.compat.v1.tables_initializer())
                context_features, example_features = sess.run(
                    [context_features, example_features])
                self.assertAllEqual([[1], [2]],
                                    context_features["query_length"])
                self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]],
                                    example_features["utility_renamed"])
Example #45
    def testCalibratedRtlMonotonicClassifierTraining(self):
        # Construct the following training/testing pair.
        #
        # Training: (x, y)
        # ([0., 0.], 0.0)
        # ([0., 1.], 1.0)
        # ([1., 0.], 1.0)
        # ([1., 1.], 0.0)
        #
        # Test: (x, y)
        # ([0., 0.], 0.0)
        # ([0., 1.], 1.0)
        # ([1., 0.], 1.0)
        # ([1., 1.], 1.0)
        #
        # Note that the training set has a noisy sample, ([1., 1.], 0.0), while
        # the test examples are generated by the logical-OR function. Therefore,
        # by enforcing increasing monotonicity on all features, the model should
        # do well on the test examples.
        x0 = np.array([0.0, 0.0, 1.0, 1.0])
        x1 = np.array([0.0, 1.0, 0.0, 1.0])
        x_samples = {'x0': x0, 'x1': x1}
        training_y = np.array([[False], [True], [True], [False]])
        test_y = np.array([[False], [True], [True], [True]])

        train_input_fn = numpy_io.numpy_input_fn(x=x_samples,
                                                 y=training_y,
                                                 batch_size=4,
                                                 num_epochs=1000,
                                                 shuffle=False)
        test_input_fn = numpy_io.numpy_input_fn(x=x_samples,
                                                y=test_y,
                                                shuffle=False)

        # Define monotonic lattice classifier.
        feature_columns = [
            feature_column_lib.numeric_column('x0'),
            feature_column_lib.numeric_column('x1'),
        ]

        def init_fn():
            return keypoints_initialization.uniform_keypoints_for_signal(
                2, 0., 1., 0., 1.)

        hparams = tfl_hparams.CalibratedRtlHParams(num_keypoints=2,
                                                   num_lattices=3,
                                                   lattice_rank=2)
        # Monotonic calibrated lattice.

        hparams.set_param('monotonicity', +1)
        hparams.set_param('learning_rate', 0.1)
        hparams.set_param('interpolation_type', 'hypercube')

        estimator = calibrated_rtl.calibrated_rtl_classifier(
            feature_columns=feature_columns,
            hparams=hparams,
            keypoints_initializers_fn=init_fn)

        estimator.train(input_fn=train_input_fn)
        results = estimator.evaluate(input_fn=test_input_fn)
        # We should expect 1.0 accuracy.
        self.assertGreater(results['accuracy'], 0.999)
Example #46
    def test_encode_listwise_features(self):
        # Batch size = 2, list_size = 2.
        features = {
            "query_length":
            ops.convert_to_tensor([[1], [2]]),
            "utility":
            ops.convert_to_tensor([[[1.0], [0.0]], [[0.0], [1.0]]]),
            "unigrams":
            sparse_tensor_lib.SparseTensor(
                indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]],
                values=["ranking", "regression", "classification", "ordinal"],
                dense_shape=[2, 2, 1])
        }
        context_feature_columns = {
            "query_length":
            feature_column.numeric_column("query_length",
                                          shape=(1, ),
                                          default_value=0,
                                          dtype=dtypes.int64)
        }
        example_feature_columns = {
            "utility":
            feature_column.numeric_column("utility",
                                          shape=(1, ),
                                          default_value=0.0,
                                          dtype=dtypes.float32),
            "unigrams":
            feature_column.embedding_column(
                feature_column.categorical_column_with_vocabulary_list(
                    "unigrams",
                    vocabulary_list=[
                        "ranking", "regression", "classification", "ordinal"
                    ]),
                dimension=10)
        }

        with self.assertRaisesRegexp(
                ValueError,
                r"2nd dimesion of tensor must be equal to input size: 3, but found .*"
        ):
            feature_lib.encode_listwise_features(
                features,
                input_size=3,
                context_feature_columns=context_feature_columns,
                example_feature_columns=example_feature_columns)

        context_features, example_features = feature_lib.encode_listwise_features(
            features,
            input_size=2,
            context_feature_columns=context_feature_columns,
            example_feature_columns=example_feature_columns)
        self.assertAllEqual(["query_length"], sorted(context_features))
        self.assertAllEqual(["unigrams", "utility"], sorted(example_features))
        self.assertAllEqual([2, 2, 10],
                            example_features["unigrams"].get_shape().as_list())
        with session.Session() as sess:
            sess.run(variables.global_variables_initializer())
            sess.run(lookup_ops.tables_initializer())
            context_features, example_features = sess.run(
                [context_features, example_features])
            self.assertAllEqual([[1], [2]], context_features["query_length"])
            self.assertAllEqual([[[1.0], [0.0]], [[0.0], [1.0]]],
                                example_features["utility"])
Example #47
  def _complete_flow(self,
                     train_distribute,
                     eval_distribute,
                     remote_cluster=None,
                     use_train_and_evaluate=True):
    estimator = self._get_estimator(train_distribute, eval_distribute,
                                    remote_cluster)

    input_dimension = LABEL_DIMENSION
    train_input_fn = self.dataset_input_fn(
        x={"x": DATA},
        y=DATA,
        batch_size=BATCH_SIZE // train_distribute.num_replicas_in_sync,
        shuffle=True)
    if eval_distribute:
      eval_batch_size = BATCH_SIZE // eval_distribute.num_replicas_in_sync
    else:
      eval_batch_size = BATCH_SIZE
    eval_input_fn = self.dataset_input_fn(
        x={"x": DATA}, y=DATA, batch_size=eval_batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns

    eval_spec = estimator_training.EvalSpec(
        name=EVAL_NAME,
        input_fn=eval_input_fn,
        steps=None,
        exporters=self._get_exporter(EXPORTER_NAME, feature_columns),
        start_delay_secs=0,
        throttle_secs=1)

    if use_train_and_evaluate:
      estimator_training.train_and_evaluate(
          estimator,
          estimator_training.TrainSpec(train_input_fn, max_steps=MAX_STEPS),
          eval_spec)
    else:
      estimator.train(train_input_fn, max_steps=MAX_STEPS)

      latest_ckpt_path = estimator.latest_checkpoint()
      metrics = estimator.evaluate(eval_input_fn,
                                   checkpoint_path=latest_ckpt_path,
                                   name=EVAL_NAME)

      # Export the eval result to files.
      eval_result = estimator_training._EvalResult(
          status=estimator_training._EvalStatus.EVALUATED,
          metrics=metrics,
          checkpoint_path=latest_ckpt_path)
      evaluator = estimator_training._TrainingExecutor._Evaluator(estimator,
                                                                  eval_spec,
                                                                  None)
      evaluator._export_eval_result(eval_result, True)

    return estimator
Example #48
    def _fit_restore_fit_test_template(self, estimator_fn, dtype):
        """Tests restoring previously fit models."""
        model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
        exogenous_feature_columns = (
            feature_column.numeric_column("exogenous"), )
        first_estimator = estimator_fn(model_dir, exogenous_feature_columns)
        times = numpy.arange(20, dtype=numpy.int64)
        values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
        exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
        features = {
            feature_keys.TrainEvalFeatures.TIMES: times,
            feature_keys.TrainEvalFeatures.VALUES: values,
            "exogenous": exogenous
        }
        train_input_fn = input_pipeline.RandomWindowInputFn(
            input_pipeline.NumpyReader(features),
            shuffle_seed=2,
            num_threads=1,
            batch_size=16,
            window_size=16)
        eval_input_fn = input_pipeline.RandomWindowInputFn(
            input_pipeline.NumpyReader(features),
            shuffle_seed=3,
            num_threads=1,
            batch_size=16,
            window_size=16)
        first_estimator.train(input_fn=train_input_fn, steps=1)
        first_evaluation = first_estimator.evaluate(input_fn=eval_input_fn,
                                                    steps=1)
        first_loss_before_fit = first_evaluation["loss"]
        self.assertAllEqual(first_loss_before_fit,
                            first_evaluation["average_loss"])
        self.assertAllEqual([], first_loss_before_fit.shape)
        first_estimator.train(input_fn=train_input_fn, steps=1)
        first_loss_after_fit = first_estimator.evaluate(input_fn=eval_input_fn,
                                                        steps=1)["loss"]
        self.assertAllEqual([], first_loss_after_fit.shape)
        second_estimator = estimator_fn(model_dir, exogenous_feature_columns)
        second_estimator.train(input_fn=train_input_fn, steps=1)
        whole_dataset_input_fn = input_pipeline.WholeDatasetInputFn(
            input_pipeline.NumpyReader(features))
        whole_dataset_evaluation = second_estimator.evaluate(
            input_fn=whole_dataset_input_fn, steps=1)
        exogenous_values_ten_steps = {
            "exogenous":
            numpy.arange(10, dtype=dtype.as_numpy_dtype)[None, :, None]
        }
        predict_input_fn = input_pipeline.predict_continuation_input_fn(
            evaluation=whole_dataset_evaluation,
            exogenous_features=exogenous_values_ten_steps,
            steps=10)
        # Also tests that limit_epochs in predict_continuation_input_fn prevents
        # infinite iteration
        (estimator_predictions, ) = list(
            second_estimator.predict(input_fn=predict_input_fn))
        self.assertAllEqual([10, 1], estimator_predictions["mean"].shape)
        input_receiver_fn = first_estimator.build_raw_serving_input_receiver_fn()
        export_location = first_estimator.export_saved_model(
            self.get_temp_dir(), input_receiver_fn)
        with ops.Graph().as_default():
            with session.Session() as sess:
                signatures = loader.load(sess, [tag_constants.SERVING],
                                         export_location)
                # Test that prediction and filtering can continue from evaluation output
                saved_prediction = saved_model_utils.predict_continuation(
                    continue_from=whole_dataset_evaluation,
                    steps=10,
                    exogenous_features=exogenous_values_ten_steps,
                    signatures=signatures,
                    session=sess)
                # Saved model predictions should be the same as Estimator predictions
                # starting from the same evaluation.
                for (prediction_key,
                     prediction_value) in estimator_predictions.items():
                    self.assertAllClose(
                        prediction_value,
                        numpy.squeeze(saved_prediction[prediction_key],
                                      axis=0))
                first_filtering = saved_model_utils.filter_continuation(
                    continue_from=whole_dataset_evaluation,
                    features={
                        feature_keys.FilteringFeatures.TIMES:
                        times[None, -1] + 2,
                        feature_keys.FilteringFeatures.VALUES:
                        values[None, -1] + 2.,
                        "exogenous": values[None, -1, None] + 12.
                    },
                    signatures=signatures,
                    session=sess)
                # Test that prediction and filtering can continue from filtering output
                second_saved_prediction = saved_model_utils.predict_continuation(
                    continue_from=first_filtering,
                    steps=1,
                    exogenous_features={
                        "exogenous":
                        numpy.arange(1, dtype=dtype.as_numpy_dtype)[None, :,
                                                                    None]
                    },
                    signatures=signatures,
                    session=sess)
                self.assertEqual(
                    times[-1] + 3,
                    numpy.squeeze(second_saved_prediction[
                        feature_keys.PredictionResults.TIMES]))
                saved_model_utils.filter_continuation(
                    continue_from=first_filtering,
                    features={
                        feature_keys.FilteringFeatures.TIMES: times[-1] + 3,
                        feature_keys.FilteringFeatures.VALUES: values[-1] + 3.,
                        "exogenous": values[-1, None] + 13.
                    },
                    signatures=signatures,
                    session=sess)

                # Test cold starting
                six.assertCountEqual(
                    self, [
                        feature_keys.FilteringFeatures.TIMES,
                        feature_keys.FilteringFeatures.VALUES, "exogenous"
                    ],
                    signatures.signature_def[feature_keys.SavedModelLabels.
                                             COLD_START_FILTER].inputs.keys())
                batch_numpy_times = numpy.tile(
                    numpy.arange(30, dtype=numpy.int64)[None, :], (10, 1))
                batch_numpy_values = numpy.ones([10, 30, 1])
                state = saved_model_utils.cold_start_filter(
                    signatures=signatures,
                    session=sess,
                    features={
                        feature_keys.FilteringFeatures.TIMES:
                        batch_numpy_times,
                        feature_keys.FilteringFeatures.VALUES:
                        batch_numpy_values,
                        "exogenous": 10. + batch_numpy_values
                    })
                predict_times = numpy.tile(
                    numpy.arange(30, 45, dtype=numpy.int64)[None, :], (10, 1))
                predictions = saved_model_utils.predict_continuation(
                    continue_from=state,
                    times=predict_times,
                    exogenous_features={
                        "exogenous":
                        numpy.tile(
                            numpy.arange(15, dtype=dtype.as_numpy_dtype),
                            (10, ))[None, :, None]
                    },
                    signatures=signatures,
                    session=sess)
                self.assertAllEqual([10, 15, 1], predictions["mean"].shape)
Example #49
  def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // distribution.num_replicas_in_sync,
        shuffle=True)
    eval_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // distribution.num_replicas_in_sync,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    if use_train_and_evaluate:
      scores, _ = training.train_and_evaluate(
          estimator,
          training.TrainSpec(train_input_fn, max_steps=num_steps),
          training.EvalSpec(eval_input_fn))
    else:
      estimator.train(train_input_fn, steps=num_steps)
      scores = estimator.evaluate(eval_input_fn)

    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', scores)

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_saved_model(tempfile.mkdtemp(),
                                              serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #50
    def test_weight_column_should_not_be_used_as_feature(self):
        with self.assertRaisesRegexp(
                ValueError, 'weight_column should not be used as feature'):
            self._parse_example_fn(feature_columns=[fc.numeric_column('a')],
                                   label_key='b',
                                   weight_column=fc.numeric_column('a'))