  def test_sequence_length_not_equal(self):
    """Tests that an error is raised when sequence lengths are not equal."""
    # Input a with sequence_length = [2, 1]
    sparse_input_a = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0., 1., 10.),
        dense_shape=(2, 2))
    # Input b with sequence_length = [1, 1]
    sparse_input_b = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (1, 0)),
        values=(1., 10.),
        dense_shape=(2, 2))
    numeric_column_a = sfc.sequence_numeric_column('aaa')
    numeric_column_b = sfc.sequence_numeric_column('bbb')

    _, sequence_length = sfc.sequence_input_layer(
        features={
            'aaa': sparse_input_a,
            'bbb': sparse_input_b,
        },
        feature_columns=[numeric_column_a, numeric_column_b])

    with monitored_session.MonitoredSession() as sess:
      with self.assertRaisesRegexp(
          errors.InvalidArgumentError,
          r'\[Condition x == y did not hold element-wise:\] '
          r'\[x \(sequence_input_layer/aaa/sequence_length:0\) = \] \[2 1\] '
          r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'):
        sess.run(sequence_length)
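
A quick way to see where those [2 1] and [1 1] vectors come from: each column's sequence length is just the number of populated timesteps per row of its sparse input. A minimal numpy sketch (assuming one value per timestep, as in this test):

import numpy as np

def sequence_lengths(indices, batch_size):
  # Count populated timesteps per row (one sparse value per timestep here).
  lengths = np.zeros(batch_size, dtype=np.int64)
  for row, _ in indices:
    lengths[row] += 1
  return lengths

print(sequence_lengths(((0, 0), (0, 1), (1, 0)), 2))  # [2 1] for input a
print(sequence_lengths(((0, 0), (1, 0)), 2))          # [1 1] for input b
# sequence_input_layer asserts these are equal element-wise, hence the error.
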
  def test_shape_must_be_positive_integer(self):
    with self.assertRaisesRegexp(TypeError, 'shape dimensions must be integer'):
      sfc.sequence_numeric_column('aaa', shape=[1.0])

    with self.assertRaisesRegexp(
        ValueError, 'shape dimensions must be greater than 0'):
      sfc.sequence_numeric_column('aaa', shape=[0])
Example #4
  def testMultiClassFromCheckpoint(self):
    initial_global_step = 100
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
        logits_biases=[0.3, 0.4, 0.5],
        global_step=initial_global_step,
        model_dir=self._model_dir)

    def train_input_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2., 7.],
                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                  dense_shape=[2, 2]),
      }, [[0], [1]]

    # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
    # See that test for loss calculation.
    mock_optimizer = self._mock_optimizer(expected_loss=2.662932)

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]
    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=sequence_feature_columns,
        n_classes=3,
        optimizer=mock_optimizer,
        model_dir=self._model_dir)
    self.assertEqual(0, mock_optimizer.minimize.call_count)
    est.train(input_fn=train_input_fn, steps=10)
    self.assertEqual(1, mock_optimizer.minimize.call_count)
Example #5
  def testMultiExampleMultiDim(self):
    """Tests multiple examples and multi-dimensional logits.

    Intermediate values are rounded for ease in reading.
    input_layer = [[[10], [5]], [[2], [7]]]
    initial_state = [[0, 0], [0, 0]]
    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                              tanh(-.2*10 - .3*0 - .4*0 +.5)],
                             [tanh(.1*2 + .2*0 + .3*0 +.2),
                              tanh(-.2*2 - .3*0 - .4*0 +.5)]]
                          = [[0.83, -0.91], [0.38, 0.10]]
    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
                             [tanh(.1*7 + .2*.38 + .3*.10 +.2),
                              tanh(-.2*7 - .3*.38 - .4*.10 +.5)]]
                          = [[0.53, -0.37], [0.76, -0.78]]
    logits = [[-1*0.53 - 1*0.37 + 0.3,
               0.5*0.53 + 0.3*0.37 + 0.4,
               0.2*0.53 - 0.1*0.37 + 0.5],
              [-1*0.76 - 1*0.78 + 0.3,
               0.5*0.76 +0.3*0.78 + 0.4,
               0.2*0.76 -0.1*0.78 + 0.5]]
           = [[-0.6033, 0.7777, 0.5698], [-1.2473, 1.0170, 0.5745]]
    """
    base_global_step = 100
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
        logits_biases=[0.3, 0.4, 0.5],
        global_step=base_global_step,
        model_dir=self._model_dir)

    def features_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2., 7.],
                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                  dense_shape=[2, 2]),
      }

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))
    ]
    context_feature_columns = []

    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      self._test_logits(
          mode,
          rnn_units=[2],
          logits_dimension=3,
          features_fn=features_fn,
          sequence_feature_columns=sequence_feature_columns,
          context_feature_columns=context_feature_columns,
          expected_logits=[[-0.6033, 0.7777, 0.5698],
                           [-1.2473, 1.0170, 0.5745]])
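
The docstring's arithmetic can be verified outside the estimator. A numpy sketch of the same two-timestep tanh cell under the checkpoint weights (variable names here are illustrative):

import numpy as np

# Checkpoint weights: rows of w are [input; prev_state], columns are units.
w = np.array([[.1, -.2], [.2, -.3], [.3, -.4]])
b = np.array([.2, .5])
w_logits = np.array([[-1., 0.5, 0.2], [1., -0.3, 0.1]])
b_logits = np.array([0.3, 0.4, 0.5])

inputs = np.array([[[10.], [5.]], [[2.], [7.]]])  # [batch, time, 1]
state = np.zeros((2, 2))
for t in range(2):
  concat = np.concatenate([inputs[:, t, :], state], axis=1)
  state = np.tanh(concat @ w + b)

print(np.round(state @ w_logits + b_logits, 4))
# ~[[-0.6033  0.7777  0.5698]
#   [-1.2473  1.017   0.5745]], matching expected_logits above.
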
  def test_get_sequence_dense_tensor_with_normalizer_fn(self):

    def _increment_two(input_sparse_tensor):
      return sparse_ops.sparse_add(
          input_sparse_tensor,
          sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2))
      )

    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[0.], [1]]
        # example 1, [[10.]]
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0., 1., 10.),
        dense_shape=(2, 2))

    # Before _increment_two:
    #   [[0.], [1.]],
    #   [[10.], [0.]],
    # After _increment_two:
    #   [[2.], [1.]],
    #   [[10.], [2.]],
    expected_dense_tensor = [
        [[2.], [1.]],
        [[10.], [2.]],
    ]
    numeric_column = sfc.sequence_numeric_column(
        'aaa', normalizer_fn=_increment_two)

    dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_dense_tensor, dense_tensor.eval(session=sess))
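
The normalizer is applied to the sparse values before they are densified, so the added 2.0 at position (1, 1) shows up even though example 1 only has one real timestep. The same arithmetic in plain numpy:

import numpy as np

dense = np.array([[[0.], [1.]],
                  [[10.], [0.]]])
# _increment_two adds 2.0 at sparse positions (0, 0) and (1, 1); after
# densification those land at dense[0, 0] and dense[1, 1].
dense[0, 0] += 2.0
dense[1, 1] += 2.0
print(dense.tolist())  # [[[2.0], [1.0]], [[10.0], [2.0]]]
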
  def test_numeric_column_multi_dim(self):
    """Tests sequence_input_layer for multi-dimensional numeric_column."""
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
        # example 1, [[[10., 11.],  [12., 13.]]]
        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
                 (1, 0), (1, 1), (1, 2), (1, 3)),
        values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
        dense_shape=(2, 8))
    # The output of numeric_column._get_dense_tensor should be flattened.
    expected_input_layer = [
        [[0., 1., 2., 3.], [4., 5., 6., 7.]],
        [[10., 11., 12., 13.], [0., 0., 0., 0.]],
    ]
    expected_sequence_length = [2, 1]
    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

    input_layer, sequence_length = sfc.sequence_input_layer(
        features={'aaa': sparse_input},
        feature_columns=[numeric_column])

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
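
Each timestep of a (2, 2)-shaped column consumes four sparse values, and sequence_input_layer flattens the per-timestep block. A numpy sketch of that grouping, padding example 1's missing step with zeros:

import numpy as np

values = np.array([0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.])
per_step = 2 * 2  # product of the column shape (2, 2)

example_0 = values[:8].reshape(-1, per_step)
example_1 = values[8:].reshape(-1, per_step)
example_1 = np.vstack([example_1, np.zeros((1, per_step))])  # pad to 2 steps
print(example_0.tolist())  # [[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0]]
print(example_1.tolist())  # [[10.0, 11.0, 12.0, 13.0], [0.0, 0.0, 0.0, 0.0]]
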
Example #10
  def testMultiClassFromCheckpoint(self):
    initial_global_step = 100
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
        logits_biases=[0.3, 0.4, 0.5],
        global_step=initial_global_step,
        model_dir=self._model_dir)

    def train_input_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2., 7.],
                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                  dense_shape=[2, 2]),
      }, [[0], [1]]

    # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
    # See that test for loss calculation.
    mock_optimizer = self._mock_optimizer(expected_loss=1.331465)

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]
    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=sequence_feature_columns,
        n_classes=3,
        optimizer=mock_optimizer,
        model_dir=self._model_dir)
    self.assertEqual(0, mock_optimizer.minimize.call_count)
    est.train(input_fn=train_input_fn, steps=10)
    self.assertEqual(1, mock_optimizer.minimize.call_count)
  def test_defaults(self):
    a = sfc.sequence_numeric_column('aaa')
    self.assertEqual('aaa', a.key)
    self.assertEqual('aaa', a.name)
    self.assertEqual('aaa', a._var_scope_name)
    self.assertEqual((1,), a.shape)
    self.assertEqual(0., a.default_value)
    self.assertEqual(dtypes.float32, a.dtype)
Example #13
  def testMultiExamplesWithContext(self):
    """Tests multiple examples with context features.

    Intermediate values are rounded for ease in reading.
    input_layer = [[[10, -0.5], [5, -0.5]], [[2, 0.8], [0, 0]]]
    initial_state = [[0, 0], [0, 0]]
    rnn_output_timestep_1 = [[tanh(.1*10 - 1*.5 + .2*0 + .3*0 +.2),
                              tanh(-.2*10 - 0.9*.5 - .3*0 - .4*0 +.5)],
                             [tanh(.1*2 + 1*.8 + .2*0 + .3*0 +.2),
                              tanh(-.2*2 + .9*.8 - .3*0 - .4*0 +.5)]]
                          = [[0.60, -0.96], [0.83, 0.68]]
    rnn_output_timestep_2 = [[tanh(.1*5 - 1*.5 + .2*.60 - .3*.96 +.2),
                              tanh(-.2*5 - .9*.5 - .3*.60 + .4*.96 +.5)],
                             [<ignored-padding>]]
                          = [[0.03, -0.63], [<ignored-padding>]]
    logits = [[-1*0.03 - 1*0.63 + 0.3],
              [-1*0.83 + 1*0.68 + 0.3]]
           = [[-0.3662], [0.1414]]
    """
    base_global_step = 100
    create_checkpoint(
        # Context features weights are inserted between input and state weights.
        rnn_weights=[[.1, -.2], [1., 0.9], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=base_global_step,
        model_dir=self._model_dir)

    def features_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
          'context': [[-0.5], [0.8]],
      }

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]
    context_feature_columns = [fc.numeric_column('context', shape=(1,))]

    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      self._test_logits(
          mode,
          rnn_units=[2],
          logits_dimension=1,
          features_fn=features_fn,
          sequence_feature_columns=sequence_feature_columns,
          context_feature_columns=context_feature_columns,
          expected_logits=[[-0.3662], [0.1414]])
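
Context features are tiled across timesteps and concatenated after the sequence features, which is why the checkpoint inserts the context weights between the input and state weights. A numpy sketch of the input_layer from the docstring (padded steps zeroed, as the docstring shows):

import numpy as np

price = np.array([[[10.], [5.]], [[2.], [0.]]])  # [batch, time, 1], padded
context = np.array([[-0.5], [0.8]])              # [batch, 1]
seq_len = np.array([2, 1])

# Tile each example's context across its timesteps, then concatenate.
tiled = np.repeat(context[:, np.newaxis, :], 2, axis=1)
input_layer = np.concatenate([price, tiled], axis=2)
for i, n in enumerate(seq_len):
  input_layer[i, n:, :] = 0.  # zero out padded timesteps
print(input_layer.tolist())
# [[[10.0, -0.5], [5.0, -0.5]], [[2.0, 0.8], [0.0, 0.0]]]
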
Example #15
  def testOneDimLogits(self, return_sequences, expected_logits):
    """Tests one-dimensional logits.

    Intermediate values are rounded for ease in reading.
    input_layer = [[[10]], [[5]]]
    initial_state = [0, 0]
    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                              tanh(-.2*10 - .3*0 - .4*0 +.5)]]
                          = [[0.83, -0.91]]
    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)]]
                          = [[0.53, -0.37]]
    logits_timestep_1 = [[-1*0.83 - 1*0.91 + 0.3]] = [[-1.4388]]
    logits_timestep_2 = [[-1*0.53 - 1*0.37 + 0.3]] = [[-0.6033]]

    Args:
      return_sequences: A boolean indicating whether to return the last output
        in the output sequence, or the full sequence.
      expected_logits: An array with expected logits result.
    """
    base_global_step = 100
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=base_global_step,
        model_dir=self._model_dir)

    def features_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5.],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2]),
      }

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]
    context_feature_columns = []
    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      self._test_logits(
          mode,
          rnn_units=[2],
          logits_dimension=1,
          features_fn=features_fn,
          sequence_feature_columns=sequence_feature_columns,
          context_feature_columns=context_feature_columns,
          expected_logits=expected_logits,
          return_sequences=return_sequences)
  def testMultiExamplesDifferentLength(self):
    """Tests multiple examples with different lengths.

    Intermediate values are rounded for ease in reading.
    input_layer = [[[10], [5]], [[2], [0]]]
    initial_state = [[0, 0], [0, 0]]
    rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                              tanh(-.2*10 - .3*0 - .4*0 +.5)],
                             [tanh(.1*2 + .2*0 + .3*0 +.2),
                              tanh(-.2*2 - .3*0 - .4*0 +.5)]]
                          = [[0.83, -0.91], [0.38, 0.10]]
    rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                              tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
                             [<ignored-padding>]]
                          = [[0.53, -0.37], [<ignored-padding>]]
    logits = [[-1*0.53 - 1*0.37 + 0.3],
              [-1*0.38 + 1*0.10 + 0.3]]
           = [[-0.6033], [0.0197]]
    """
    base_global_step = 100
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=base_global_step,
        model_dir=self._model_dir)

    def features_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]
    context_feature_columns = []

    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      self._test_logits(
          mode,
          rnn_units=[2],
          logits_dimension=1,
          features_fn=features_fn,
          sequence_feature_columns=sequence_feature_columns,
          context_feature_columns=context_feature_columns,
          expected_logits=[[-0.6033], [0.0197]])
Example #18
  def testBinaryClassEvaluationMetrics(self):
    global_step = 100
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=global_step,
        model_dir=self._model_dir)

    def eval_input_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }, [[0], [1]]

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]

    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=sequence_feature_columns,
        n_classes=2,
        model_dir=self._model_dir)
    eval_metrics = est.evaluate(eval_input_fn, steps=1)

    # Uses identical numbers to testMultiExamplesWithDifferentLength.
    # See that test for logits calculation.
    # logits = [[-0.603282], [0.019719]]
    # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]]
    # loss = -label * ln(p) - (1 - label) * ln(1 - p)
    #      = [[0.436326], [0.683335]]
    expected_metrics = {
        ops.GraphKeys.GLOBAL_STEP: global_step,
        metric_keys.MetricKeys.LOSS: 1.119661,
        metric_keys.MetricKeys.LOSS_MEAN: 0.559831,
        metric_keys.MetricKeys.ACCURACY: 1.0,
        metric_keys.MetricKeys.PREDICTION_MEAN: 0.429262,
        metric_keys.MetricKeys.LABEL_MEAN: 0.5,
        metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
        # With default threshold of 0.5, the model is a perfect classifier.
        metric_keys.MetricKeys.RECALL: 1.0,
        metric_keys.MetricKeys.PRECISION: 1.0,
        # Positive example is scored above negative, so AUC = 1.0.
        metric_keys.MetricKeys.AUC: 1.0,
        metric_keys.MetricKeys.AUC_PR: 1.0,
    }
    self.assertAllClose(
        sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
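
The metric arithmetic in the comments is ordinary logistic regression and can be checked with numpy; LOSS is the summed cross-entropy and LOSS_MEAN its average:

import numpy as np

logits = np.array([-0.603282, 0.019719])
labels = np.array([0., 1.])

p = 1. / (1. + np.exp(-logits))  # sigmoid
loss = -labels * np.log(p) - (1. - labels) * np.log(1. - p)
print(np.round(p, 6))         # ~[0.353593 0.50493 ]
print(np.round(loss, 6))      # ~[0.436326 0.683335]
print(round(loss.sum(), 6))   # ~1.119661 -> MetricKeys.LOSS
print(round(loss.mean(), 6))  # ~0.559831 -> MetricKeys.LOSS_MEAN
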
  def testMultiClassEvaluationMetrics(self):
    global_step = 100
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
        logits_biases=[0.3, 0.4, 0.5],
        global_step=global_step,
        model_dir=self._model_dir)

    def eval_input_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2., 7.],
                  indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                  dense_shape=[2, 2]),
      }, [[0], [1]]

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]

    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=sequence_feature_columns,
        n_classes=3,
        model_dir=self._model_dir)
    eval_metrics = est.evaluate(eval_input_fn, steps=1)

    # Uses identical numbers to testMultiExampleMultiDim.
    # See that test for logits calculation.
    # logits = [[-0.603282, 0.777708, 0.569756],
    #           [-1.247356, 1.017018, 0.574481]]
    # logits_exp = exp(logits)
    #            = [[0.547013, 2.176468, 1.767836],
    #               [0.287263, 2.764937, 1.776208]]
    # softmax_probabilities = logits_exp / logits_exp.sum()
    #                       = [[0.121793, 0.484596, 0.393611],
    #                          [0.059494, 0.572639, 0.367866]]
    # loss = -1. * log(softmax[label])
    #      = [[2.105432], [0.557500]]
    # sum_over_batch_size = (2.105432 + 0.557500)/2
    expected_metrics = {
        ops.GraphKeys.GLOBAL_STEP: global_step,
        metric_keys.MetricKeys.LOSS: 1.331465,
        metric_keys.MetricKeys.LOSS_MEAN: 1.331466,
        metric_keys.MetricKeys.ACCURACY: 0.5,
    }

    self.assertAllClose(
        sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
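
The softmax arithmetic above, checked with numpy (the loss is averaged over the batch, matching both LOSS and LOSS_MEAN here):

import numpy as np

logits = np.array([[-0.603282, 0.777708, 0.569756],
                   [-1.247356, 1.017018, 0.574481]])
labels = np.array([0, 1])

logits_exp = np.exp(logits)
softmax = logits_exp / logits_exp.sum(axis=1, keepdims=True)
loss = -np.log(softmax[np.arange(2), labels])
print(np.round(softmax, 6))   # ~[[0.121793 0.484596 0.393611]
                              #    [0.059494 0.572639 0.367866]]
print(np.round(loss, 6))      # ~[2.105432 0.5575  ]
print(round(loss.mean(), 6))  # ~1.331466 -> LOSS / LOSS_MEAN
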
  def testBinaryClassPredictions(self):
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=0,
        model_dir=self._model_dir)

    def predict_input_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5.],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2]),
      }

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]
    label_vocabulary = ['class_0', 'class_1']

    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=sequence_feature_columns,
        n_classes=2,
        label_vocabulary=label_vocabulary,
        model_dir=self._model_dir)
    # Uses identical numbers to testOneDimLogits.
    # See that test for logits calculation.
    # logits = [-0.603282]
    # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593]
    # probabilities = [0.646407, 0.353593]
    # class_ids = argmax(probabilities) = [0]
    predictions = next(est.predict(predict_input_fn))
    self.assertAllClose([-0.603282],
                        predictions[prediction_keys.PredictionKeys.LOGITS])
    self.assertAllClose([0.353593],
                        predictions[prediction_keys.PredictionKeys.LOGISTIC])
    self.assertAllClose(
        [0.646407, 0.353593],
        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
    self.assertAllClose([0],
                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
    self.assertEqual([b'class_0'],
                     predictions[prediction_keys.PredictionKeys.CLASSES])
  def test_sequence_length_with_shape(self):
    """Tests _sequence_length with shape !=(1,)."""
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[0.], [1]]
        # example 1, [[10.]]
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0., 1., 10.),
        dense_shape=(2, 2))
    expected_sequence_length = [2, 1]
    numeric_column = sfc.sequence_numeric_column('aaa')

    _, sequence_length = numeric_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
  def test_sequence_length(self):
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[0., 1., 2.], [3., 4., 5.]]
        # example 1, [[10., 11., 12.]]
        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
                 (1, 0), (1, 1), (1, 2)),
        values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
        dense_shape=(2, 6))
    expected_sequence_length = [2, 1]
    numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,))

    _, sequence_length = numeric_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
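
With shape=(3,), every timestep consumes three sparse values, so the sequence length is the per-row value count divided by three. A short numpy sketch:

import numpy as np

indices = np.array([[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5],
                    [1, 0], [1, 1], [1, 2]])
values_per_step = 3  # product of the column shape (3,)

counts = np.bincount(indices[:, 0], minlength=2)  # values per row: [6 3]
print(counts // values_per_step)                  # [2 1]
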
  def testMultiClassPredictions(self):
    create_checkpoint(
        rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
        logits_biases=[0.3, 0.4, 0.5],
        global_step=0,
        model_dir=self._model_dir)

    def predict_input_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5.],
                  indices=[[0, 0], [0, 1]],
                  dense_shape=[1, 2]),
      }

    sequence_feature_columns = [
        seq_fc.sequence_numeric_column('price', shape=(1,))]
    label_vocabulary = ['class_0', 'class_1', 'class_2']

    est = rnn.RNNClassifier(
        num_units=[2],
        sequence_feature_columns=sequence_feature_columns,
        n_classes=3,
        label_vocabulary=label_vocabulary,
        model_dir=self._model_dir)
    # Uses identical numbers to testMultiDimLogits.
    # See that test for logits calculation.
    # logits = [-0.603282, 0.777708, 0.569756]
    # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836]
    # softmax_probabilities = logits_exp / logits_exp.sum()
    #                       = [0.121793, 0.484596, 0.393611]
    # class_ids = argmax(probabilities) = [1]
    predictions = next(est.predict(predict_input_fn))
    self.assertAllClose([-0.603282, 0.777708, 0.569756],
                        predictions[prediction_keys.PredictionKeys.LOGITS])
    self.assertAllClose(
        [0.121793, 0.484596, 0.393611],
        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
    self.assertAllClose([1],
                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
    self.assertEqual([b'class_1'],
                     predictions[prediction_keys.PredictionKeys.CLASSES])
  def test_sequence_length(self):
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[0., 1., 2.], [3., 4., 5.]]
        # example 1, [[10., 11., 12.]]
        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
                 (1, 0), (1, 1), (1, 2)),
        values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
        dense_shape=(2, 6))
    expected_sequence_length = [2, 1]
    numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,))

    _, sequence_length = numeric_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      sequence_length = sess.run(sequence_length)
      self.assertAllEqual(expected_sequence_length, sequence_length)
      self.assertEqual(np.int64, sequence_length.dtype)
  def test_get_sequence_dense_tensor(self):
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[0.], [1]]
        # example 1, [[10.]]
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0., 1., 10.),
        dense_shape=(2, 2))
    expected_dense_tensor = [
        [[0.], [1.]],
        [[10.], [0.]],
    ]
    numeric_column = sfc.sequence_numeric_column('aaa')

    dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_dense_tensor, dense_tensor.eval(session=sess))
  def test_get_sequence_dense_tensor_with_shape(self):
    """Tests get_sequence_dense_tensor with shape !=(1,)."""
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[0., 1., 2.], [3., 4., 5.]]
        # example 1, [[10., 11., 12.]]
        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
                 (1, 0), (1, 1), (1, 2)),
        values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
        dense_shape=(2, 6))
    expected_dense_tensor = [
        [[0., 1., 2.], [3., 4., 5.]],
        [[10., 11., 12.], [0., 0., 0.]],
    ]
    numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,))

    dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_dense_tensor, dense_tensor.eval(session=sess))
  def test_get_dense_tensor_multi_dim(self):
    """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[[0., 1.],  [2., 3.]], [[4., 5.],  [6., 7.]]]
        # example 1, [[[10., 11.],  [12., 13.]]]
        indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
                 (1, 0), (1, 1), (1, 2), (1, 3)),
        values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
        dense_shape=(2, 8))
    expected_dense_tensor = [
        [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
        [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]],
    ]
    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

    dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_dense_tensor, dense_tensor.eval(session=sess))
  def test_sequence_length_with_empty_rows(self):
    """Tests _sequence_length when some examples do not have ids."""
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values []
        # example 1, values [[0.], [1.]]
        # example 2, [[2.]]
        # example 3, values []
        # example 4, [[3.]]
        # example 5, values []
        indices=((1, 0), (1, 1), (2, 0), (4, 0)),
        values=(0., 1., 2., 3.),
        dense_shape=(6, 2))
    expected_sequence_length = [0, 2, 1, 0, 1, 0]
    numeric_column = sfc.sequence_numeric_column('aaa')

    _, sequence_length = numeric_column._get_sequence_dense_tensor(
        _LazyBuilder({'aaa': sparse_input}))

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
  def test_numeric_column(self):
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, values [[0.], [1]]
        # example 1, [[10.]]
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0., 1., 10.),
        dense_shape=(2, 2))
    expected_input_layer = [
        [[0.], [1.]],
        [[10.], [0.]],
    ]
    expected_sequence_length = [2, 1]
    numeric_column = sfc.sequence_numeric_column('aaa')

    input_layer, sequence_length = sfc.sequence_input_layer(
        features={'aaa': sparse_input},
        feature_columns=[numeric_column])

    with monitored_session.MonitoredSession() as sess:
      self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
      self.assertAllEqual(
          expected_sequence_length, sequence_length.eval(session=sess))
  def test_shape_saved_as_tuple(self):
    a = sfc.sequence_numeric_column('aaa', shape=[1, 2])
    self.assertEqual((1, 2), a.shape)
Example #40
  def testMultiExamplesMultiFeatures(self):
    """Tests examples with multiple sequential feature columns.

    Intermediate values are rounded for ease in reading.
    input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
    initial_state = [[0, 0], [0, 0]]
    rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                              tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                             [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                              tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                          = [[0.94, -0.96], [0.72, -0.38]]
    rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                              tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                             [<ignored-padding>]]
                          = [[0.92, -0.88], [<ignored-padding>]]
    logits = [[-1*0.92 - 1*0.88 + 0.3],
              [-1*0.72 - 1*0.38 + 0.3]]
           = [[-1.5056], [-0.7962]]
    """
    base_global_step = 100
    create_checkpoint(
        # FeatureColumns are sorted alphabetically, so on_sale weights are
        # inserted before price.
        rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
        rnn_biases=[.2, .5],
        logits_weights=[[-1.], [1.]],
        logits_biases=[0.3],
        global_step=base_global_step,
        model_dir=self._model_dir)

    def features_fn():
      return {
          'price':
              sparse_tensor.SparseTensor(
                  values=[10., 5., 2.],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
          'on_sale':
              sparse_tensor.SparseTensor(
                  values=[0, 1, 0],
                  indices=[[0, 0], [0, 1], [1, 0]],
                  dense_shape=[2, 2]),
      }

    price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
    on_sale_column = fc.indicator_column(
        seq_fc.sequence_categorical_column_with_identity(
            'on_sale', num_buckets=2))
    sequence_feature_columns = [price_column, on_sale_column]
    context_feature_columns = []

    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      self._test_logits(
          mode,
          rnn_units=[2],
          logits_dimension=1,
          features_fn=features_fn,
          sequence_feature_columns=sequence_feature_columns,
          context_feature_columns=context_feature_columns,
          expected_logits=[[-1.5056], [-0.7962]])
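
Since feature columns are sorted by name, each timestep's input is the on_sale one-hot followed by price, which fixes the row order of rnn_weights in the checkpoint. A numpy sketch of example 0's input rows:

import numpy as np

on_sale = [0, 1]   # example 0, timesteps 1 and 2
price = [10., 5.]

steps = [np.concatenate([np.eye(2)[s], [p]]) for s, p in zip(on_sale, price)]
print(np.array(steps).tolist())  # [[1.0, 0.0, 10.0], [0.0, 1.0, 5.0]]
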
    def make_columns():
        """
        Builds the feature_columns required by the estimator to link the Dataset and the model_fn
        :return:
        """
        columns_dict = {}

        columns_dict['gci'] = fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                'gci',
                vocab_file,
                default_value="0"
            )
        )
        columns_dict['ta'] = (
            seq_fc.sequence_numeric_column(
                'ta', normalizer_fn=lambda x: normalize(x, 'ta', stats_dict)
            )
        )
        columns_dict['rsrp'] = (
            seq_fc.sequence_numeric_column(
                'rsrp', normalizer_fn=lambda x: normalize(
                    x, 'rsrp', stats_dict)))
        columns_dict['gci0'] = fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                'gci0',
                vocab_file,
                default_value="0"
            )
        )
        columns_dict['rsrp0'] = (
            seq_fc.sequence_numeric_column(
                'rsrp0', normalizer_fn=lambda x: normalize(
                    x, 'rsrp0', stats_dict)))
        columns_dict['gci1'] = fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                'gci1',
                vocab_file,
                default_value="0"
            )
        )
        columns_dict['rsrp1'] = (
            seq_fc.sequence_numeric_column(
                'rsrp1', normalizer_fn=lambda x: normalize(
                    x, 'rsrp1', stats_dict)))
        columns_dict['gci2'] = fc.indicator_column(
            fc.sequence_categorical_column_with_vocabulary_file(
                'gci2',
                vocab_file,
                default_value="0"
            )
        )
        columns_dict['rsrp2'] = (
            seq_fc.sequence_numeric_column(
                'rsrp2', normalizer_fn=lambda x: normalize(
                    x, 'rsrp2', stats_dict)))
        columns_dict['dt'] = (
            seq_fc.sequence_numeric_column(
                'dt', normalizer_fn=lambda x: normalize(x, 'dt', stats_dict)
            )
        )
        return columns_dict
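
normalize and stats_dict are defined elsewhere in this project; a plausible sketch, assuming stats_dict maps each feature name to precomputed statistics (this helper is illustrative, not the project's actual implementation):

def normalize(x, name, stats_dict):
    # Hypothetical standard-score normalizer; assumes stats_dict holds
    # precomputed per-feature statistics, e.g. {'ta': (12.3, 4.5), ...}.
    mean, stddev = stats_dict[name]
    return (x - mean) / stddev
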
  def test_dtype_is_convertible_to_float(self):
    with self.assertRaisesRegexp(
        ValueError, 'dtype must be convertible to float'):
      sfc.sequence_numeric_column('aaa', dtype=dtypes.string)
  def test_normalizer_fn_must_be_callable(self):
    with self.assertRaisesRegexp(TypeError, 'must be a callable'):
      sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')