Example #1
 def test_exogenous_input(self):
     """Test that no errors are raised when using exogenous features."""
     dtype = dtypes.float64
     times = [1, 2, 3, 4, 5, 6]
     values = [[0.01], [5.10], [5.21], [0.30], [5.41], [0.50]]
     feature_a = [["off"], ["on"], ["on"], ["off"], ["on"], ["off"]]
     sparse_column_a = feature_column.sparse_column_with_keys(
         column_name="feature_a", keys=["on", "off"])
     one_hot_a = layers.one_hot_column(sparse_id_column=sparse_column_a)
     regressor = estimators.StructuralEnsembleRegressor(
         periodicities=[],
         num_features=1,
         moving_average_order=0,
         exogenous_feature_columns=[one_hot_a],
         dtype=dtype)
     features = {
         TrainEvalFeatures.TIMES: times,
         TrainEvalFeatures.VALUES: values,
         "feature_a": feature_a
     }
     train_input_fn = input_pipeline.RandomWindowInputFn(
         input_pipeline.NumpyReader(features), window_size=6, batch_size=1)
     regressor.train(input_fn=train_input_fn, steps=1)
     eval_input_fn = input_pipeline.WholeDatasetInputFn(
         input_pipeline.NumpyReader(features))
     evaluation = regressor.evaluate(input_fn=eval_input_fn, steps=1)
     predict_input_fn = input_pipeline.predict_continuation_input_fn(
         evaluation,
         times=[[7, 8, 9]],
         exogenous_features={"feature_a": [[["on"], ["off"], ["on"]]]})
     regressor.predict(input_fn=predict_input_fn)
Example #2
 def _test_initialization(self, warmup_iterations, batch_size):
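     """Evaluate a stub model's loss via ChainingStateManager after warm-up.

     A batch_size of -1 selects AllWindowInputFn; any other value uses
     RandomWindowInputFn with that batch size. The loss is evaluated
     warmup_iterations times before the returned evaluation.
     """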
     stub_model = StubTimeSeriesModel()
     data = self._make_test_data(length=20,
                                 cut_start=None,
                                 cut_end=None,
                                 offset=0.)
     if batch_size == -1:
         input_fn = test_utils.AllWindowInputFn(
             input_pipeline.NumpyReader(data), window_size=10)
     else:
         input_fn = input_pipeline.RandomWindowInputFn(
             input_pipeline.NumpyReader(data),
             window_size=10,
             batch_size=batch_size)
     chainer = state_management.ChainingStateManager(
         state_saving_interval=1)
     features, _ = input_fn()
     stub_model.initialize_graph()
     chainer.initialize_graph(model=stub_model)
     model_outputs = chainer.define_loss(model=stub_model,
                                         features=features,
                                         mode=estimator_lib.ModeKeys.TRAIN)
     with self.cached_session() as session:
         variables.global_variables_initializer().run()
         coordinator = coordinator_lib.Coordinator()
         queue_runner_impl.start_queue_runners(session, coord=coordinator)
         for _ in range(warmup_iterations):
             # Warm up saved state
             model_outputs.loss.eval()
         outputs = model_outputs.loss.eval()
         coordinator.request_stop()
         coordinator.join()
         return outputs
Example #3
 def _test_missing_values(self, cut_start, cut_end, offset):
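     """Return the stub model's loss on data with a cut_start/cut_end gap."""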
     stub_model = StubTimeSeriesModel()
     data = self._make_test_data(length=100,
                                 cut_start=cut_start,
                                 cut_end=cut_end,
                                 offset=offset)
     input_fn = test_utils.AllWindowInputFn(
         input_pipeline.NumpyReader(data), window_size=10)
     chainer = state_management.ChainingStateManager(
         state_saving_interval=1)
     features, _ = input_fn()
     stub_model.initialize_graph()
     chainer.initialize_graph(model=stub_model)
     model_outputs = chainer.define_loss(model=stub_model,
                                         features=features,
                                         mode=estimator_lib.ModeKeys.TRAIN)
     with self.cached_session() as session:
         variables.global_variables_initializer().run()
         coordinator = coordinator_lib.Coordinator()
         queue_runner_impl.start_queue_runners(session, coord=coordinator)
         for _ in range(10):
             model_outputs.loss.eval()
         returned_loss = model_outputs.loss.eval()
         coordinator.request_stop()
         coordinator.join()
         return returned_loss
Example #4
 def test_numpy(self):
     data = _make_numpy_time_series(num_features=4, num_samples=100)
     time_series_reader = input_pipeline.NumpyReader(data)
     self._whole_dataset_input_fn_test_template(
         time_series_reader=time_series_reader,
         num_features=4,
         num_samples=100)
Example #5
 def test_multivariate(self):
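     """Train a multivariate level model with correlated transition noise.

     After one training step, each ensemble member should have received the
     computed input statistics.
     """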
     dtype = dtypes.float32
     num_features = 3
     covariance = numpy.eye(num_features)
     # A single off-diagonal has a non-zero value in the true transition
     # noise covariance.
     covariance[-1, 0] = 1.
     covariance[0, -1] = 1.
     dataset_size = 100
     values = numpy.cumsum(numpy.random.multivariate_normal(
         mean=numpy.zeros(num_features), cov=covariance, size=dataset_size),
                           axis=0)
     times = numpy.arange(dataset_size)
     model = MultivariateLevelModel(
         configuration=state_space_model.StateSpaceModelConfiguration(
             num_features=num_features,
             dtype=dtype,
             use_observation_noise=False,
             transition_covariance_initial_log_scale_bias=5.))
     estimator = estimators.StateSpaceRegressor(
         model=model,
         optimizer=gradient_descent.GradientDescentOptimizer(0.1))
     data = {
         feature_keys.TrainEvalFeatures.TIMES: times,
         feature_keys.TrainEvalFeatures.VALUES: values
     }
     train_input_fn = input_pipeline.RandomWindowInputFn(
         input_pipeline.NumpyReader(data), batch_size=16, window_size=16)
     estimator.train(input_fn=train_input_fn, steps=1)
     for component in model._ensemble_members:
         # Check that input statistics propagated to component models
         self.assertTrue(component._input_statistics)
Example #6
        def chained_model_outputs(original_model, data):
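            """Return a function evaluating original_model on chunked windows.

            The returned callable warms up ChainingStateManager state and then
            returns (loss, posteriors, predictions) from the chunked graph.
            """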
            input_fn = test_utils.AllWindowInputFn(
                input_pipeline.NumpyReader(data), window_size=chunk_size)
            state_manager = state_management.ChainingStateManager(
                state_saving_interval=1)
            features, _ = input_fn()
            state_manager.initialize_graph(original_model)
            model_outputs = state_manager.define_loss(
                model=original_model,
                features=features,
                mode=estimator_lib.ModeKeys.TRAIN)

            def _eval_outputs(session):
                for _ in range(50):
                    # Warm up saved state
                    model_outputs.loss.eval()
                (posterior_mean, posterior_var,
                 priors_from_time) = model_outputs.end_state
                posteriors = ((posterior_mean, ), (posterior_var, ),
                              priors_from_time)
                outputs = (model_outputs.loss, posteriors,
                           model_outputs.predictions)
                chunked_outputs_evaled = session.run(outputs)
                return chunked_outputs_evaled

            return _eval_outputs
Example #7
 def _gap_test_template(self, times, values):
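     """Check that get_batch_loss runs cleanly on times containing gaps."""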
     random_model = RandomStateSpaceModel(
         state_dimension=1,
         state_noise_dimension=1,
         configuration=state_space_model.StateSpaceModelConfiguration(
             num_features=1))
     random_model.initialize_graph()
     input_fn = input_pipeline.WholeDatasetInputFn(
         input_pipeline.NumpyReader({
             feature_keys.TrainEvalFeatures.TIMES: times,
             feature_keys.TrainEvalFeatures.VALUES: values
         }))
     features, _ = input_fn()
     times = features[feature_keys.TrainEvalFeatures.TIMES]
     values = features[feature_keys.TrainEvalFeatures.VALUES]
     model_outputs = random_model.get_batch_loss(
         features={
             feature_keys.TrainEvalFeatures.TIMES: times,
             feature_keys.TrainEvalFeatures.VALUES: values
         },
         mode=None,
         state=math_utils.replicate_state(
             start_state=random_model.get_start_state(),
             batch_size=array_ops.shape(times)[0]))
     with self.cached_session() as session:
         variables.global_variables_initializer().run()
         coordinator = coordinator_lib.Coordinator()
         queue_runner_impl.start_queue_runners(session, coord=coordinator)
         model_outputs.loss.eval()
         coordinator.request_stop()
         coordinator.join()
Example #8
 def test_numpy_nobatch_nofeatures(self):
     data = _make_numpy_time_series(num_features=1, num_samples=100)
     data[TrainEvalFeatures.VALUES] = data[TrainEvalFeatures.VALUES][:, 0]
     time_series_reader = input_pipeline.NumpyReader(data)
     self._whole_dataset_input_fn_test_template(
         time_series_reader=time_series_reader,
         num_features=1,
         num_samples=100)
Example #9
 def test_numpy(self):
     data = _make_numpy_time_series(num_features=2, num_samples=31)
     time_series_reader = input_pipeline.NumpyReader(data)
     self._all_window_input_fn_test_template(
         time_series_reader=time_series_reader,
         original_numpy_features=data,
         num_samples=31,
         window_size=5)
Example #10
 def _equivalent_to_single_model_test_template(self, model_generator):
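     """Compare an un-chunked model pass against an equivalent chunked one.

     model_generator returns a callable that evaluates the chunked version; its
     posteriors and final prediction chunk should match the un-chunked outputs.
     """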
     with self.cached_session() as session:
         random_model = RandomStateSpaceModel(
             state_dimension=5,
             state_noise_dimension=4,
             configuration=state_space_model.StateSpaceModelConfiguration(
                 dtype=dtypes.float64, num_features=1))
         random_model.initialize_graph()
         series_length = 10
         model_data = random_model.generate(
             number_of_series=1,
             series_length=series_length,
             model_parameters=random_model.random_model_parameters())
         input_fn = input_pipeline.WholeDatasetInputFn(
             input_pipeline.NumpyReader(model_data))
         features, _ = input_fn()
         model_outputs = random_model.get_batch_loss(
             features=features,
             mode=None,
             state=math_utils.replicate_state(
                 start_state=random_model.get_start_state(),
                 batch_size=array_ops.shape(
                     features[feature_keys.TrainEvalFeatures.TIMES])[0]))
         variables.global_variables_initializer().run()
         compare_outputs_evaled_fn = model_generator(
             random_model, model_data)
         coordinator = coordinator_lib.Coordinator()
         queue_runner_impl.start_queue_runners(session, coord=coordinator)
         compare_outputs_evaled = compare_outputs_evaled_fn(session)
         model_outputs_evaled = session.run(
             (model_outputs.end_state, model_outputs.predictions))
         coordinator.request_stop()
         coordinator.join()
         model_posteriors, model_predictions = model_outputs_evaled
         (_, compare_posteriors,
          compare_predictions) = compare_outputs_evaled
         (model_posterior_mean, model_posterior_var,
          model_from_time) = model_posteriors
         (compare_posterior_mean, compare_posterior_var,
          compare_from_time) = compare_posteriors
         self.assertAllClose(model_posterior_mean,
                             compare_posterior_mean[0])
         self.assertAllClose(model_posterior_var, compare_posterior_var[0])
         self.assertAllClose(model_from_time, compare_from_time)
         self.assertEqual(sorted(model_predictions.keys()),
                          sorted(compare_predictions.keys()))
         for prediction_name in model_predictions:
             if prediction_name == "loss":
                 # Chunking means that losses will be different; skip testing them.
                 continue
             # Compare the last chunk against the corresponding slice of the
             # un-chunked model's predictions.
             last_prediction_chunk = compare_predictions[prediction_name][-1]
             comparison_values = last_prediction_chunk.shape[0]
             model_prediction = (
                 model_predictions[prediction_name][0, -comparison_values:])
             self.assertAllClose(model_prediction, last_prediction_chunk)
Example #11
 def test_numpy_discard_out_of_order_window_equal(self):
     data = _make_numpy_time_series(num_features=1, num_samples=3)
     time_series_reader = input_pipeline.NumpyReader(data)
     self._random_window_input_fn_test_template(
         time_series_reader=time_series_reader,
         num_features=1,
         window_size=3,
         batch_size=5,
         discard_out_of_order=True)
Example #12
 def test_long_eval(self):
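     """Evaluation on one long series should match chunked evaluation."""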
     g = ops.Graph()
     with g.as_default():
         model = ar_model.ARModel(periodicities=2,
                                  num_features=1,
                                  num_time_buckets=10,
                                  input_window_size=2,
                                  output_window_size=1)
         raw_features = {
             TrainEvalFeatures.TIMES: [[1, 3, 5, 7, 11]],
             TrainEvalFeatures.VALUES: [[[1.], [2.], [3.], [4.], [5.]]]
         }
         chunked_features, _ = test_utils.AllWindowInputFn(
             time_series_reader=input_pipeline.NumpyReader(raw_features),
             window_size=3)()
         model.initialize_graph()
         with variable_scope.variable_scope("armodel") as scope:
             raw_evaluation = model.define_loss(
                 raw_features, mode=estimator_lib.ModeKeys.EVAL)
         with variable_scope.variable_scope(scope, reuse=True):
             chunked_evaluation = model.define_loss(
                 chunked_features, mode=estimator_lib.ModeKeys.EVAL)
         with session.Session() as sess:
             coordinator = coordinator_lib.Coordinator()
             queue_runner_impl.start_queue_runners(sess, coord=coordinator)
             variables.global_variables_initializer().run()
             raw_evaluation_evaled, chunked_evaluation_evaled = sess.run(
                 [raw_evaluation, chunked_evaluation])
             self.assertAllClose(chunked_evaluation_evaled.loss,
                                 raw_evaluation_evaled.loss)
             last_chunk_evaluation_state = [
                 state[-1, None]
                 for state in chunked_evaluation_evaled.end_state
             ]
             for last_chunk_state_member, raw_state_member in zip(
                     last_chunk_evaluation_state,
                     raw_evaluation_evaled.end_state):
                 self.assertAllClose(last_chunk_state_member,
                                     raw_state_member)
             self.assertAllEqual([[5, 7, 11]],
                                 raw_evaluation_evaled.prediction_times)
             for feature_name in raw_evaluation.predictions:
                 self.assertAllEqual(
                     [
                         1, 3, 1
                     ],  # batch, window, num_features. The window size has 2
                     # cut off for the first input_window.
                     raw_evaluation_evaled.predictions[feature_name].shape)
                 self.assertAllClose(
                     np.reshape(
                         chunked_evaluation_evaled.
                         predictions[feature_name], [-1]),
                     np.reshape(
                         raw_evaluation_evaled.predictions[feature_name],
                         [-1]))
             coordinator.request_stop()
             coordinator.join()
Example #13
 def test_loop_unrolling(self):
     """Tests running/restoring from a checkpoint with static unrolling."""
     model = TimeDependentStateSpaceModel(
         # Unroll during training, but not evaluation
         static_unrolling_window_size_threshold=2)
     estimator = estimators.StateSpaceRegressor(model=model)
     times = numpy.arange(100)
     values = numpy.arange(100)
     dataset = {
         feature_keys.TrainEvalFeatures.TIMES: times,
         feature_keys.TrainEvalFeatures.VALUES: values
     }
     train_input_fn = input_pipeline.RandomWindowInputFn(
         input_pipeline.NumpyReader(dataset), batch_size=16, window_size=2)
     eval_input_fn = input_pipeline.WholeDatasetInputFn(
         input_pipeline.NumpyReader(dataset))
     estimator.train(input_fn=train_input_fn, max_steps=1)
     estimator.evaluate(input_fn=eval_input_fn, steps=1)
Example #14
 def test_numpy_withbatch(self):
     data_nobatch = _make_numpy_time_series(num_features=4, num_samples=100)
     data = {
         feature_name: feature_value[None]
         for feature_name, feature_value in data_nobatch.items()
     }
     time_series_reader = input_pipeline.NumpyReader(data)
     self._whole_dataset_input_fn_test_template(
         time_series_reader=time_series_reader,
         num_features=4,
         num_samples=100)
Example #15
 def test_numpy_discard_out_of_order_window_too_large(self):
     data = _make_numpy_time_series(num_features=1, num_samples=2)
     time_series_reader = input_pipeline.NumpyReader(data)
     with self.assertRaisesRegexp(ValueError,
                                  "only 2 records were available"):
         self._random_window_input_fn_test_template(
             time_series_reader=time_series_reader,
             num_features=1,
             window_size=3,
             batch_size=5,
             discard_out_of_order=True)
Example #16
 def test_structural_ensemble_numpy_input(self):
     numpy_data = {
         "times": numpy.arange(50),
         "values": numpy.random.normal(size=[50])
     }
     estimators.StructuralEnsembleRegressor(
         num_features=1,
         periodicities=[],
         model_dir=self.get_temp_dir(),
         config=_SeedRunConfig()).train(
             input_pipeline.WholeDatasetInputFn(
                 input_pipeline.NumpyReader(numpy_data)),
             steps=1)
Example #17
    def dry_run_train_helper(self,
                             sample_every,
                             period,
                             num_samples,
                             model_type,
                             model_args,
                             num_features=1):
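        """Train briefly and check that the evaluation loss decreases."""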
        numpy.random.seed(1)
        dtype = dtypes.float32
        features = self.simple_data(sample_every,
                                    dtype=dtype,
                                    period=period,
                                    num_samples=num_samples,
                                    num_features=num_features)
        model = model_type(
            configuration=(state_space_model.StateSpaceModelConfiguration(
                num_features=num_features,
                dtype=dtype,
                covariance_prior_fn=lambda _: 0.)),
            **model_args)

        class _RunConfig(estimator_lib.RunConfig):
            @property
            def tf_random_seed(self):
                return 4

        estimator = estimators.StateSpaceRegressor(model, config=_RunConfig())
        train_input_fn = input_pipeline.RandomWindowInputFn(
            input_pipeline.NumpyReader(features),
            num_threads=1,
            shuffle_seed=1,
            batch_size=16,
            window_size=16)
        eval_input_fn = input_pipeline.WholeDatasetInputFn(
            input_pipeline.NumpyReader(features))
        estimator.train(input_fn=train_input_fn, max_steps=1)
        first_evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
        estimator.train(input_fn=train_input_fn, max_steps=3)
        second_evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
        self.assertLess(second_evaluation["loss"], first_evaluation["loss"])
Example #18
 def test_ar_lstm_regressor(self):
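     """Train and evaluate an LSTMAutoRegressor with an exogenous feature.

     The reported "loss" should equal "average_loss" and be a scalar.
     """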
     dtype = dtypes.float32
     model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
     exogenous_feature_columns = (
         feature_column.numeric_column("exogenous"), )
     estimator = estimators.LSTMAutoRegressor(
         periodicities=10,
         input_window_size=10,
         output_window_size=6,
         model_dir=model_dir,
         num_features=1,
         extra_feature_columns=exogenous_feature_columns,
         num_units=10,
         config=_SeedRunConfig())
     times = numpy.arange(20, dtype=numpy.int64)
     values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
     exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
     features = {
         feature_keys.TrainEvalFeatures.TIMES: times,
         feature_keys.TrainEvalFeatures.VALUES: values,
         "exogenous": exogenous
     }
     train_input_fn = input_pipeline.RandomWindowInputFn(
         input_pipeline.NumpyReader(features),
         shuffle_seed=2,
         num_threads=1,
         batch_size=16,
         window_size=16)
     eval_input_fn = input_pipeline.RandomWindowInputFn(
         input_pipeline.NumpyReader(features),
         shuffle_seed=3,
         num_threads=1,
         batch_size=16,
         window_size=16)
     estimator.train(input_fn=train_input_fn, steps=1)
     evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
     self.assertAllEqual(evaluation["loss"], evaluation["average_loss"])
     self.assertAllEqual([], evaluation["loss"].shape)
Example #19
 def test_no_periodicity(self):
     """Test that no errors are raised when periodicites is None."""
     dtype = dtypes.float64
     times = [1, 2, 3, 4, 5, 6]
     values = [[0.01], [5.10], [5.21], [0.30], [5.41], [0.50]]
     regressor = estimators.StructuralEnsembleRegressor(
         periodicities=None,
         num_features=1,
         moving_average_order=0,
         dtype=dtype)
     features = {
         TrainEvalFeatures.TIMES: times,
         TrainEvalFeatures.VALUES: values
     }
     train_input_fn = input_pipeline.RandomWindowInputFn(
         input_pipeline.NumpyReader(features), window_size=6, batch_size=1)
     regressor.train(input_fn=train_input_fn, steps=1)
     eval_input_fn = input_pipeline.WholeDatasetInputFn(
         input_pipeline.NumpyReader(features))
     evaluation = regressor.evaluate(input_fn=eval_input_fn, steps=1)
     predict_input_fn = input_pipeline.predict_continuation_input_fn(
         evaluation, times=[[7, 8, 9]])
     regressor.predict(input_fn=predict_input_fn)
Example #20
    def _test_pass_to_next(self, read_offset, step, correct_offset):
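        """Return the loss of a shifted window chained after an initial one.

        The first input covers data[:-read_offset] and the second covers
        data[read_offset:]; state saved by ChainingStateManager from the first
        evaluation should carry over to the second.
        """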
        stub_model = StubTimeSeriesModel(correct_offset=correct_offset)
        data = self._make_test_data(length=100 + read_offset,
                                    cut_start=None,
                                    cut_end=None,
                                    offset=100.,
                                    step=step)
        init_input_fn = input_pipeline.WholeDatasetInputFn(
            input_pipeline.NumpyReader(
                {k: v[:-read_offset]
                 for k, v in data.items()}))
        result_input_fn = input_pipeline.WholeDatasetInputFn(
            input_pipeline.NumpyReader(
                {k: v[read_offset:]
                 for k, v in data.items()}))

        chainer = state_management.ChainingStateManager(
            state_saving_interval=1)
        stub_model.initialize_graph()
        chainer.initialize_graph(model=stub_model)
        init_model_outputs = chainer.define_loss(
            model=stub_model,
            features=init_input_fn()[0],
            mode=estimator_lib.ModeKeys.TRAIN)
        result_model_outputs = chainer.define_loss(
            model=stub_model,
            features=result_input_fn()[0],
            mode=estimator_lib.ModeKeys.TRAIN)
        with self.cached_session() as session:
            variables.global_variables_initializer().run()
            coordinator = coordinator_lib.Coordinator()
            queue_runner_impl.start_queue_runners(session, coord=coordinator)
            init_model_outputs.loss.eval()
            returned_loss = result_model_outputs.loss.eval()
            coordinator.request_stop()
            coordinator.join()
            return returned_loss
Example #21
 def _time_dependency_test_template(self, model_type):
     """Test that a time-dependent observation model influences predictions."""
     model = model_type()
     estimator = estimators.StateSpaceRegressor(
         model=model,
         optimizer=gradient_descent.GradientDescentOptimizer(0.1))
     values = numpy.reshape([1., 2., 3., 4.], newshape=[1, 4, 1])
     input_fn = input_pipeline.WholeDatasetInputFn(
         input_pipeline.NumpyReader({
             feature_keys.TrainEvalFeatures.TIMES: [[0, 1, 2, 3]],
             feature_keys.TrainEvalFeatures.VALUES: values
         }))
     estimator.train(input_fn=input_fn, max_steps=1)
     predicted_values = estimator.evaluate(input_fn=input_fn,
                                           steps=1)["mean"]
     # Throw out the first value so we don't test the prior
     self.assertAllEqual(values[1:], predicted_values[1:])
Example #22
    def test_exact_posterior_recovery_no_transition_noise(self):
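        """Filtering with no transition noise recovers the propagated prior.

        The posterior variance should be near zero and the posterior mean
        should equal the prior mean pushed through the transition matrix.
        """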
        with self.cached_session() as session:
            stub_model, data, true_params = self._get_single_model()
            input_fn = input_pipeline.WholeDatasetInputFn(
                input_pipeline.NumpyReader(data))
            features, _ = input_fn()
            model_outputs = stub_model.get_batch_loss(
                features=features,
                mode=None,
                state=math_utils.replicate_state(
                    start_state=stub_model.get_start_state(),
                    batch_size=array_ops.shape(
                        features[feature_keys.TrainEvalFeatures.TIMES])[0]))
            variables.global_variables_initializer().run()
            coordinator = coordinator_lib.Coordinator()
            queue_runner_impl.start_queue_runners(session, coord=coordinator)
            posterior_mean, posterior_var, posterior_times = session.run(
                # Feed the true model parameters so that this test doesn't depend on
                # the generated parameters being close to the variable initializations
                # (an alternative would be training steps to fit the noise values,
                # which would be slow).
                model_outputs.end_state,
                feed_dict=true_params)
            coordinator.request_stop()
            coordinator.join()

            self.assertAllClose(numpy.zeros([1, 4, 4]),
                                posterior_var,
                                atol=1e-2)
            self.assertAllClose(numpy.dot(
                numpy.linalg.matrix_power(
                    stub_model.transition,
                    data[feature_keys.TrainEvalFeatures.TIMES].shape[1]),
                true_params[stub_model.prior_state_mean]),
                                posterior_mean[0],
                                rtol=1e-1)
            self.assertAllClose(
                math_utils.batch_end_time(
                    features[feature_keys.TrainEvalFeatures.TIMES]).eval(),
                posterior_times)
Example #23
 def test_chained_exact_posterior_recovery_no_transition_noise(self):
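     """Chunked filtering via ChainingStateManager recovers the posterior.

     As in the un-chunked case, the posterior variance should be near zero and
     the mean should match the prior propagated through the transition matrix.
     """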
     with self.cached_session() as session:
         stub_model, data, true_params = self._get_single_model()
         chunk_size = 10
         input_fn = test_utils.AllWindowInputFn(
             input_pipeline.NumpyReader(data), window_size=chunk_size)
         features, _ = input_fn()
         state_manager = state_management.ChainingStateManager(
             state_saving_interval=1)
         state_manager.initialize_graph(stub_model)
         model_outputs = state_manager.define_loss(
             model=stub_model,
             features=features,
             mode=estimator_lib.ModeKeys.TRAIN)
         variables.global_variables_initializer().run()
         coordinator = coordinator_lib.Coordinator()
         queue_runner_impl.start_queue_runners(session, coord=coordinator)
         for _ in range(
                 data[feature_keys.TrainEvalFeatures.TIMES].shape[1] //
                 chunk_size):
             model_outputs.loss.eval()
         posterior_mean, posterior_var, posterior_times = session.run(
             model_outputs.end_state, feed_dict=true_params)
         coordinator.request_stop()
         coordinator.join()
         self.assertAllClose(numpy.zeros([1, 4, 4]),
                             posterior_var,
                             atol=1e-2)
         self.assertAllClose(numpy.dot(
             numpy.linalg.matrix_power(
                 stub_model.transition,
                 data[feature_keys.TrainEvalFeatures.TIMES].shape[1]),
             true_params[stub_model.prior_state_mean]),
                             posterior_mean[0],
                             rtol=1e-1)
         self.assertAllClose(
             data[feature_keys.TrainEvalFeatures.TIMES][:, -1],
             posterior_times)
Example #24
 def test_state_override(self):
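     """An overriding start state should be returned unchanged in end_state."""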
     test_start_state = (numpy.array([[2, 3, 4]]),
                         (numpy.array([2]), numpy.array([[3., 5.]])))
     data = {
         feature_keys.FilteringFeatures.TIMES: numpy.arange(5),
         feature_keys.FilteringFeatures.VALUES: numpy.zeros(shape=[5, 3])
     }
     features, _ = input_pipeline.WholeDatasetInputFn(
         input_pipeline.NumpyReader(data))()
     features[feature_keys.FilteringFeatures.STATE_TUPLE] = test_start_state
     stub_model = _StateOverrideModel()
     chainer = state_management.ChainingStateManager()
     stub_model.initialize_graph()
     chainer.initialize_graph(model=stub_model)
     model_outputs = chainer.define_loss(model=stub_model,
                                         features=features,
                                         mode=estimator_lib.ModeKeys.EVAL)
     with train.MonitoredSession() as session:
         end_state = session.run(model_outputs.end_state)
     nest.assert_same_structure(test_start_state, end_state)
     for expected, received in zip(nest.flatten(test_start_state),
                                   nest.flatten(end_state)):
         self.assertAllEqual(expected, received)
Example #25
 def _input_statistics_test_template(self,
                                     stat_object,
                                     num_features,
                                     dtype,
                                     give_full_data,
                                     warmup_iterations=0,
                                     rtol=1e-6,
                                     data_length=500,
                                     chunk_size=4):
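     """Check computed input statistics against the synthetic data moments.

     Verifies series start moments, overall feature moments, start time, and
     total observation count, optionally after warm-up iterations.
     """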
     graph = ops.Graph()
     with graph.as_default():
         numpy_dtype = dtype.as_numpy_dtype
         values = (
             (numpy.arange(data_length, dtype=numpy_dtype)[..., None] +
              numpy.arange(num_features, dtype=numpy_dtype)[None,
                                                            ...])[None])
         times = 2 * (numpy.arange(data_length)[None]) - 3
         if give_full_data:
             stat_object.set_data((times, values))
         features = {
             TrainEvalFeatures.TIMES: times,
             TrainEvalFeatures.VALUES: values
         }
         input_fn = input_pipeline.RandomWindowInputFn(
             batch_size=16,
             window_size=chunk_size,
             time_series_reader=input_pipeline.NumpyReader(features))
         statistics = stat_object.initialize_graph(features=input_fn()[0])
         with self.session(graph=graph) as session:
             variables.global_variables_initializer().run()
             coordinator = coordinator_lib.Coordinator()
             queue_runner_impl.start_queue_runners(session,
                                                   coord=coordinator)
             for _ in range(warmup_iterations):
                 # A control dependency should ensure that, for queue-based statistics,
                 # a use of any statistic is preceded by an update of all adaptive
                 # statistics.
                 statistics.total_observation_count.eval()
             self.assertAllClose(
                 range(num_features) +
                 numpy.mean(numpy.arange(chunk_size))[None],
                 statistics.series_start_moments.mean.eval(),
                 rtol=rtol)
             self.assertAllClose(
                 numpy.tile(
                     numpy.var(numpy.arange(chunk_size))[None],
                     [num_features]),
                 statistics.series_start_moments.variance.eval(),
                 rtol=rtol)
             self.assertAllClose(
                 numpy.mean(values[0], axis=0),
                 statistics.overall_feature_moments.mean.eval(),
                 rtol=rtol)
             self.assertAllClose(
                 numpy.var(values[0], axis=0),
                 statistics.overall_feature_moments.variance.eval(),
                 rtol=rtol)
             self.assertAllClose(-3,
                                 statistics.start_time.eval(),
                                 rtol=rtol)
             self.assertAllClose(data_length,
                                 statistics.total_observation_count.eval(),
                                 rtol=rtol)
             coordinator.request_stop()
             coordinator.join()
Example #26
 def test_one_shot_prediction_head_export(self, estimator_factory):
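   """Train, export, and predict through raw and tf.Example serving inputs."""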
   def _new_temp_dir():
     return os.path.join(test.get_temp_dir(), str(ops.uid()))
   model_dir = _new_temp_dir()
   categorical_column = feature_column.categorical_column_with_hash_bucket(
       key="categorical_exogenous_feature", hash_bucket_size=16)
   exogenous_feature_columns = [
       feature_column.numeric_column(
           "2d_exogenous_feature", shape=(2,)),
       feature_column.embedding_column(
           categorical_column=categorical_column, dimension=10)]
   estimator = estimator_factory(
       model_dir=model_dir,
       exogenous_feature_columns=exogenous_feature_columns,
       head_type=ts_head_lib.OneShotPredictionHead)
   train_features = {
       feature_keys.TrainEvalFeatures.TIMES: numpy.arange(
           20, dtype=numpy.int64),
       feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
           20, dtype=numpy.float32)[:, None], [1, 5]),
       "2d_exogenous_feature": numpy.ones([20, 2]),
       "categorical_exogenous_feature": numpy.array(
           ["strkey"] * 20)[:, None]
   }
   train_input_fn = input_pipeline.RandomWindowInputFn(
       input_pipeline.NumpyReader(train_features), shuffle_seed=2,
       num_threads=1, batch_size=16, window_size=16)
   estimator.train(input_fn=train_input_fn, steps=5)
   result = estimator.evaluate(input_fn=train_input_fn, steps=1)
   self.assertIn("average_loss", result)
   self.assertNotIn(feature_keys.State.STATE_TUPLE, result)
   input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
   export_location = estimator.export_saved_model(_new_temp_dir(),
                                                  input_receiver_fn)
   graph = ops.Graph()
   with graph.as_default():
     with session_lib.Session() as session:
       signatures = loader.load(
           session, [tag_constants.SERVING], export_location)
       self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                        list(signatures.signature_def.keys()))
       predict_signature = signatures.signature_def[
           feature_keys.SavedModelLabels.PREDICT]
       six.assertCountEqual(
           self,
           [feature_keys.FilteringFeatures.TIMES,
            feature_keys.FilteringFeatures.VALUES,
            "2d_exogenous_feature",
            "categorical_exogenous_feature"],
           predict_signature.inputs.keys())
       features = {
           feature_keys.TrainEvalFeatures.TIMES: numpy.tile(
               numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
           feature_keys.TrainEvalFeatures.VALUES: numpy.tile(numpy.arange(
               20, dtype=numpy.float32)[None, :, None], [2, 1, 5]),
           "2d_exogenous_feature": numpy.ones([2, 35, 2]),
           "categorical_exogenous_feature": numpy.tile(numpy.array(
               ["strkey"] * 35)[None, :, None], [2, 1, 1])
       }
       feeds = {
           graph.as_graph_element(input_value.name): features[input_key]
           for input_key, input_value in predict_signature.inputs.items()}
       fetches = {output_key: graph.as_graph_element(output_value.name)
                  for output_key, output_value
                  in predict_signature.outputs.items()}
       output = session.run(fetches, feed_dict=feeds)
       self.assertEqual((2, 15, 5), output["mean"].shape)
   # Build a parsing input function, then make a tf.Example for it to parse.
   export_location = estimator.export_saved_model(
       _new_temp_dir(),
       estimator.build_one_shot_parsing_serving_input_receiver_fn(
           filtering_length=20, prediction_length=15))
   graph = ops.Graph()
   with graph.as_default():
     with session_lib.Session() as session:
       example = example_pb2.Example()
       times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES]
       values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES]
       times.int64_list.value.extend(range(35))
       for i in range(20):
         values.float_list.value.extend(
             [float(i) * 2. + feature_number
              for feature_number in range(5)])
       real_feature = example.features.feature["2d_exogenous_feature"]
        categorical_feature = example.features.feature[
            "categorical_exogenous_feature"]
       for i in range(35):
         real_feature.float_list.value.extend([1, 1])
          categorical_feature.bytes_list.value.append(b"strkey")
       # Serialize the tf.Example for feeding to the Session
       examples = [example.SerializeToString()] * 2
       signatures = loader.load(
           session, [tag_constants.SERVING], export_location)
       predict_signature = signatures.signature_def[
           feature_keys.SavedModelLabels.PREDICT]
       ((_, input_value),) = predict_signature.inputs.items()
       feeds = {graph.as_graph_element(input_value.name): examples}
       fetches = {output_key: graph.as_graph_element(output_value.name)
                  for output_key, output_value
                  in predict_signature.outputs.items()}
       output = session.run(fetches, feed_dict=feeds)
       self.assertEqual((2, 15, 5), output["mean"].shape)
Example #27
    def train_helper(self,
                     input_window_size,
                     loss,
                     max_loss=None,
                     train_steps=200,
                     anomaly_prob=0.01,
                     anomaly_distribution=None,
                     multiple_periods=False):
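        """Train an ARRegressor, check its evaluation loss, and predict.

        The evaluation loss must fall below max_loss; for squared loss it must
        also equal the unscaled mean squared error of the evaluation output.
        """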
        np.random.seed(3)
        data_noise_stddev = 0.2
        if max_loss is None:
            if loss == ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS:
                max_loss = 1.0
            else:
                max_loss = 0.05 / (data_noise_stddev**2)
        train_data, test_data = self.create_data(
            noise_stddev=data_noise_stddev,
            anomaly_prob=anomaly_prob,
            multiple_periods=multiple_periods)
        output_window_size = 10
        window_size = input_window_size + output_window_size

        class _RunConfig(estimator_lib.RunConfig):
            @property
            def tf_random_seed(self):
                return 3

        estimator = ARRegressor(
            periodicities=self.period,
            anomaly_prior_probability=0.01 if anomaly_distribution else None,
            anomaly_distribution=anomaly_distribution,
            num_features=2,
            output_window_size=output_window_size,
            num_time_buckets=20,
            input_window_size=input_window_size,
            hidden_layer_sizes=[16],
            loss=loss,
            config=_RunConfig())
        train_input_fn = input_pipeline.RandomWindowInputFn(
            time_series_reader=input_pipeline.NumpyReader(train_data),
            window_size=window_size,
            batch_size=64,
            num_threads=1,
            shuffle_seed=2)
        test_input_fn = test_utils.AllWindowInputFn(
            time_series_reader=input_pipeline.NumpyReader(test_data),
            window_size=window_size)

        # Test training
        estimator.train(input_fn=train_input_fn, steps=train_steps)
        test_evaluation = estimator.evaluate(input_fn=test_input_fn, steps=1)
        test_loss = test_evaluation["loss"]
        logging.info("Final test loss: %f", test_loss)
        self.assertLess(test_loss, max_loss)
        if loss == ar_model.ARModel.SQUARED_LOSS:
            # Test that the evaluation loss is reported without input scaling.
            self.assertAllClose(
                test_loss,
                np.mean((test_evaluation["mean"] -
                         test_evaluation["observed"])**2))

        # Test predict
        train_data_times = train_data[TrainEvalFeatures.TIMES]
        train_data_values = train_data[TrainEvalFeatures.VALUES]
        test_data_times = test_data[TrainEvalFeatures.TIMES]
        test_data_values = test_data[TrainEvalFeatures.VALUES]
        predict_times = np.expand_dims(
            np.concatenate(
                [train_data_times[input_window_size:], test_data_times]), 0)
        predict_true_values = np.expand_dims(
            np.concatenate(
                [train_data_values[input_window_size:], test_data_values]), 0)
        state_times = np.expand_dims(train_data_times[:input_window_size], 0)
        state_values = np.expand_dims(train_data_values[:input_window_size, :],
                                      0)
        state_exogenous = state_times[:, :, None][:, :, :0]

        def prediction_input_fn():
            return ({
                PredictionFeatures.TIMES:
                training.limit_epochs(predict_times, num_epochs=1),
                PredictionFeatures.STATE_TUPLE:
                (state_times, state_values, state_exogenous)
            }, {})

        (predictions, ) = tuple(
            estimator.predict(input_fn=prediction_input_fn))
        predicted_mean = predictions["mean"][:, 0]
        true_values = predict_true_values[0, :, 0]

        if loss == ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS:
            variances = predictions["covariance"][:, 0]
            standard_deviations = np.sqrt(variances)
            # Note that we may get tighter bounds with more training steps.
            errors = np.abs(predicted_mean -
                            true_values) > 4 * standard_deviations
            fraction_errors = np.mean(errors)
            logging.info("Fraction errors: %f", fraction_errors)
Example #28
 def test_savedmodel_state_override(self):
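     """Filtering split across two SavedModel calls matches a single call.

     Both paths start from the same exported start state; their final filtering
     state and subsequent one-step predictions should agree.
     """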
     random_model = RandomStateSpaceModel(
         state_dimension=5,
         state_noise_dimension=4,
         configuration=state_space_model.StateSpaceModelConfiguration(
             exogenous_feature_columns=[
                 layers.real_valued_column("exogenous")
             ],
             dtype=dtypes.float64,
             num_features=1))
     estimator = estimators.StateSpaceRegressor(
         model=random_model,
         optimizer=gradient_descent.GradientDescentOptimizer(0.1))
     combined_input_fn = input_pipeline.WholeDatasetInputFn(
         input_pipeline.NumpyReader({
             feature_keys.FilteringFeatures.TIMES: [1, 2, 3, 4],
             feature_keys.FilteringFeatures.VALUES: [1., 2., 3., 4.],
             "exogenous": [-1., -2., -3., -4.]
         }))
     estimator.train(combined_input_fn, steps=1)
     export_location = estimator.export_saved_model(
         self.get_temp_dir(),
         estimator.build_raw_serving_input_receiver_fn())
     with ops.Graph().as_default() as graph:
         random_model.initialize_graph()
         with self.session(graph=graph) as session:
             variables.global_variables_initializer().run()
             evaled_start_state = session.run(
                 random_model.get_start_state())
     evaled_start_state = [
         state_element[None, ...] for state_element in evaled_start_state
     ]
     with ops.Graph().as_default() as graph:
         with self.session(graph=graph) as session:
             signatures = loader.load(session, [tag_constants.SERVING],
                                      export_location)
             first_split_filtering = saved_model_utils.filter_continuation(
                 continue_from={
                     feature_keys.FilteringResults.STATE_TUPLE:
                     evaled_start_state
                 },
                 signatures=signatures,
                 session=session,
                 features={
                     feature_keys.FilteringFeatures.TIMES: [1, 2],
                     feature_keys.FilteringFeatures.VALUES: [1., 2.],
                     "exogenous": [[-1.], [-2.]]
                 })
             second_split_filtering = saved_model_utils.filter_continuation(
                 continue_from=first_split_filtering,
                 signatures=signatures,
                 session=session,
                 features={
                     feature_keys.FilteringFeatures.TIMES: [3, 4],
                     feature_keys.FilteringFeatures.VALUES: [3., 4.],
                     "exogenous": [[-3.], [-4.]]
                 })
             combined_filtering = saved_model_utils.filter_continuation(
                 continue_from={
                     feature_keys.FilteringResults.STATE_TUPLE:
                     evaled_start_state
                 },
                 signatures=signatures,
                 session=session,
                 features={
                     feature_keys.FilteringFeatures.TIMES: [1, 2, 3, 4],
                     feature_keys.FilteringFeatures.VALUES:
                     [1., 2., 3., 4.],
                     "exogenous": [[-1.], [-2.], [-3.], [-4.]]
                 })
             split_predict = saved_model_utils.predict_continuation(
                 continue_from=second_split_filtering,
                 signatures=signatures,
                 session=session,
                 steps=1,
                 exogenous_features={"exogenous": [[[-5.]]]})
             combined_predict = saved_model_utils.predict_continuation(
                 continue_from=combined_filtering,
                 signatures=signatures,
                 session=session,
                 steps=1,
                 exogenous_features={"exogenous": [[[-5.]]]})
     for state_key, combined_state_value in combined_filtering.items():
         if state_key == feature_keys.FilteringResults.TIMES:
             continue
         self.assertAllClose(combined_state_value,
                             second_split_filtering[state_key])
     for prediction_key, combined_value in combined_predict.items():
         self.assertAllClose(combined_value, split_predict[prediction_key])
Example #29
 def test_numpy_multivariate(self):
     data = _make_numpy_time_series(num_features=3, num_samples=50)
     time_series_reader = input_pipeline.NumpyReader(data)
     self._test_multivariate(time_series_reader, num_features=3)
Example #30
 def test_numpy_discard_out_of_order(self):
     data = _make_numpy_time_series(num_features=1, num_samples=50)
     time_series_reader = input_pipeline.NumpyReader(data)
     self._test_out_of_order(time_series_reader, discard_out_of_order=True)