def _test_missing_values(self, cut_start, cut_end, offset):
    """Evaluate the chained stub model on generated data and return its loss.

    Test data of length 100 comes from `self._make_test_data`. It is read in
    windows of size 10 and passed through a `ChainingStateManager` built with
    `state_saving_interval=1`. The training loss is evaluated ten times and
    then once more; the value from that last evaluation is returned.

    Args:
        cut_start: Forwarded unchanged to `self._make_test_data`.
        cut_end: Forwarded unchanged to `self._make_test_data`.
        offset: Forwarded unchanged to `self._make_test_data`.

    Returns:
        The loss value produced by the evaluation that follows the ten
        warm-up evaluations.
    """
    warmup_evaluations = 10
    model = StubTimeSeriesModel()
    series = self._make_test_data(
        length=100, cut_start=cut_start, cut_end=cut_end, offset=offset)
    reader = input_pipeline.NumpyReader(series)
    window_input_fn = test_utils.AllWindowInputFn(reader, window_size=10)
    state_manager = state_management.ChainingStateManager(
        state_saving_interval=1)
    features, _ = window_input_fn()
    model.initialize_graph()
    state_manager.initialize_graph(model=model)
    train_outputs = state_manager.define_loss(
        model=model,
        features=features,
        mode=estimator_lib.ModeKeys.TRAIN)
    with self.cached_session() as session:
        session.run(variables.global_variables_initializer())
        coordinator = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coordinator)
        for _ in range(warmup_evaluations):
            session.run(train_outputs.loss)
        final_loss = session.run(train_outputs.loss)
        coordinator.request_stop()
        coordinator.join()
        return final_loss
def _test_pass_to_next(self, read_offset, step, correct_offset):
    """Evaluate the loss on a shifted copy of a series after an initial pass.

    Test data of length `100 + read_offset` is generated with
    `self._make_test_data`. Two whole-dataset input functions are built from
    it: one that drops the last `read_offset` entries of every feature and
    one that drops the first `read_offset` entries. Both are run through the
    same `ChainingStateManager` and stub model; the loss for the first is
    evaluated once, then the loss for the second is evaluated and returned.

    Args:
        read_offset: Number of entries by which the second series is shifted
            relative to the first. Zero is allowed, in which case both
            series are the full data.
        step: Forwarded to `self._make_test_data` as `step`.
        correct_offset: Forwarded to `StubTimeSeriesModel` as
            `correct_offset`.

    Returns:
        The evaluated loss for the shifted ("result") series.
    """
    stub_model = StubTimeSeriesModel(correct_offset=correct_offset)
    data = self._make_test_data(
        length=100 + read_offset, cut_start=None, cut_end=None, offset=100.,
        step=step)
    # Compute the stop index explicitly: `v[:-read_offset]` would be the
    # empty slice `v[:0]` when read_offset is 0.
    init_input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(
            {k: v[:len(v) - read_offset] for k, v in data.items()}))
    result_input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(
            {k: v[read_offset:] for k, v in data.items()}))

    chainer = state_management.ChainingStateManager(
        state_saving_interval=1)
    stub_model.initialize_graph()
    chainer.initialize_graph(model=stub_model)
    init_model_outputs = chainer.define_loss(
        model=stub_model, features=init_input_fn()[0],
        mode=estimator_lib.ModeKeys.TRAIN)
    result_model_outputs = chainer.define_loss(
        model=stub_model, features=result_input_fn()[0],
        mode=estimator_lib.ModeKeys.TRAIN)
    # `test_session` is deprecated; `cached_session` matches the sibling
    # helpers in this file.
    with self.cached_session() as session:
        variables.global_variables_initializer().run()
        coordinator = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coordinator)
        # The first evaluation runs the initial series through the chainer
        # before the shifted series is evaluated.
        init_model_outputs.loss.eval()
        returned_loss = result_model_outputs.loss.eval()
        coordinator.request_stop()
        coordinator.join()
        return returned_loss
def _test_initialization(self, warmup_iterations, batch_size):
    """Return the stub model's loss after a configurable number of warm-ups.

    Test data of length 20 is generated with `self._make_test_data` and read
    in windows of size 10. A `batch_size` of -1 selects
    `test_utils.AllWindowInputFn`; any other value selects
    `input_pipeline.RandomWindowInputFn` with that batch size.

    Args:
        warmup_iterations: Number of loss evaluations to run before the one
            whose value is returned.
        batch_size: Batch size for random windows, or -1 to use all windows.

    Returns:
        The loss value from the evaluation following the warm-up iterations.
    """
    model = StubTimeSeriesModel()
    series = self._make_test_data(
        length=20, cut_start=None, cut_end=None, offset=0.)
    reader = input_pipeline.NumpyReader(series)
    if batch_size != -1:
        windowed_input_fn = input_pipeline.RandomWindowInputFn(
            reader, window_size=10, batch_size=batch_size)
    else:
        windowed_input_fn = test_utils.AllWindowInputFn(
            reader, window_size=10)
    state_manager = state_management.ChainingStateManager(
        state_saving_interval=1)
    features, _ = windowed_input_fn()
    model.initialize_graph()
    state_manager.initialize_graph(model=model)
    train_outputs = state_manager.define_loss(
        model=model,
        features=features,
        mode=estimator_lib.ModeKeys.TRAIN)
    with self.cached_session() as session:
        session.run(variables.global_variables_initializer())
        coordinator = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coordinator)
        # Warm up saved state
        for _ in range(warmup_iterations):
            session.run(train_outputs.loss)
        final_loss = session.run(train_outputs.loss)
        coordinator.request_stop()
        coordinator.join()
        return final_loss
def chained_model_outputs(original_model, data):
    """Build chained-state outputs for a model and return an evaluator.

    `data` is read in windows of `chunk_size` (a name taken from the
    enclosing scope, not a parameter) and passed through a
    `ChainingStateManager` constructed with `state_saving_interval=1`.

    Args:
        original_model: The model handed to the state manager's
            `initialize_graph` and `define_loss`.
        data: Input handed to `input_pipeline.NumpyReader`.

    Returns:
        A function taking a session. It evaluates the loss 50 times in that
        session and then returns the result of running
        `(loss, posteriors, predictions)`, where `posteriors` is
        `((posterior_mean,), (posterior_var,), priors_from_time)` unpacked
        from the model outputs' `end_state`.
    """
    input_fn = test_utils.AllWindowInputFn(
        input_pipeline.NumpyReader(data), window_size=chunk_size)
    state_manager = state_management.ChainingStateManager(
        state_saving_interval=1)
    features, _ = input_fn()
    state_manager.initialize_graph(original_model)
    model_outputs = state_manager.define_loss(
        model=original_model,
        features=features,
        mode=estimator_lib.ModeKeys.TRAIN)

    def _eval_outputs(session):
        """Warm up saved state in `session`, then evaluate the outputs."""
        for _ in range(50):
            # Warm up saved state. Evaluate in the session we were given
            # rather than whichever session happens to be the default.
            model_outputs.loss.eval(session=session)
        (posterior_mean, posterior_var,
         priors_from_time) = model_outputs.end_state
        posteriors = ((posterior_mean,), (posterior_var,), priors_from_time)
        outputs = (model_outputs.loss, posteriors,
                   model_outputs.predictions)
        chunked_outputs_evaled = session.run(outputs)
        return chunked_outputs_evaled

    return _eval_outputs
def _custom_time_series_regressor(
        model_dir, head_type, exogenous_feature_columns):
    """Create a `TimeSeriesRegressor` wrapping the example LSTM model.

    The LSTM model is built with 5 features and 128 units, trained with an
    Adam optimizer (0.001), a run config with `tf_random_seed=4`, and a
    default-constructed `ChainingStateManager`.

    Args:
        model_dir: Passed to the regressor as `model_dir`.
        head_type: Passed to the regressor as `head_type`.
        exogenous_feature_columns: Passed to the LSTM model as
            `exogenous_feature_columns`.

    Returns:
        The constructed `ts_estimators.TimeSeriesRegressor`.
    """
    lstm_model = lstm_example._LSTMModel(
        num_features=5,
        num_units=128,
        exogenous_feature_columns=exogenous_feature_columns)
    adam_optimizer = adam.AdamOptimizer(0.001)
    run_config = estimator_lib.RunConfig(tf_random_seed=4)
    chaining_manager = state_management.ChainingStateManager()
    regressor = ts_estimators.TimeSeriesRegressor(
        model=lstm_model,
        optimizer=adam_optimizer,
        config=run_config,
        state_manager=chaining_manager,
        head_type=head_type,
        model_dir=model_dir)
    return regressor
def __init__(self, model, state_manager=None, optimizer=None, model_dir=None,
             config=None):
    """Initialize the regressor; see TimeSeriesRegressor for the arguments.

    Args:
        model: Must be an instance of `state_space_model.StateSpaceModel`.
        state_manager: Forwarded to the parent constructor. When None, a
            default-constructed `ChainingStateManager` is used instead.
        optimizer: Forwarded to the parent constructor.
        model_dir: Forwarded to the parent constructor.
        config: Forwarded to the parent constructor.

    Raises:
        ValueError: If `model` is not a `StateSpaceModel` instance.
    """
    is_state_space = isinstance(model, state_space_model.StateSpaceModel)
    if not is_state_space:
        message = (
            "StateSpaceRegressor only supports state space models (children of "
            "StateSpaceModel) in its `model` argument, got {}.").format(model)
        raise ValueError(message)
    effective_state_manager = (
        state_manager if state_manager is not None
        else state_management.ChainingStateManager())
    super(StateSpaceRegressor, self).__init__(
        model=model,
        state_manager=effective_state_manager,
        optimizer=optimizer,
        model_dir=model_dir,
        config=config)
def test_initialize_graph_state_manager_error(self):
    """Expect a ValueError mentioning "initialize_graph" for this sequence.

    The state manager's `define_loss` is called without a preceding
    `initialize_graph` call on either the model or the state manager, and
    the resulting loss is then evaluated. The whole sequence sits inside the
    `assertRaisesRegexp` context, so the error may come from any step.
    """
    with self.assertRaisesRegexp(ValueError, "initialize_graph"):
        model = RandomStateSpaceModel(2, 2)
        state_manager = state_management.ChainingStateManager()
        outputs = state_manager.define_loss(
            model=model,
            features={
                feature_keys.TrainEvalFeatures.TIMES:
                    constant_op.constant([[1, 2]]),
                feature_keys.TrainEvalFeatures.VALUES:
                    constant_op.constant([[[1.], [2.]]])
            },
            mode=estimator_lib.ModeKeys.TRAIN)
        initializer = variables.global_variables_initializer()
        # `test_session` is deprecated; use `cached_session` like the other
        # tests in this file.
        with self.cached_session() as sess:
            sess.run([initializer])
            outputs.loss.eval()
def test_state_override(self):
    """Check that a start state placed in the features comes back unchanged.

    A state tuple is stored under `FilteringFeatures.STATE_TUPLE` in the
    features of a five-step series. After `define_loss` in EVAL mode with a
    `_StateOverrideModel`, the evaluated `end_state` must have the same
    nested structure and element values as the tuple that was stored.
    """
    injected_state = (
        numpy.array([[2, 3, 4]]),
        (numpy.array([2]), numpy.array([[3., 5.]])))
    series = {
        feature_keys.FilteringFeatures.TIMES: numpy.arange(5),
        feature_keys.FilteringFeatures.VALUES: numpy.zeros(shape=[5, 3])
    }
    whole_dataset_input_fn = input_pipeline.WholeDatasetInputFn(
        input_pipeline.NumpyReader(series))
    features, _ = whole_dataset_input_fn()
    features[feature_keys.FilteringFeatures.STATE_TUPLE] = injected_state
    model = _StateOverrideModel()
    state_manager = state_management.ChainingStateManager()
    model.initialize_graph()
    state_manager.initialize_graph(model=model)
    eval_outputs = state_manager.define_loss(
        model=model,
        features=features,
        mode=estimator_lib.ModeKeys.EVAL)
    with train.MonitoredSession() as session:
        final_state = session.run(eval_outputs.end_state)
    nest.assert_same_structure(injected_state, final_state)
    expected_leaves = nest.flatten(injected_state)
    received_leaves = nest.flatten(final_state)
    for expected, received in zip(expected_leaves, received_leaves):
        self.assertAllEqual(expected, received)
def test_chained_exact_posterior_recovery_no_transition_noise(self):
    """Check the chained end state against values computed with NumPy.

    The model, data and parameter feeds come from `self._get_single_model`.
    The data is read in windows of 10 through a `ChainingStateManager`, the
    loss is evaluated once per window, and the end state is then fetched
    with the parameter feeds supplied. The test asserts that:

    * the posterior variance is close to zeros of shape [1, 4, 4];
    * the posterior mean is close to the transition matrix raised to the
      number of time steps, applied to the fed prior state mean;
    * the posterior times equal the last time of each series.
    """
    # `test_session` is deprecated; use `cached_session` like the other
    # tests in this file.
    with self.cached_session() as session:
        stub_model, data, true_params = self._get_single_model()
        chunk_size = 10
        num_times = data[feature_keys.TrainEvalFeatures.TIMES].shape[1]
        input_fn = test_utils.AllWindowInputFn(
            input_pipeline.NumpyReader(data), window_size=chunk_size)
        features, _ = input_fn()
        state_manager = state_management.ChainingStateManager(
            state_saving_interval=1)
        state_manager.initialize_graph(stub_model)
        model_outputs = state_manager.define_loss(
            model=stub_model,
            features=features,
            mode=estimator_lib.ModeKeys.TRAIN)
        variables.global_variables_initializer().run()
        coordinator = coordinator_lib.Coordinator()
        queue_runner_impl.start_queue_runners(session, coord=coordinator)
        # One loss evaluation per window of the series.
        for _ in range(num_times // chunk_size):
            model_outputs.loss.eval()
        posterior_mean, posterior_var, posterior_times = session.run(
            model_outputs.end_state,
            feed_dict=true_params)
        coordinator.request_stop()
        coordinator.join()
        self.assertAllClose(numpy.zeros([1, 4, 4]), posterior_var, atol=1e-2)
        self.assertAllClose(
            numpy.dot(
                numpy.linalg.matrix_power(stub_model.transition, num_times),
                true_params[stub_model.prior_state_mean]),
            posterior_mean[0],
            rtol=1e-1)
        self.assertAllClose(
            data[feature_keys.TrainEvalFeatures.TIMES][:, -1],
            posterior_times)
def test_one_shot_prediction_head_export(self):
    """Train with a one-shot prediction head, export, and predict from it.

    An LSTM-based `TimeSeriesRegressor` with `OneShotPredictionHead` is
    trained for 5 steps on a 20-step series with two exogenous features,
    then exported as a SavedModel. The test loads the export and checks
    that PREDICT is its only signature and that the signature's inputs are
    times, values and the two exogenous features. It then feeds a batch of
    2 with 35 times and 20 values, and expects `output["mean"]` to have
    shape (2, 15, 5).
    """
    checkpoint_dir = self.get_temp_dir()
    hashed_column = feature_column.categorical_column_with_hash_bucket(
        key="categorical_exogenous_feature", hash_bucket_size=16)
    numeric_exogenous = feature_column.numeric_column(
        "2d_exogenous_feature", shape=(2,))
    embedded_exogenous = feature_column.embedding_column(
        categorical_column=hashed_column, dimension=10)
    exogenous_columns = [numeric_exogenous, embedded_exogenous]
    lstm_model = lstm_example._LSTMModel(
        num_features=5,
        num_units=128,
        exogenous_feature_columns=exogenous_columns)
    regressor = ts_estimators.TimeSeriesRegressor(
        model=lstm_model,
        optimizer=adam.AdamOptimizer(0.001),
        config=estimator_lib.RunConfig(tf_random_seed=4),
        state_manager=state_management.ChainingStateManager(),
        head_type=ts_head_lib.OneShotPredictionHead,
        model_dir=checkpoint_dir)

    training_series = {
        feature_keys.TrainEvalFeatures.TIMES:
            numpy.arange(20, dtype=numpy.int64),
        feature_keys.TrainEvalFeatures.VALUES:
            numpy.tile(numpy.arange(20, dtype=numpy.float32)[:, None],
                       [1, 5]),
        "2d_exogenous_feature":
            numpy.ones([20, 2]),
        "categorical_exogenous_feature":
            numpy.array(["strkey"] * 20)[:, None]
    }
    training_reader = input_pipeline.NumpyReader(training_series)
    training_input_fn = input_pipeline.RandomWindowInputFn(
        training_reader,
        shuffle_seed=2,
        num_threads=1,
        batch_size=16,
        window_size=16)
    regressor.train(input_fn=training_input_fn, steps=5)

    receiver_fn = regressor.build_raw_serving_input_receiver_fn()
    export_location = regressor.export_savedmodel(
        self.get_temp_dir(), receiver_fn)

    graph = ops.Graph()
    with graph.as_default(), session_lib.Session() as session:
        signatures = loader.load(
            session, [tag_constants.SERVING], export_location)
        self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                         list(signatures.signature_def.keys()))
        predict_signature = signatures.signature_def[
            feature_keys.SavedModelLabels.PREDICT]
        expected_input_keys = [
            feature_keys.FilteringFeatures.TIMES,
            feature_keys.FilteringFeatures.VALUES,
            "2d_exogenous_feature",
            "categorical_exogenous_feature"
        ]
        six.assertCountEqual(
            self, expected_input_keys, predict_signature.inputs.keys())

        # 35 times but only 20 observed values per series.
        serving_features = {
            feature_keys.TrainEvalFeatures.TIMES:
                numpy.tile(
                    numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
            feature_keys.TrainEvalFeatures.VALUES:
                numpy.tile(
                    numpy.arange(20, dtype=numpy.float32)[None, :, None],
                    [2, 1, 5]),
            "2d_exogenous_feature":
                numpy.ones([2, 35, 2]),
            "categorical_exogenous_feature":
                numpy.tile(
                    numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1])
        }

        # Map the signature's tensor names onto graph elements.
        feeds = {}
        for input_key, input_value in predict_signature.inputs.items():
            placeholder = graph.as_graph_element(input_value.name)
            feeds[placeholder] = serving_features[input_key]
        fetches = {}
        for output_key, output_value in predict_signature.outputs.items():
            fetches[output_key] = graph.as_graph_element(output_value.name)

        output = session.run(fetches, feed_dict=feeds)
        self.assertAllEqual((2, 15, 5), output["mean"].shape)
def train_and_predict(csv_file_name=_DATA_FILE, training_steps=200,
                      estimator_config=None, export_directory=None):
    """Train and predict using a custom time series model.

    Trains an LSTM-based `TimeSeriesRegressor` on the CSV file, evaluates it
    on the whole dataset, predicts 100 steps past the evaluation, and then
    exports a SavedModel and checks that it produces the same predictions.

    Args:
        csv_file_name: Path handed to `tf.contrib.timeseries.CSVReader`.
        training_steps: Number of steps passed to `estimator.train`.
        estimator_config: Passed to the estimator as `config`.
        export_directory: Directory the SavedModel is exported under. When
            None, a temporary directory is created for the export and
            removed again before returning.

    Returns:
        A tuple `(times, observed, all_times, predicted_mean)`: the
        evaluation times and observed values of the first series, the
        evaluation times followed by the prediction times, and the
        evaluation means followed by the predicted means (squeezed).

    Raises:
        AssertionError: If the SavedModel predictions do not match those of
            `estimator.predict` (via `numpy.testing.assert_allclose`).
    """
    import shutil  # Local import: only needed to clean up the temp export.

    # Construct an Estimator from our LSTM model.
    exogenous_feature_columns = [
        # Exogenous features are not part of the loss, but can inform
        # predictions. In this example the features have no extra
        # information, but are included as an API example.
        tf.contrib.layers.real_valued_column(
            "2d_exogenous_feature", dimension=2)
    ]
    estimator = ts_estimators.TimeSeriesRegressor(
        model=_LSTMModel(num_features=5, num_units=128,
                         exogenous_feature_columns=exogenous_feature_columns),
        optimizer=tf.train.AdamOptimizer(0.001),
        config=estimator_config,
        # Set state to be saved across windows.
        state_manager=state_management.ChainingStateManager())
    reader = tf.contrib.timeseries.CSVReader(
        csv_file_name,
        column_names=((tf.contrib.timeseries.TrainEvalFeatures.TIMES,)
                      + (tf.contrib.timeseries.TrainEvalFeatures.VALUES,) * 5
                      + ("2d_exogenous_feature",) * 2))
    train_input_fn = tf.contrib.timeseries.RandomWindowInputFn(
        reader, batch_size=4, window_size=32)
    estimator.train(input_fn=train_input_fn, steps=training_steps)
    evaluation_input_fn = tf.contrib.timeseries.WholeDatasetInputFn(reader)
    evaluation = estimator.evaluate(input_fn=evaluation_input_fn, steps=1)
    # Predict starting after the evaluation
    predict_exogenous_features = {
        "2d_exogenous_feature": numpy.concatenate(
            [numpy.ones([1, 100, 1]), numpy.zeros([1, 100, 1])],
            axis=-1)
    }
    (predictions,) = tuple(
        estimator.predict(
            input_fn=tf.contrib.timeseries.predict_continuation_input_fn(
                evaluation, steps=100,
                exogenous_features=predict_exogenous_features)))
    times = evaluation["times"][0]
    observed = evaluation["observed"][0, :, :]
    predicted_mean = numpy.squeeze(
        numpy.concatenate(
            [evaluation["mean"][0], predictions["mean"]], axis=0))
    all_times = numpy.concatenate([times, predictions["times"]], axis=0)

    # Export the model in SavedModel format.
    owns_export_directory = export_directory is None
    if owns_export_directory:
        export_directory = tempfile.mkdtemp()
    try:
        input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
        export_location = estimator.export_savedmodel(
            export_directory, input_receiver_fn)
        # Predict using the SavedModel
        with tf.Graph().as_default():
            with tf.Session() as session:
                signatures = tf.saved_model.loader.load(
                    session, [tf.saved_model.tag_constants.SERVING],
                    export_location)
                saved_model_output = (
                    tf.contrib.timeseries.saved_model_utils
                    .predict_continuation(
                        continue_from=evaluation, signatures=signatures,
                        session=session, steps=100,
                        exogenous_features=predict_exogenous_features))
                # The exported model gives the same results as the
                # Estimator.predict() call above.
                numpy.testing.assert_allclose(
                    predictions["mean"],
                    numpy.squeeze(saved_model_output["mean"], axis=0))
    finally:
        # The temporary directory's path is never returned, so nothing else
        # can use it; remove it instead of leaking it.
        if owns_export_directory:
            shutil.rmtree(export_directory, ignore_errors=True)
    return times, observed, all_times, predicted_mean
def train_and_predict(csv_file_name=_DATA_FILE, training_steps=200,
                      estimator_config=None, export_directory=None):
    """Train and predict using a custom time series model.

    Trains an LSTM-based `TimeSeriesRegressor` on the CSV file (which
    includes a numeric and a string-valued exogenous feature), evaluates it
    on the whole dataset, and predicts 100 steps past the evaluation. It
    then exports a SavedModel, cold-starts it by filtering on the series,
    and checks that it produces the same predictions.

    Args:
        csv_file_name: Path handed to `tf.contrib.timeseries.CSVReader`.
        training_steps: Number of steps passed to `estimator.train`.
        estimator_config: Passed to the estimator as `config`.
        export_directory: Directory the SavedModel is exported under. When
            None, a temporary directory is created for the export and
            removed again before returning.

    Returns:
        A tuple `(times, observed, all_times, predicted_mean)`: the
        evaluation times and observed values of the first series, the
        evaluation times followed by the prediction times, and the
        evaluation means followed by the predicted means (squeezed).

    Raises:
        AssertionError: If the SavedModel predictions do not match those of
            `estimator.predict` (via `numpy.testing.assert_allclose`).
    """
    import shutil  # Local import: only needed to clean up the temp export.

    # Construct an Estimator from our LSTM model.
    categorical_column = tf.feature_column.categorical_column_with_hash_bucket(
        key="categorical_exogenous_feature", hash_bucket_size=16)
    exogenous_feature_columns = [
        # Exogenous features are not part of the loss, but can inform
        # predictions. In this example the features have no extra
        # information, but are included as an API example.
        tf.feature_column.numeric_column(
            "2d_exogenous_feature", shape=(2,)),
        tf.feature_column.embedding_column(
            categorical_column=categorical_column, dimension=10)
    ]
    estimator = ts_estimators.TimeSeriesRegressor(
        model=_LSTMModel(num_features=5, num_units=128,
                         exogenous_feature_columns=exogenous_feature_columns),
        optimizer=tf.train.AdamOptimizer(0.001),
        config=estimator_config,
        # Set state to be saved across windows.
        state_manager=state_management.ChainingStateManager())
    reader = tf.contrib.timeseries.CSVReader(
        csv_file_name,
        column_names=((tf.contrib.timeseries.TrainEvalFeatures.TIMES,)
                      + (tf.contrib.timeseries.TrainEvalFeatures.VALUES,) * 5
                      + ("2d_exogenous_feature",) * 2
                      + ("categorical_exogenous_feature",)),
        # Data types other than for `times` need to be specified if they
        # aren't float32. In this case one of our exogenous features has
        # string dtype.
        column_dtypes=((tf.int64,) + (tf.float32,) * 7 + (tf.string,)))
    train_input_fn = tf.contrib.timeseries.RandomWindowInputFn(
        reader, batch_size=4, window_size=32)
    estimator.train(input_fn=train_input_fn, steps=training_steps)
    evaluation_input_fn = tf.contrib.timeseries.WholeDatasetInputFn(reader)
    evaluation = estimator.evaluate(input_fn=evaluation_input_fn, steps=1)
    # Predict starting after the evaluation
    predict_exogenous_features = {
        "2d_exogenous_feature": numpy.concatenate(
            [numpy.ones([1, 100, 1]), numpy.zeros([1, 100, 1])],
            axis=-1),
        "categorical_exogenous_feature": numpy.array(
            ["strkey"] * 100)[None, :, None]
    }
    (predictions,) = tuple(
        estimator.predict(
            input_fn=tf.contrib.timeseries.predict_continuation_input_fn(
                evaluation, steps=100,
                exogenous_features=predict_exogenous_features)))
    times = evaluation["times"][0]
    observed = evaluation["observed"][0, :, :]
    predicted_mean = numpy.squeeze(
        numpy.concatenate(
            [evaluation["mean"][0], predictions["mean"]], axis=0))
    all_times = numpy.concatenate([times, predictions["times"]], axis=0)

    # Export the model in SavedModel format. We include a bit of extra
    # boilerplate for "cold starting" as if we didn't have any state from the
    # Estimator, which is the case when serving from a SavedModel. If
    # Estimator output is available, the result of "Estimator.evaluate" can
    # be passed directly to
    # `tf.contrib.timeseries.saved_model_utils.predict_continuation` as the
    # `continue_from` argument.
    with tf.Graph().as_default():
        filter_feature_tensors, _ = evaluation_input_fn()
        with tf.train.MonitoredSession() as session:
            # Fetch the series to "warm up" our state, which will allow us to
            # make predictions for its future values. This is just a
            # dictionary of times, values, and exogenous features mapping to
            # numpy arrays. The use of an input_fn is just a convenience for
            # the example; they can also be specified manually.
            filter_features = session.run(filter_feature_tensors)

    owns_export_directory = export_directory is None
    if owns_export_directory:
        export_directory = tempfile.mkdtemp()
    try:
        input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
        export_location = estimator.export_savedmodel(
            export_directory, input_receiver_fn)
        # Warm up and predict using the SavedModel
        with tf.Graph().as_default():
            with tf.Session() as session:
                signatures = tf.saved_model.loader.load(
                    session, [tf.saved_model.tag_constants.SERVING],
                    export_location)
                state = (
                    tf.contrib.timeseries.saved_model_utils.cold_start_filter(
                        signatures=signatures, session=session,
                        features=filter_features))
                saved_model_output = (
                    tf.contrib.timeseries.saved_model_utils
                    .predict_continuation(
                        continue_from=state, signatures=signatures,
                        session=session, steps=100,
                        exogenous_features=predict_exogenous_features))
                # The exported model gives the same results as the
                # Estimator.predict() call above.
                numpy.testing.assert_allclose(
                    predictions["mean"],
                    numpy.squeeze(saved_model_output["mean"], axis=0))
    finally:
        # The temporary directory's path is never returned, so nothing else
        # can use it; remove it instead of leaking it.
        if owns_export_directory:
            shutil.rmtree(export_directory, ignore_errors=True)
    return times, observed, all_times, predicted_mean