def deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer,
                     dnn_optimizer, training_chief_hooks):
  linear_optimizer = get_optimizer_instance(linear_optimizer, None)
  dnn_optimizer = get_optimizer_instance(dnn_optimizer, None)
  if KUIBA:
    push_click_auc = kraken_push_auc(
        "click",
        0,
        tf.strings.to_number(features['userId'], out_type=tf.int64),
        tf.strings.to_number(features['movieId'], out_type=tf.int64),
        tf.nn.sigmoid(tf.squeeze(logits, 1)),
        tf.cast(labels, tf.int64),
        tf.cast(features['timestamp'], tf.int64) * 1000000,
    )
  else:
    push_click_auc = tf.no_op("dummy")
  with tf.control_dependencies([push_click_auc]):
    train_op_fn = get_train_op_fn(linear_optimizer, dnn_optimizer)
    head = Head(task)
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=train_op_fn,
        logits=logits,
        training_chief_hooks=training_chief_hooks)
def deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer,
                     dnn_optimizer, training_chief_hooks):
  linear_optimizer = get_optimizer_instance(linear_optimizer, 0.005)
  dnn_optimizer = get_optimizer_instance(dnn_optimizer, 0.01)
  train_op_fn = get_train_op_fn(linear_optimizer, dnn_optimizer)
  head = Head(task)
  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=train_op_fn,
      logits=logits,
      training_chief_hooks=training_chief_hooks)
def __init__(self, n_classes, optimizer=None, learning_rate=None,
             one_batchnorm_per_resblock=False, dropout_rate=0, model_dir=None,
             config=None, warm_start_from=None, multi_gpu=False):
  params = {
      'n_classes': n_classes,
      # If an instance of an optimizer is passed in, this will just
      # return it.
      'optimizer': (None if optimizer is None else
                    get_optimizer_instance(optimizer, learning_rate)),
      'one_batchnorm_per_resblock': one_batchnorm_per_resblock,
      'dropout_rate': dropout_rate,
  }
  # if multi_gpu:
  #     params['optimizer'] = TowerOptimizer(params['optimizer'])
  #     _model_fn = replicate_model_fn(_model_fn)
  super(HighRes3DNet, self).__init__(
      model_fn=model_fn,
      model_dir=model_dir,
      params=params,
      config=config,
      warm_start_from=warm_start_from)
def __init__(self, params, model_dir=None, optimizer='Adagrad', config=None,
             warm_start_from=None):
  if not optimizer:
    optimizer = 'Adagrad'
  self.optimizer = optimizers.get_optimizer_instance(
      optimizer, params["learning_rate"])
  self.logit_fn_dict = {"base": _base_logit_fn,
                        "din": din_logit_fn,
                        "dcn": dcn_logit_fn,
                        "dupn": dupn_logit_fn}

  def _model_fn(features, labels, mode, params):
    logit_fn = self.logit_fn_dict[params["sub_model"]]
    with tf.variable_scope('ctr_model'):
      ctr_logits = logit_fn(features, mode, params)
    with tf.variable_scope('cvr_model'):
      cvr_logits = logit_fn(features, mode, params)

    ctr = tf.sigmoid(ctr_logits, name="CTR")
    cvr = tf.sigmoid(cvr_logits, name="CVR")
    ctcvr = tf.multiply(ctr, cvr, name="CTCVR")

    if mode == tf.estimator.ModeKeys.PREDICT:
      predictions = {'ctcvr': ctcvr, 'ctr': ctr, 'cvr': cvr}
      export_outputs = {
          'prediction': tf.estimator.export.PredictOutput(predictions)}
      return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                        export_outputs=export_outputs)

    y = labels['cvr']
    cvr_loss = tf.reduce_sum(
        tf.keras.backend.binary_crossentropy(y, ctcvr), name="cvr_loss")
    ctr_loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels['ctr'],
                                                logits=ctr_logits),
        name="ctr_loss")
    loss = tf.add(ctr_loss, cvr_loss, name="ctcvr_loss")

    ctr_accuracy = tf.metrics.accuracy(
        labels=labels['ctr'],
        predictions=tf.to_float(tf.greater_equal(ctr, 0.5)))
    cvr_accuracy = tf.metrics.accuracy(
        labels=y, predictions=tf.to_float(tf.greater_equal(ctcvr, 0.5)))
    ctr_auc = tf.metrics.auc(labels['ctr'], ctr)
    cvr_auc = tf.metrics.auc(y, ctcvr)
    metrics = {'cvr_accuracy': cvr_accuracy, 'ctr_accuracy': ctr_accuracy,
               'ctr_auc': ctr_auc, 'cvr_auc': cvr_auc}
    tf.summary.scalar('ctr_accuracy', ctr_accuracy[1])
    tf.summary.scalar('cvr_accuracy', cvr_accuracy[1])
    tf.summary.scalar('ctr_auc', ctr_auc[1])
    tf.summary.scalar('cvr_auc', cvr_auc[1])

    if mode == tf.estimator.ModeKeys.EVAL:
      return tf.estimator.EstimatorSpec(mode, loss=loss,
                                        eval_metric_ops=metrics)

    # Create training op.
    assert mode == tf.estimator.ModeKeys.TRAIN
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = self.optimizer.minimize(
          loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

  super(ESMM, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      config=config,
      params=params,
      warm_start_from=warm_start_from)
def __init__(self, n_classes, optimizer=None, learning_rate=None,
             model_dir=None, config=None, warm_start_from=None,
             multi_gpu=False):
  params = {
      'n_classes': n_classes,
      # If an instance of an optimizer is passed in, this will just
      # return it.
      'optimizer': (None if optimizer is None else
                    get_optimizer_instance(optimizer, learning_rate)),
  }
  _model_fn = model_fn
  if multi_gpu:
    params['optimizer'] = TowerOptimizer(params['optimizer'])
    _model_fn = replicate_model_fn(_model_fn)
  super(HighRes3DNet, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      params=params,
      config=config,
      warm_start_from=warm_start_from,
  )
def __init__(self, n_classes, optimizer=None, n_filters=96, keep_prob=0.5,
             learning_rate=None, model_dir=None, config=None,
             warm_start_from=None, multi_gpu=False, n_examples=1.0,
             prior_path=None):
  params = {
      'n_classes': n_classes,
      # If an instance of an optimizer is passed in, this will just
      # return it.
      'optimizer': (None if optimizer is None else
                    get_optimizer_instance(optimizer, learning_rate)),
      'n_filters': n_filters,
      'n_examples': n_examples,
      'prior_path': prior_path
  }
  _model_fn = model_fn
  if multi_gpu:
    params['optimizer'] = TowerOptimizer(params['optimizer'])
    _model_fn = replicate_model_fn(_model_fn)
  super(MeshNetBWN, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      params=params,
      config=config,
      warm_start_from=warm_start_from)
def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
                     partitioner, config, sparse_combiner='sum'):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. If `None`, will use a FTRL optimizer.
    partitioner: Partitioner for variables.
    config: `RunConfig` object to configure the runtime settings.
    sparse_combiner: A string specifying how to reduce if a categorical column
      is multivalent. One of "mean", "sqrtn", and "sum".

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  optimizer = optimizers.get_optimizer_instance(
      optimizer or _get_default_optimizer(feature_columns),
      learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      'linear',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):

    logit_fn = _linear_logit_fn_builder(
        units=head.logits_dimension,
        feature_columns=feature_columns,
        sparse_combiner=sparse_combiner)
    logits = logit_fn(features=features)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=optimizer,
        logits=logits)
def test_object(self):

  class _TestOptimizer(optimizer_lib.Optimizer):

    def __init__(self):
      super(_TestOptimizer, self).__init__(
          use_locking=False, name='TestOptimizer')

  opt = optimizers.get_optimizer_instance(_TestOptimizer())
  self.assertIsInstance(opt, _TestOptimizer)
def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
                     partitioner, config):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. If `None`, will use a FTRL optimizer.
    partitioner: Partitioner for variables.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  optimizer = optimizers.get_optimizer_instance(
      optimizer or _get_default_optimizer(feature_columns),
      learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      'linear',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):

    logit_fn = _linear_logit_fn_builder(
        units=head.logits_dimension, feature_columns=feature_columns)
    logits = logit_fn(features=features)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def test_object(self):

  class _TestOptimizer(optimizer_lib.Optimizer):

    def __init__(self):
      super(_TestOptimizer, self).__init__(
          use_locking=False, name='TestOptimizer')

  opt = optimizers.get_optimizer_instance(_TestOptimizer())
  self.assertIsInstance(opt, _TestOptimizer)
def _linear_model_fn(features, labels, mode, params, config):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: Dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters. The following hyperparameters are
      expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used
        by the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
        optimizer to use for training. If `None`, will use a FTRL optimizer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If mode or params are invalid.
  """
  head = params['head']
  feature_columns = tuple(params['feature_columns'])
  optimizer = optimizers.get_optimizer_instance(
      params.get('optimizer') or _get_default_optimizer(feature_columns),
      learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = params.get('partitioner') or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      'linear',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):

    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=head.logits_dimension)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
                     partitioner, config):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. If `None`, will use a FTRL optimizer.
    partitioner: Partitioner for variables.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  optimizer = optimizers.get_optimizer_instance(
      optimizer or _get_default_optimizer(feature_columns),
      learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      'linear',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):

    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=head.logits_dimension)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _linear_model_fn(features, labels, mode, params, config):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: Dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters. The following hyperparameters are
      expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used
        by the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
        optimizer to use for training. If `None`, will use a FTRL optimizer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If mode or params are invalid.
  """
  head = params['head']
  feature_columns = tuple(params['feature_columns'])
  optimizer = optimizers.get_optimizer_instance(
      params.get('optimizer') or _get_default_optimizer(feature_columns),
      learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = params.get('partitioner') or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      'linear',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):

    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=head.logits_dimension)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _dfm_model_fn(features, labels, mode, head, hidden_units,
                  linear_feature_columns, dnn_feature_columns,
                  fm_feature_columns, optimizer='Adagrad',
                  activation_fn=nn.relu, dropout=None,
                  input_layer_partitioner=None, config=None):
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0
  # In TensorFlow's parameter-server (ps) architecture, the ps stores the
  # model parameters and the workers update them using the training data.
  # By default, TensorFlow assigns parameters to the parameter servers in a
  # round-robin fashion.
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dcn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    logit_fn = _dnn_logit_fn_builder(
        units=head.logits_dimension,
        hidden_units=hidden_units,
        linear_feature_columns=linear_feature_columns,
        dnn_feature_columns=dnn_feature_columns,
        fm_feature_columns=fm_feature_columns,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner)
    logits = logit_fn(features=features, mode=mode)
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=optimizer,
        logits=logits)
def __init__(self, params, model_dir=None, optimizer='Adagrad', config=None,
             warm_start_from=None):
  '''An implementation of Hierarchical Attention Networks for Document
  Classification.'''
  if not optimizer:
    optimizer = 'Adagrad'
  self.optimizer = optimizers.get_optimizer_instance(
      optimizer, params.learning_rate)

  def _model_fn(features, labels, mode, params):
    # Build the model.
    word_embedded = self.word2vec(features["content"])
    sent_vec = self.sent2vec(word_embedded, features["sentence_len"], mode)
    doc_vec = self.doc2vec(sent_vec, features["sentence_num"], mode)
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    if "doc_embedding_keep_rate" in params and params.doc_embedding_keep_rate < 1.0:
      doc_vec = tf.layers.dropout(doc_vec, params.doc_embedding_keep_rate,
                                  training=is_training)
    if params.num_classes == 2:
      my_head = tf.contrib.estimator.binary_classification_head()
    else:
      my_head = tf.contrib.estimator.multi_class_head(params.num_classes)
    logits = tf.layers.dense(doc_vec, my_head.logits_dimension,
                             activation=None)
    return my_head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits,
        train_op_fn=lambda loss: self.optimizer.minimize(
            loss, global_step=tf.train.get_global_step()))

  super(HAN, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      config=config,
      params=params,
      warm_start_from=warm_start_from)
def dupn_model_fn(features, labels, mode, params):
  behvr_emb, property_emb, item_emb = get_behavior_embedding(params, features)
  print("behvr_emb shape:", behvr_emb.shape)
  print("property_emb shape:", property_emb.shape)
  print("item_emb shape:", item_emb.shape)

  inputs = tf.concat([behvr_emb, property_emb], -1)
  print("lstm inputs shape:", inputs.shape)
  lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=params["num_units"])
  # initial_state = lstm_cell.zero_state(params["batch_size"], tf.float32)
  outputs, state = tf.nn.dynamic_rnn(lstm_cell, inputs, dtype=tf.float32)
  print("lstm output shape:", outputs.shape)

  masks = tf.cast(features["behaviorPids"] >= 0, tf.float32)
  user = fc.input_layer(features, params["user_feature_columns"])
  context = tf.concat([user, item_emb], -1)
  print("attention context shape:", context.shape)
  sequence = attention(outputs, context, params, masks)
  print("sequence embedding shape:", sequence.shape)

  other = fc.input_layer(features, params["other_feature_columns"])
  net = tf.concat([sequence, item_emb, other], -1)
  # Build the hidden layers, sized according to the 'hidden_units' param.
  for units in params['hidden_units']:
    net = tf.layers.dense(net, units=int(units), activation=tf.nn.relu)
    if 'dropout_rate' in params and params['dropout_rate'] > 0.0:
      net = tf.layers.dropout(net, params['dropout_rate'],
                              training=(mode == tf.estimator.ModeKeys.TRAIN))
  # Compute logits.
  logits = tf.layers.dense(net, 1, activation=None)

  optimizer = optimizers.get_optimizer_instance(params["optimizer"],
                                                params["learning_rate"])
  my_head = tf.contrib.estimator.binary_classification_head(thresholds=[0.5])
  return my_head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      logits=logits,
      train_op_fn=lambda loss: optimizer.minimize(
          loss, global_step=tf.train.get_global_step()))
def train_and_eval():
  """Train and Evaluate."""
  hparams = {
      "train_input_pattern": FLAGS.train_input_pattern,
      "eval_input_pattern": FLAGS.eval_input_pattern,
      "learning_rate": FLAGS.learning_rate,
      "train_batch_size": FLAGS.batch_size,
      "eval_batch_size": FLAGS.batch_size,
      "predict_batch_size": FLAGS.batch_size,
      "num_train_steps": FLAGS.num_train_steps,
      "num_eval_steps": FLAGS.num_eval_steps,
      "checkpoint_secs": FLAGS.checkpoint_secs,
      "num_checkpoints": FLAGS.num_checkpoints,
      "loss": FLAGS.loss,
      "list_size": FLAGS.list_size,
      "listwise_inference": True,
      "convert_labels_to_binary": False,
      "model_dir": FLAGS.model_dir
  }

  optimizer = optimizers.get_optimizer_instance(
      "Adam", learning_rate=FLAGS.learning_rate)

  estimator = tfr.estimator.EstimatorBuilder(
      context_feature_columns=context_feature_columns(),
      example_feature_columns=example_feature_columns(),
      scoring_function=scoring_function,
      transform_function=transform_function,
      optimizer=optimizer,
      loss_reduction=tf.compat.v1.losses.Reduction.MEAN,
      hparams=hparams).make_estimator()

  ranking_pipeline = DASALCPipeline(
      context_feature_columns=context_feature_columns(),
      example_feature_columns=example_feature_columns(),
      hparams=hparams,
      estimator=estimator,
      label_feature_name="utility",
      label_feature_type=tf.int64,
      best_exporter_metric="metric/ndcg_5")

  ranking_pipeline.train_and_eval()
def _dnn_model_fn(features, labels, mode, head, hidden_units, feature_columns,
                  optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
                  input_layer_partitioner=None, config=None):
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    logit_fn = _dnn_logit_fn_builder(
        units=head.logits_dimension,
        hidden_units=hidden_units,
        feature_columns=feature_columns,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner)
    logits = logit_fn(features=features, mode=mode)
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        optimizer=optimizer,
        logits=logits)
def __init__(self, n_classes, optimizer, n_filters=64, n_examples=1.0,
             n_prior_samples=1.0, learning_rate=None, model_dir=None,
             config=None, warm_start_from=None, prior_path=None,
             multi_gpu=False, only_kld=False, is_mc='True'):
  print('Learning Rate: ' + str(learning_rate))
  params = {
      'n_classes': n_classes,
      # If an instance of an optimizer is passed in, this will just
      # return it.
      'optimizer': get_optimizer_instance(optimizer, learning_rate),
      'n_filters': n_filters,
      'n_examples': n_examples,
      'prior_path': prior_path,
      'n_prior_samples': n_prior_samples,
      'only_kld': only_kld,
      'is_mc': is_mc
  }
  _model_fn = model_fn
  if multi_gpu:
    params['optimizer'] = TowerOptimizer(params['optimizer'])
    _model_fn = replicate_model_fn(_model_fn)
  super(MeshNetCWN, self).__init__(
      model_fn=_model_fn,
      model_dir=model_dir,
      params=params,
      config=config,
      warm_start_from=warm_start_from)
def _dnn_linear_combined_model_fn(features, labels, mode, head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns
      used by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given
      DNN coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing,
      or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be '
        'defined.')
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          'input', partitioner=input_layer_partitioner):
        net = feature_column_lib.input_layer(
            features=features, feature_columns=dnn_feature_columns)

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            'hiddenlayer_%d' % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = core_layers.dense(
              net,
              units=num_hidden_units,
              activation=dnn_activation_fn,
              kernel_initializer=init_ops.glorot_uniform_initializer(),
              name=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = core_layers.dropout(net, rate=dnn_dropout, training=True)
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          'logits', values=(net,)) as dnn_logits_scope:
        dnn_logits = core_layers.dense(
            net,
            units=head.logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_logits = feature_column_lib.linear_model(
          features=features,
          feature_columns=linear_feature_columns,
          units=head.logits_dimension)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))

    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def optimizer_fn():
  return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)
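# A minimal usage sketch, not taken from any snippet above: the tests in this
# section suggest `get_optimizer_instance` accepts a supported string name
# plus a learning_rate, an existing `Optimizer` instance, or a zero-argument
# callable returning one. The import paths below assume the TF 1.x estimator
# layout and may differ in other checkouts.
from tensorflow.python.estimator.canned import optimizers
from tensorflow.python.training import adagrad

opt_by_name = optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)
opt_by_instance = optimizers.get_optimizer_instance(
    adagrad.AdagradOptimizer(learning_rate=0.05))
opt_by_callable = optimizers.get_optimizer_instance(
    lambda: adagrad.AdagradOptimizer(learning_rate=0.05))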
def test_ftrl(self):
  opt = optimizers.get_optimizer_instance('Ftrl', learning_rate=0.1)
  self.assertIsInstance(opt, ftrl.FtrlOptimizer)
  self.assertAlmostEqual(0.1, opt._learning_rate)
def test_callable_returns_invalid(self):

  def _optimizer_fn():
    return (1, 2, 3)

  with self.assertRaisesRegexp(
      ValueError, 'The given object is not an Optimizer instance'):
    optimizers.get_optimizer_instance(_optimizer_fn)
def _dnn_model_fn(features, labels, mode, head, hidden_units, feature_columns,
                  optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
                  input_layer_partitioner=None, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: Dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))

    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner):
      net = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
        net = core_layers.dense(
            net,
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = core_layers.dropout(net, rate=dropout, training=True)
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        'logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=head.logits_dimension,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _dnn_linear_combined_model_fn(features, labels, mode, head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns
      used by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used
      by the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If
      `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given
      DNN coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing,
      or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be '
        'defined.')

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):

      dnn_logit_fn = dnn._dnn_logit_fn_builder(  # pylint: disable=protected-access
          units=head.logits_dimension,
          hidden_units=dnn_hidden_units,
          feature_columns=dnn_feature_columns,
          activation_fn=dnn_activation_fn,
          dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner)
      dnn_logits = dnn_logit_fn(features=features, mode=mode)

  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
          units=head.logits_dimension,
          feature_columns=linear_feature_columns)
      linear_logits = logit_fn(features=features)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))

    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      return distribute_lib.increment_var(global_step)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
def test_lambda(self):
  opt = optimizers.get_optimizer_instance(lambda: _TestOptimizer())  # pylint: disable=unnecessary-lambda
  self.assertIsInstance(opt, _TestOptimizer)
def _rnn_model_fn(features, labels, mode, head, rnn_cell_fn,
                  sequence_feature_columns, context_feature_columns,
                  optimizer='Adagrad', input_layer_partitioner=None,
                  config=None):
  """Recurrent Neural Net model_fn.

  Args:
    features: dict of `Tensor` and `SparseTensor` objects returned from
      `input_fn`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] with labels.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    rnn_cell_fn: A function that takes one argument, a `tf.estimator.ModeKeys`,
      and returns an object of type `tf.nn.rnn_cell.RNNCell`.
    sequence_feature_columns: Iterable containing `FeatureColumn`s that
      represent sequential model inputs.
    context_feature_columns: Iterable containing `FeatureColumn`s that
      represent model inputs not associated with a specific timestep.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05 and gradient clip norm of
      5.0.
    input_layer_partitioner: Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If mode or optimizer is invalid, or features has the wrong
      type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  # If user does not provide an optimizer instance, use the optimizer specified
  # by the string with default learning rate and gradient clipping.
  if not isinstance(optimizer, optimizer_lib.Optimizer):
    optimizer = optimizers.get_optimizer_instance(
        optimizer, learning_rate=_DEFAULT_LEARNING_RATE)
    optimizer = extenders.clip_gradients_by_norm(optimizer, _DEFAULT_CLIP_NORM)

  num_ps_replicas = config.num_ps_replicas if config else 0
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'rnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))

    logit_fn = _rnn_logit_fn_builder(
        output_units=head.logits_dimension,
        rnn_cell_fn=rnn_cell_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        input_layer_partitioner=input_layer_partitioner)
    logits = logit_fn(features=features, mode=mode)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _dnn_model_fn(features, labels, mode, head, hidden_units, feature_columns,
                  optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
                  input_layer_partitioner=None, config=None,
                  tpu_estimator_spec=False):
  """Deep Neural Net model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.
    tpu_estimator_spec: Whether to return a `_TPUEstimatorSpec` or
      `model_fn.EstimatorSpec` instance.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))

    logit_fn = _dnn_logit_fn_builder(
        units=head.logits_dimension,
        hidden_units=hidden_units,
        feature_columns=feature_columns,
        activation_fn=activation_fn,
        dropout=dropout,
        input_layer_partitioner=input_layer_partitioner)
    logits = logit_fn(features=features, mode=mode)

    if tpu_estimator_spec:
      return head._create_tpu_estimator_spec(  # pylint: disable=protected-access
          features=features,
          mode=mode,
          labels=labels,
          optimizer=optimizer,
          logits=logits)
    else:
      return head.create_estimator_spec(
          features=features,
          mode=mode,
          labels=labels,
          optimizer=optimizer,
          logits=logits)
def test_rmsprop(self):
  opt = optimizers.get_optimizer_instance('RMSProp', learning_rate=0.1)
  self.assertIsInstance(opt, rmsprop.RMSPropOptimizer)
  self.assertAlmostEqual(0.1, opt._learning_rate)
def test_sgd(self):
  opt = optimizers.get_optimizer_instance('SGD', learning_rate=0.1)
  self.assertIsInstance(opt, gradient_descent.GradientDescentOptimizer)
  self.assertAlmostEqual(0.1, opt._learning_rate)
def test_supported_name_but_learning_rate_none(self):
  with self.assertRaisesRegexp(
      ValueError, 'learning_rate must be specified when opt is string'):
    optimizers.get_optimizer_instance('Adagrad', learning_rate=None)
def __init__(self,
             periodicities,
             input_window_size,
             output_window_size,
             model_dir=None,
             num_features=1,
             extra_feature_columns=None,
             num_timesteps=10,
             loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS,
             num_units=128,
             optimizer="Adam",
             config=None):
  """Initialize the Estimator.

  Args:
    periodicities: periodicities of the input data, in the same units as the
      time feature (for example 24 if feeding hourly data with a daily
      periodicity, or 60 * 24 if feeding minute-level data with daily
      periodicity). Note this can be a single value or a list of values for
      multiple periodicities.
    input_window_size: Number of past time steps of data to look at when doing
      the regression.
    output_window_size: Number of future time steps to predict. Note that
      setting this value to > 1 empirically seems to give a better fit.
    model_dir: Directory to save model parameters, graph, etc. This can also
      be used to load checkpoints from the directory into an estimator to
      continue training a previously saved model.
    num_features: The dimensionality of the time series (default value is one
      for univariate, more than one for multivariate).
    extra_feature_columns: A list of `tf.feature_column`s (for example
      `tf.feature_column.embedding_column`) corresponding to features which
      provide extra information to the model but are not part of the series
      to be predicted.
    num_timesteps: Number of buckets into which to divide (time %
      periodicity). This value multiplied by the number of periodicities is
      the number of time features added to the model.
    loss: Loss function to use for training. Currently supported values are
      SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for
      NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For
      SQUARED_LOSS, the evaluation loss is reported based on un-scaled
      observations and predictions, while the training loss is computed on
      normalized data.
    num_units: The size of the hidden state in the encoder and decoder LSTM
      cells.
    optimizer: string, `tf.compat.v1.train.Optimizer` object, or callable that
      defines the optimizer algorithm to use for training. Defaults to the
      Adam optimizer with a learning rate of 0.01.
    config: Optional `estimator.RunConfig` object to configure the runtime
      settings.
  """
  optimizer = optimizers.get_optimizer_instance(optimizer, learning_rate=0.01)
  model = ar_model.ARModel(
      periodicities=periodicities,
      input_window_size=input_window_size,
      output_window_size=output_window_size,
      num_features=num_features,
      exogenous_feature_columns=extra_feature_columns,
      num_time_buckets=num_timesteps,
      loss=loss,
      prediction_model_factory=functools.partial(
          ar_model.LSTMPredictionModel, num_units=num_units))
  state_manager = state_management.FilteringOnlyStateManager()
  super(LSTMAutoRegressor, self).__init__(
      model=model,
      state_manager=state_manager,
      optimizer=optimizer,
      model_dir=model_dir,
      config=config,
      head_type=ts_head_lib.OneShotPredictionHead)
def test_adam(self):
  opt = optimizers.get_optimizer_instance('Adam', learning_rate=0.1)
  self.assertIsInstance(opt, adam.AdamOptimizer)
  self.assertAlmostEqual(0.1, opt._lr)
def test_object_invalid(self):
  with self.assertRaisesRegexp(
      ValueError, 'The given object is not an Optimizer instance'):
    optimizers.get_optimizer_instance((1, 2, 3))
def _dnn_model_fn(features, labels, mode, head, hidden_units, feature_columns,
                  optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
                  input_layer_partitioner=None, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: Dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. If not specified, will use the Adagrad
      optimizer with a default learning rate of 0.05.
    activation_fn: Activation function applied to each layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      'dnn',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))

    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner):
      net = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
        net = core_layers.dense(
            net,
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = core_layers.dropout(net, rate=dropout, training=True)
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        'logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=head.logits_dimension,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss, global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def train_op_fn(loss):
  opt = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  return opt.minimize(loss, global_step=training_util.get_global_step())
def test_object(self):
  opt = optimizers.get_optimizer_instance(_TestOptimizer())
  self.assertIsInstance(opt, _TestOptimizer)
def test_callable(self):

  def _optimizer_fn():
    return _TestOptimizer()

  opt = optimizers.get_optimizer_instance(_optimizer_fn)
  self.assertIsInstance(opt, _TestOptimizer)
def __init__(self,
             periodicities,
             input_window_size,
             output_window_size,
             model_dir=None,
             num_features=1,
             extra_feature_columns=None,
             num_timesteps=10,
             loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS,
             num_units=128,
             optimizer="Adam",
             config=None):
  """Initialize the Estimator.

  Args:
    periodicities: periodicities of the input data, in the same units as the
      time feature (for example 24 if feeding hourly data with a daily
      periodicity, or 60 * 24 if feeding minute-level data with daily
      periodicity). Note this can be a single value or a list of values for
      multiple periodicities.
    input_window_size: Number of past time steps of data to look at when doing
      the regression.
    output_window_size: Number of future time steps to predict. Note that
      setting this value to > 1 empirically seems to give a better fit.
    model_dir: Directory to save model parameters, graph, etc. This can also
      be used to load checkpoints from the directory into an estimator to
      continue training a previously saved model.
    num_features: The dimensionality of the time series (default value is one
      for univariate, more than one for multivariate).
    extra_feature_columns: A list of `tf.feature_column`s (for example
      `tf.feature_column.embedding_column`) corresponding to features which
      provide extra information to the model but are not part of the series
      to be predicted.
    num_timesteps: Number of buckets into which to divide (time %
      periodicity). This value multiplied by the number of periodicities is
      the number of time features added to the model.
    loss: Loss function to use for training. Currently supported values are
      SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for
      NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For
      SQUARED_LOSS, the evaluation loss is reported based on un-scaled
      observations and predictions, while the training loss is computed on
      normalized data.
    num_units: The size of the hidden state in the encoder and decoder LSTM
      cells.
    optimizer: string, `tf.compat.v1.train.Optimizer` object, or callable that
      defines the optimizer algorithm to use for training. Defaults to the
      Adam optimizer with a learning rate of 0.01.
    config: Optional `estimator.RunConfig` object to configure the runtime
      settings.
  """
  optimizer = optimizers.get_optimizer_instance(optimizer, learning_rate=0.01)
  model = ar_model.ARModel(
      periodicities=periodicities,
      input_window_size=input_window_size,
      output_window_size=output_window_size,
      num_features=num_features,
      exogenous_feature_columns=extra_feature_columns,
      num_time_buckets=num_timesteps,
      loss=loss,
      prediction_model_factory=functools.partial(
          ar_model.LSTMPredictionModel, num_units=num_units))
  state_manager = state_management.FilteringOnlyStateManager()
  super(LSTMAutoRegressor, self).__init__(
      model=model,
      state_manager=state_manager,
      optimizer=optimizer,
      model_dir=model_dir,
      config=config,
      head_type=ts_head_lib.OneShotPredictionHead)
def test_unsupported_name(self):
  with self.assertRaisesRegexp(
      ValueError, 'Unsupported optimizer name: unsupported_name'):
    optimizers.get_optimizer_instance('unsupported_name', learning_rate=0.1)
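# Hedged sketch, not taken from this section: it mirrors the train_op_fn
# pattern the model_fns above pass to head.create_estimator_spec. The helper
# name `make_train_op_fn` is hypothetical; `training_util` is assumed to be
# tensorflow.python.training.training_util.
def make_train_op_fn(optimizer):
  """Wraps a resolved optimizer in the train_op_fn signature a head expects."""

  def _train_op_fn(loss):
    # Minimize the head-provided loss and advance the global step.
    return optimizer.minimize(
        loss, global_step=training_util.get_global_step())

  return _train_op_fn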