import os

import tensorflow as tf
from tensorflow_transform.beam.tft_beam_io import transform_fn_io
from tensorflow_transform.tf_metadata import metadata_io

from trainer import taxi  # helper module from the Chicago Taxi example; adjust the path to your layout


def build_estimator(tf_transform_dir, config, hidden_units=None):
  """Build an estimator for predicting the tipping behavior of taxi riders.

  Args:
    tf_transform_dir: directory in which the tf-transform model was written
      during the preprocessing step.
    config: tf.contrib.learn.RunConfig defining the runtime environment for the
      estimator (including model_dir).
    hidden_units: [int], the layer sizes of the DNN (input layer first)

  Returns:
    Resulting DNNLinearCombinedClassifier.
  """
  # Read the schema that tf.Transform wrote next to the transform graph and
  # drop the label from the feature spec, since the estimator only sees features.
  metadata_dir = os.path.join(tf_transform_dir,
                              transform_fn_io.TRANSFORMED_METADATA_DIR)
  transformed_metadata = metadata_io.read_metadata(metadata_dir)
  transformed_feature_spec = transformed_metadata.schema.as_feature_spec()
  transformed_feature_spec.pop(taxi.transformed_name(taxi.LABEL_KEY))

  # Dense float features feed the DNN tower.
  real_valued_columns = [
      tf.feature_column.numeric_column(key, shape=())
      for key in taxi.transformed_names(taxi.DENSE_FLOAT_FEATURE_KEYS)
  ]
  # Vocabulary, bucketized, and categorical features feed the linear tower.
  categorical_columns = [
      tf.feature_column.categorical_column_with_identity(
          key, num_buckets=taxi.VOCAB_SIZE + taxi.OOV_SIZE, default_value=0)
      for key in taxi.transformed_names(taxi.VOCAB_FEATURE_KEYS)
  ]
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(
          key, num_buckets=taxi.FEATURE_BUCKET_COUNT, default_value=0)
      for key in taxi.transformed_names(taxi.BUCKET_FEATURE_KEYS)
  ]
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(
          key, num_buckets=num_buckets, default_value=0)
      for key, num_buckets in zip(
          taxi.transformed_names(taxi.CATEGORICAL_FEATURE_KEYS),
          taxi.MAX_CATEGORICAL_FEATURE_VALUES)
  ]
  return tf.estimator.DNNLinearCombinedClassifier(
      config=config,
      linear_feature_columns=categorical_columns,
      dnn_feature_columns=real_valued_columns,
      dnn_hidden_units=hidden_units or [100, 70, 50, 25])
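# A minimal usage sketch for the directory-based variant above (not taken from
# the example itself): the paths and model_dir are hypothetical, and config is
# assumed to be a tf.estimator.RunConfig.
run_config = tf.estimator.RunConfig(model_dir='/tmp/taxi_model')
estimator = build_estimator(
    tf_transform_dir='/tmp/taxi_transform_output',  # directory written by the tf.Transform step
    config=run_config,
    hidden_units=[100, 70, 50, 25])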
# Same example, rewritten to take a tensorflow_transform.TFTransformOutput
# instead of reading the transformed metadata from disk itself.
def build_estimator(tf_transform_output, config, hidden_units=None):
  """Build an estimator for predicting the tipping behavior of taxi riders.

  Args:
    tf_transform_output: A TFTransformOutput.
    config: tf.contrib.learn.RunConfig defining the runtime environment for the
      estimator (including model_dir).
    hidden_units: [int], the layer sizes of the DNN (input layer first)

  Returns:
    Resulting DNNLinearCombinedClassifier.
  """
  # Copy the feature spec from the TFTransformOutput and drop the label key;
  # the estimator only sees features.
  transformed_feature_spec = (
      tf_transform_output.transformed_feature_spec().copy())
  transformed_feature_spec.pop(taxi.transformed_name(taxi.LABEL_KEY))

  # Feature columns are built exactly as in the directory-based version above.
  real_valued_columns = [
      tf.feature_column.numeric_column(key, shape=())
      for key in taxi.transformed_names(taxi.DENSE_FLOAT_FEATURE_KEYS)
  ]
  categorical_columns = [
      tf.feature_column.categorical_column_with_identity(
          key, num_buckets=taxi.VOCAB_SIZE + taxi.OOV_SIZE, default_value=0)
      for key in taxi.transformed_names(taxi.VOCAB_FEATURE_KEYS)
  ]
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(
          key, num_buckets=taxi.FEATURE_BUCKET_COUNT, default_value=0)
      for key in taxi.transformed_names(taxi.BUCKET_FEATURE_KEYS)
  ]
  categorical_columns += [
      tf.feature_column.categorical_column_with_identity(
          key, num_buckets=num_buckets, default_value=0)
      for key, num_buckets in zip(
          taxi.transformed_names(taxi.CATEGORICAL_FEATURE_KEYS),
          taxi.MAX_CATEGORICAL_FEATURE_VALUES)
  ]
  return tf.estimator.DNNLinearCombinedClassifier(
      config=config,
      linear_feature_columns=categorical_columns,
      dnn_feature_columns=real_valued_columns,
      dnn_hidden_units=hidden_units or [100, 70, 50, 25])
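# The corresponding sketch for the TFTransformOutput-based variant (again a
# hypothetical invocation, with assumed paths): the only calling-convention
# change is that the caller wraps the tf.Transform output directory in
# tensorflow_transform.TFTransformOutput rather than passing the raw path.
import tensorflow_transform as tft

tf_transform_output = tft.TFTransformOutput('/tmp/taxi_transform_output')
run_config = tf.estimator.RunConfig(model_dir='/tmp/taxi_model')
estimator = build_estimator(
    tf_transform_output, run_config, hidden_units=[100, 70, 50, 25])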