Example #1
 def __init__(self, *, inputs, outputs, labels, loss, optimizer, clip_norm,
              clip_value, metrics, updates, sess, model_dir):
     self.inputs = inputs
     self.outputs = outputs
     self.labels = labels
     self.loss = loss
     self.use_bigdl_optim = False
     self.clip_norm = clip_norm
     self.clip_value = clip_value
     if optimizer is not None:
         from bigdl.orca.learn.optimizers import Optimizer
         if isinstance(optimizer, Optimizer):
             self.train_op = None
             self.optimizer = optimizer.get_optimizer()
             self.use_bigdl_optim = True
         else:
             assert isinstance(optimizer, tf.train.Optimizer), \
                 "optimizer is of type {}, ".format(type(optimizer)) + \
                 "it should be an instance of tf.train.Optimizer"
             self.optimizer = ZooOptimizer(optimizer)
             if clip_norm or clip_value:
                 gvs = self.optimizer.compute_gradients(self.loss)
                 if clip_norm:
                     gvs = [(tf.clip_by_norm(g_v[0], clip_norm), g_v[1])
                            for g_v in gvs]
                 if clip_value:
                     if isinstance(clip_value, tuple):
                         assert len(clip_value) == 2 and clip_value[0] < clip_value[1], \
                             "clip value should be (clip_min, clip_max)"
                         gvs = [(tf.clip_by_value(g_v[0], clip_value[0],
                                                  clip_value[1]), g_v[1])
                                for g_v in gvs]
                     elif isinstance(clip_value, (int, float)):
                         assert clip_value > 0, "clip value should be larger than 0"
                         gvs = [(tf.clip_by_value(g_v[0], -clip_value,
                                                  clip_value), g_v[1])
                                for g_v in gvs]
                     else:
                         raise Exception(
                             "clip_value should be a tuple or one number")
                 self.train_op = self.optimizer.apply_gradients(gvs)
             else:
                 self.train_op = self.optimizer.minimize(self.loss)
     else:
         self.optimizer = None
         self.train_op = None
     self.metrics = metrics
     self.updates = updates
     if sess is None:
         self.sess = tf.Session()
         self.sess.run(tf.global_variables_initializer())
     else:
         self.sess = sess
     self.model_dir = model_dir
     self.load_checkpoint = False
     self.tf_optimizer = None
     self.log_dir = None
     self.app_name = None
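
Note: in the constructor above, gradient clipping works by rewriting each (gradient, variable) pair before apply_gradients. A minimal standalone sketch of the same transformation using plain TF1 ops (the toy variable and clipping thresholds here are illustrative, not from the original code):

    import tensorflow as tf

    # Toy graph: a trivial loss over one variable.
    w = tf.Variable([3.0, -4.0])
    loss = tf.reduce_sum(tf.square(w))

    optimizer = tf.train.GradientDescentOptimizer(0.1)
    gvs = optimizer.compute_gradients(loss)

    # clip_norm: rescale each gradient so its L2 norm is at most 5.0.
    gvs = [(tf.clip_by_norm(g, 5.0), v) for g, v in gvs]

    # A scalar clip_value c clamps every gradient element into [-c, c];
    # a (clip_min, clip_max) tuple maps to tf.clip_by_value(g, clip_min, clip_max).
    gvs = [(tf.clip_by_value(g, -1.0, 1.0), v) for g, v in gvs]

    train_op = optimizer.apply_gradients(gvs)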
Example #2
    def model_fn(features, labels, mode):
        from nets import lenet
        slim = tf.contrib.slim
        with slim.arg_scope(lenet.lenet_arg_scope()):
            logits, end_points = lenet.lenet(features, num_classes=10, is_training=True)

        if mode == tf.estimator.ModeKeys.EVAL or mode == tf.estimator.ModeKeys.TRAIN:
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

            optimizer = ZooOptimizer(tf.train.AdamOptimizer())
            train_op = optimizer.minimize(loss)
            return tf.estimator.EstimatorSpec(mode, predictions=logits,
                                              loss=loss, train_op=train_op)
        else:
            return tf.estimator.EstimatorSpec(mode, predictions=logits)
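
This model_fn follows the standard tf.estimator contract, so it can be handed to a tf.estimator.Estimator and then wrapped in TFEstimator (from bigdl.orca.tfpark), the same way Example #10 wraps the canned LinearClassifier. A minimal sketch, assuming a train_input_fn like the one in Example #10; the model_dir and step count are illustrative:

    est = tf.estimator.Estimator(model_fn=model_fn, model_dir="/tmp/estimator/lenet")
    zoo_est = TFEstimator(est)
    zoo_est.train(train_input_fn, steps=1000)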
Example #3
        def model_fn(features, labels, mode):

            assert features.shape.ndims == 1
            if labels is not None:
                assert labels.shape.ndims == 0

            features = tf.expand_dims(features, axis=0)

            h1 = tf.layers.dense(features, 64, activation=tf.nn.relu)
            h2 = tf.layers.dense(h1, 64, activation=tf.nn.relu)
            logits = tf.layers.dense(h2, 10)

            if mode == tf.estimator.ModeKeys.EVAL or mode == tf.estimator.ModeKeys.TRAIN:
                labels = tf.expand_dims(labels, axis=0)
                loss = tf.reduce_mean(
                    tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                           labels=labels))
                train_op = ZooOptimizer(
                    tf.train.AdamOptimizer()).minimize(loss)
                return tf.estimator.EstimatorSpec(mode,
                                                  train_op=train_op,
                                                  predictions=logits,
                                                  loss=loss)
            else:
                return tf.estimator.EstimatorSpec(mode, predictions=logits)
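
Unlike Example #2, this model_fn receives one unbatched record at a time (hence the ndims asserts), so it inserts a batch dimension of size 1 before the dense layers, which require rank-2 input. The same reshaping in isolation (the feature size 784 is an illustrative assumption):

    features = tf.placeholder(tf.float32, shape=[784])  # one record, rank 1
    batched = tf.expand_dims(features, axis=0)          # shape [1, 784]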
Example #4
    def _bert_classifier_model_fn(features, labels, mode, params):
        """
        Model function for BERTClassifier.

        :param features: Dict of feature tensors. Must include the key "input_ids".
        :param labels: Label tensor for training.
        :param mode: 'train', 'eval' or 'infer'.
        :param params: Must include the key "num_classes".
        :return: tf.estimator.EstimatorSpec.
        """
        import tensorflow as tf
        from bigdl.orca.tfpark import ZooOptimizer
        output_layer = bert_model(features, labels, mode,
                                  params).get_pooled_output()
        hidden_size = output_layer.shape[-1].value
        output_weights = tf.get_variable(
            "output_weights", [params["num_classes"], hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("output_bias", [params["num_classes"]],
                                      initializer=tf.zeros_initializer())
        with tf.variable_scope("loss"):
            if mode == tf.estimator.ModeKeys.TRAIN:
                output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

            logits = tf.matmul(output_layer, output_weights, transpose_b=True)
            logits = tf.nn.bias_add(logits, output_bias)
            probabilities = tf.nn.softmax(logits, axis=-1)

            if mode == tf.estimator.ModeKeys.PREDICT:
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  predictions=probabilities)
            else:
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                one_hot_labels = tf.one_hot(labels,
                                            depth=params["num_classes"],
                                            dtype=tf.float32)
                per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                                  axis=-1)
                loss = tf.reduce_mean(per_example_loss)
                if mode == tf.estimator.ModeKeys.EVAL:
                    return tf.estimator.EstimatorSpec(
                        mode=mode, predictions=probabilities, loss=loss)
                else:
                    train_op = ZooOptimizer(optimizer).minimize(loss)
                    return tf.estimator.EstimatorSpec(mode=mode,
                                                      train_op=train_op,
                                                      loss=loss)
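
Since the model_fn reads params["num_classes"], the class count is supplied through the standard tf.estimator params mechanism when the estimator is constructed. A hedged sketch (the model_dir and class count are placeholders; bert_model and optimizer are closures from the surrounding BERT code, as in the function above):

    estimator = tf.estimator.Estimator(
        model_fn=_bert_classifier_model_fn,
        model_dir="/tmp/bert_classifier",
        params={"num_classes": 2})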
Example #5
        def model_fn(features, labels, mode):
            features = tf.layers.flatten(features)
            h1 = tf.layers.dense(features, 64, activation=tf.nn.relu)
            h2 = tf.layers.dense(h1, 64, activation=tf.nn.relu)
            logits = tf.layers.dense(h2, 10)

            if mode == tf.estimator.ModeKeys.EVAL or mode == tf.estimator.ModeKeys.TRAIN:
                loss = tf.reduce_mean(
                    tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                           labels=labels))
                train_op = ZooOptimizer(
                    tf.train.AdamOptimizer()).minimize(loss)
                return tf.estimator.EstimatorSpec(mode,
                                                  train_op=train_op,
                                                  predictions=logits,
                                                  loss=loss)
            else:
                return tf.estimator.EstimatorSpec(mode, predictions=logits)
Example #6
    def model_fn(features, labels, mode, params):
        from nets import inception
        slim = tf.contrib.slim
        labels = tf.squeeze(labels, axis=1)
        with slim.arg_scope(inception.inception_v1_arg_scope()):
            logits, end_points = inception.inception_v1(
                features,
                num_classes=int(params["num_classes"]),
                is_training=True)

        if mode == tf.estimator.ModeKeys.TRAIN:
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                       labels=labels))
            train_op = ZooOptimizer(tf.train.AdamOptimizer()).minimize(loss)
            return tf.estimator.EstimatorSpec(mode,
                                              train_op=train_op,
                                              predictions=logits,
                                              loss=loss)
        else:
            raise NotImplementedError
Example #7
class TensorFlowEstimator(Estimator):
    def __init__(self, *, inputs, outputs, labels, loss, optimizer, clip_norm,
                 clip_value, metrics, updates, sess, model_dir):
        self.inputs = inputs
        self.outputs = outputs
        self.labels = labels
        self.loss = loss
        self.use_bigdl_optim = False
        self.clip_norm = clip_norm
        self.clip_value = clip_value
        if optimizer is not None:
            from bigdl.orca.learn.optimizers import Optimizer
            if isinstance(optimizer, Optimizer):
                self.train_op = None
                self.optimizer = optimizer.get_optimizer()
                self.use_bigdl_optim = True
            else:
                assert isinstance(optimizer, tf.train.Optimizer), \
                    "optimizer is of type {}, ".format(type(optimizer)) + \
                    "it should be an instance of tf.train.Optimizer"
                self.optimizer = ZooOptimizer(optimizer)
                if clip_norm or clip_value:
                    gvs = self.optimizer.compute_gradients(self.loss)
                    if clip_norm:
                        gvs = [(tf.clip_by_norm(g_v[0], clip_norm), g_v[1])
                               for g_v in gvs]
                    if clip_value:
                        if isinstance(clip_value, tuple):
                            assert len(clip_value) == 2 and clip_value[0] < clip_value[1], \
                                "clip value should be (clip_min, clip_max)"
                            gvs = [(tf.clip_by_value(g_v[0], clip_value[0],
                                                     clip_value[1]), g_v[1])
                                   for g_v in gvs]
                        elif isinstance(clip_value, (int, float)):
                            assert clip_value > 0, "clip value should be larger than 0"
                            gvs = [(tf.clip_by_value(g_v[0], -clip_value,
                                                     clip_value), g_v[1])
                                   for g_v in gvs]
                        else:
                            raise Exception(
                                "clip_value should be a tuple or one number")
                    self.train_op = self.optimizer.apply_gradients(gvs)
                else:
                    self.train_op = self.optimizer.minimize(self.loss)
        else:
            self.optimizer = None
            self.train_op = None
        self.metrics = metrics
        self.updates = updates
        if sess is None:
            self.sess = tf.Session()
            self.sess.run(tf.global_variables_initializer())
        else:
            self.sess = sess
        self.model_dir = model_dir
        self.load_checkpoint = False
        self.tf_optimizer = None
        self.log_dir = None
        self.app_name = None

    def fit(self,
            data,
            epochs=1,
            batch_size=32,
            feature_cols=None,
            label_cols=None,
            validation_data=None,
            session_config=None,
            checkpoint_trigger=None,
            auto_shard_files=False,
            feed_dict=None):
        """
        Train this graph model with train data.

        :param data: train data. It can be XShards, Spark DataFrame, tf.data.Dataset.
               If data is XShards, each partition can be a Pandas DataFrame or a dictionary of
               {'x': feature, 'y': label}, where feature(label) is a numpy array or a tuple of
               numpy arrays.
               If data is tf.data.Dataset, each element is a tuple of input tensors.
        :param epochs: number of epochs to train.
        :param batch_size: total batch size for each iteration.
        :param feature_cols: feature column names if train data is Spark DataFrame or XShards
               of Pandas DataFrame.
        :param label_cols: label column names if train data is Spark DataFrame or XShards of
               Pandas DataFrame.
        :param validation_data: validation data. Validation data type should be the same
               as train data.
        :param session_config: tensorflow session configuration for training.
               Should be an object of tf.ConfigProto.
        :param checkpoint_trigger: when to trigger checkpointing during training.
               Should be a bigdl.orca.learn.trigger, like EveryEpoch() or SeveralIteration(
               num_iterations).
        :param auto_shard_files: whether to automatically detect if the dataset is file-based
               and, if so, shard by files; otherwise shard by records. Default is False.
        :param feed_dict: a dictionary. The key is a TensorFlow tensor, usually a
               placeholder; the value is a tuple of two elements. The first element is the
               value to feed to the tensor in the training phase, and the second is the
               value to feed in the validation phase.
        """

        assert self.labels is not None, \
            "labels is None; it should not be None in training"
        assert self.loss is not None, \
            "loss is None; it should not be None in training"
        assert self.optimizer is not None, \
            "optimizer is None; it should not be None in training"

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in training"
            assert label_cols is not None, \
                "label columns is None; it should not be None in training"

        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in training"
                assert label_cols is not None, \
                    "label columns is None; it should not be None in training"
                data, validation_data = process_xshards_of_pandas_dataframe(
                    data, feature_cols, label_cols, validation_data, "fit")

        if checkpoint_trigger is not None:
            checkpoint_trigger = Trigger.convert_trigger(checkpoint_trigger)

        memory_type = OrcaContext.train_data_store
        dataset = to_dataset(data,
                             batch_size=batch_size,
                             batch_per_thread=-1,
                             validation_data=validation_data,
                             feature_cols=feature_cols,
                             label_cols=label_cols,
                             hard_code_batch_size=False,
                             sequential_order=False,
                             shuffle=True,
                             auto_shard_files=auto_shard_files,
                             memory_type=memory_type)

        if feed_dict is not None:
            tensor_with_value = {
                key: (value[0], value[1])
                for key, value in feed_dict.items()
            }
        else:
            tensor_with_value = None

        if self.use_bigdl_optim:
            self.tf_optimizer = TFOptimizer.from_loss(
                self.loss,
                self.optimizer,
                session=self.sess,
                inputs=(self.inputs, self.labels),
                dataset=dataset,
                clip_norm=self.clip_norm,
                clip_value=self.clip_value,
                metrics=self.metrics,
                tensor_with_value=tensor_with_value,
                session_config=session_config,
                model_dir=self.model_dir,
                updates=self.updates)
        else:

            self.tf_optimizer = TFOptimizer.from_train_op(
                train_op=self.train_op,
                loss=self.loss,
                inputs=self.inputs,
                labels=self.labels,
                dataset=dataset,
                metrics=self.metrics,
                updates=self.updates,
                sess=self.sess,
                tensor_with_value=tensor_with_value,
                session_config=session_config,
                model_dir=self.model_dir)

        if self.load_checkpoint:
            self.tf_optimizer.load_checkpoint(self.checkpoint_path,
                                              self.checkpoint_version)

        if self.log_dir and self.app_name:
            self.tf_optimizer.estimator.set_tensorboard(
                self.log_dir, self.app_name)

        self.tf_optimizer.optimize(end_trigger=MaxEpoch(epochs),
                                   checkpoint_trigger=checkpoint_trigger)
        return self

    def predict(
        self,
        data,
        batch_size=4,
        feature_cols=None,
        auto_shard_files=False,
    ):
        """
        Predict input data.

        :param data: data to be predicted. It can be XShards, Spark DataFrame.
               If data is XShards, each partition can be a Pandas DataFrame or a dictionary of
               {'x': feature}, where feature is a numpy array or a tuple of numpy arrays.
        :param batch_size: batch size per thread
        :param feature_cols: list of feature column names if input data is Spark DataFrame
               or XShards of Pandas DataFrame.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based
               and, if so, shard by files; otherwise shard by records. Default is False.
        :return: predicted result.
                 If the input data is XShards or tf.data.Dataset, the prediction result is
                 an XShards, and each partition of the XShards is a dictionary of
                 {'prediction': result}, where result is a numpy array or a list of
                 numpy arrays.
                 If the input data is a Spark DataFrame, the prediction result is a DataFrame
                 which includes the original columns plus a 'prediction' column. The
                 'prediction' column can be FloatType, VectorUDT or Array of VectorUDT
                 depending on the model output shape.
        """

        assert self.outputs is not None, \
            "output is None, it should not be None in prediction"
        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in prediction"
        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in prediction"
                data = process_xshards_of_pandas_dataframe(data, feature_cols)

        assert not is_tf_data_dataset(data), "tf.data.Dataset currently cannot be used for " \
                                             "estimator prediction"

        dataset = to_dataset(
            data,
            batch_size=-1,
            batch_per_thread=batch_size,
            validation_data=None,
            feature_cols=feature_cols,
            label_cols=None,
            hard_code_batch_size=False,
            sequential_order=True,
            shuffle=False,
            auto_shard_files=auto_shard_files,
        )

        flat_inputs = nest.flatten(self.inputs)
        flat_outputs = nest.flatten(self.outputs)
        tfnet = TFNet.from_session(sess=self.sess,
                                   inputs=flat_inputs,
                                   outputs=flat_outputs)
        predicted_rdd = tfnet.predict(dataset)
        if isinstance(data, DataFrame):
            return convert_predict_rdd_to_dataframe(data, predicted_rdd)
        elif isinstance(data, SparkXShards):
            return convert_predict_rdd_to_xshard(data, predicted_rdd)
        else:
            return predicted_rdd

    def evaluate(
        self,
        data,
        batch_size=32,
        feature_cols=None,
        label_cols=None,
        auto_shard_files=False,
    ):
        """
        Evaluate model.

        :param data: evaluation data. It can be XShards, Spark DataFrame, tf.data.Dataset.
               If data is XShards, each partition can be a Pandas DataFrame or a dictionary of
               {'x': feature, 'y': label}, where feature(label) is a numpy array or a tuple of
               numpy arrays.
               If data is tf.data.Dataset, each element is a tuple of input tensors.
        :param batch_size: batch size per thread.
        :param feature_cols: feature column names if evaluation data is Spark DataFrame
               or XShards of Pandas DataFrame.
        :param label_cols: label column names if evaluation data is Spark DataFrame or
               XShards of Pandas DataFrame.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based
               and, if so, shard by files; otherwise shard by records. Default is False.
        :return: evaluation result as a dictionary of {'metric name': metric value}
        """

        assert self.metrics is not None, \
            "metrics is None, it should not be None in evaluate"

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in evaluation"
            assert label_cols is not None, \
                "label columns is None; it should not be None in evaluation"

        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in evaluation"
                assert label_cols is not None, \
                    "label columns is None; it should not be None in evaluation"
                data = process_xshards_of_pandas_dataframe(
                    data, feature_cols, label_cols)

        dataset = to_dataset(
            data,
            batch_size=-1,
            batch_per_thread=batch_size,
            validation_data=None,
            feature_cols=feature_cols,
            label_cols=label_cols,
            hard_code_batch_size=False,
            sequential_order=True,
            shuffle=False,
            auto_shard_files=auto_shard_files,
        )

        flat_inputs = nest.flatten(self.inputs)
        flat_labels = nest.flatten(self.labels)

        return evaluate_metrics(flat_inputs + flat_labels,
                                sess=self.sess,
                                dataset=dataset,
                                metrics=self.metrics)

    def save_tf_checkpoint(self, path):
        """
        Save tensorflow checkpoint in this estimator.

        :param path: tensorflow checkpoint path.
        """
        save_tf_checkpoint(self.sess, path)

    def load_tf_checkpoint(self, path):
        """
        Load tensorflow checkpoint to this estimator.

        :param path: tensorflow checkpoint path.
        """
        load_tf_checkpoint(self.sess, path)

    def get_model(self):
        """
        get_model is not supported in the TensorFlow graph estimator.
        """
        raise NotImplementedError

    def save(self, model_path):
        """
        Save model (tensorflow checkpoint) to model_path.

        :param model_path: path to save the trained model.
        :return:
        """
        self.save_tf_checkpoint(model_path)

    def load(self, model_path):
        """
        Load existing model (tensorflow checkpoint) from model_path.

        :param model_path: Path to the existing tensorflow checkpoint.
        :return:
        """
        self.load_tf_checkpoint(model_path)

    def clear_gradient_clipping(self):
        """
        Clear gradient clipping is not supported in TensorFlowEstimator.
        """
        raise NotImplementedError

    def set_constant_gradient_clipping(self, min, max):
        """
        Set constant gradient clipping is not supported in TensorFlowEstimator. Please pass the
        clip_value to Estimator.from_graph.
        """
        raise NotImplementedError

    def set_l2_norm_gradient_clipping(self, clip_norm):
        """
        Set l2 norm gradient clipping is not supported in TensorFlowEstimator. Please pass the
        clip_norm to Estimator.from_graph.
        """
        raise NotImplementedError

    def shutdown(self):
        """
        Close TensorFlow session and release resources.
        """
        self.sess.close()
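
The docstrings above point to Estimator.from_graph as the public entry point (e.g. for passing clip_norm or clip_value). A usage sketch against this class, assuming a from_graph factory whose keyword arguments mirror __init__; the import path and exact signature may differ, and the graph tensors (images, logits, label_ph, loss_op, acc_op, is_training) and DataFrames (train_df, test_df, val_df) are placeholders built elsewhere:

    from bigdl.orca.learn.tf.estimator import Estimator  # assumed import path

    est = Estimator.from_graph(inputs=images,
                               outputs=logits,
                               labels=label_ph,
                               loss=loss_op,
                               optimizer=tf.train.AdamOptimizer(),
                               clip_value=(-5.0, 5.0),  # tuple form: (clip_min, clip_max)
                               metrics={"acc": acc_op})
    # feed_dict maps a placeholder to (train_value, validation_value),
    # e.g. toggling a dropout switch between the two phases.
    est.fit(data=train_df, epochs=2, batch_size=32,
            feature_cols=["features"], label_cols=["label"],
            feed_dict={is_training: (True, False)})
    predictions = est.predict(data=test_df, feature_cols=["features"])
    metrics = est.evaluate(data=val_df, batch_size=32,
                           feature_cols=["features"], label_cols=["label"])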
Example #8
    def _bert_squad_model_fn(features, labels, mode, params):
        import tensorflow as tf
        from bigdl.orca.tfpark import ZooOptimizer
        final_hidden = bert_model(features, labels, mode,
                                  params).get_sequence_output()
        final_hidden_shape = modeling.get_shape_list(final_hidden,
                                                     expected_rank=3)
        batch_size = final_hidden_shape[0]
        seq_length = final_hidden_shape[1]
        hidden_size = final_hidden_shape[2]

        output_weights = tf.get_variable(
            "cls/squad/output_weights", [2, hidden_size],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        output_bias = tf.get_variable("cls/squad/output_bias", [2],
                                      initializer=tf.zeros_initializer())

        final_hidden_matrix = tf.reshape(
            final_hidden, [batch_size * seq_length, hidden_size])
        logits = tf.matmul(final_hidden_matrix,
                           output_weights,
                           transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)

        logits = tf.reshape(logits, [batch_size, seq_length, 2])
        logits = tf.transpose(logits, [2, 0, 1])
        unstacked_logits = tf.unstack(logits, axis=0)
        (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])

        if mode == tf.estimator.ModeKeys.TRAIN:

            def compute_loss(logits, positions):
                one_hot_positions = tf.one_hot(positions,
                                               depth=seq_length,
                                               dtype=tf.float32)
                log_probs = tf.nn.log_softmax(logits, axis=-1)
                loss = -tf.reduce_mean(
                    tf.reduce_sum(one_hot_positions * log_probs, axis=-1))
                return loss

            start_positions = labels["start_positions"]
            end_positions = labels["end_positions"]

            start_loss = compute_loss(start_logits, start_positions)
            end_loss = compute_loss(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2.0
            train_op = ZooOptimizer(optimizer).minimize(total_loss)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              train_op=train_op,
                                              loss=total_loss)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "unique_ids": features["unique_ids"],
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)
        else:
            raise ValueError(
                "Currently only TRAIN and PREDICT modes are supported. "
                "SQuAD uses a separate script for EVAL")
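
In TRAIN mode this model_fn expects labels as a dictionary with "start_positions" and "end_positions", while PREDICT mode only needs features (including "unique_ids"). One illustrative training element the input pipeline would produce (the field shapes and the 384 sequence length are assumptions, not from the original code):

    import numpy as np

    features = {"input_ids": np.zeros((1, 384), dtype=np.int32),
                "unique_ids": np.array([0], dtype=np.int64)}
    labels = {"start_positions": np.array([17], dtype=np.int32),
              "end_positions": np.array([23], dtype=np.int32)}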
Example #9
    # NOISE_DIM and MODEL_DIR are constants defined elsewhere in the original script.
    def input_fn():
        def map_func(data):
            image = data['image']
            label = data['label']
            one_hot_label = tf.one_hot(label, depth=10)
            noise = tf.random.normal(mean=0.0, stddev=1.0, shape=(NOISE_DIM, ))
            generator_inputs = (noise, one_hot_label)
            discriminator_inputs = ((tf.to_float(image) / 255.0) - 0.5) * 2
            return (generator_inputs, discriminator_inputs)

        ds = tfds.load("mnist", split="train")
        ds = ds.map(map_func)
        dataset = TFDataset.from_tf_data_dataset(ds, batch_size=56)
        return dataset

    opt = GANEstimator(
        generator_fn=conditional_generator,
        discriminator_fn=conditional_discriminator,
        generator_loss_fn=wasserstein_generator_loss,
        discriminator_loss_fn=wasserstein_discriminator_loss,
        generator_optimizer=ZooOptimizer(tf.train.AdamOptimizer(1e-5, 0.5)),
        discriminator_optimizer=ZooOptimizer(tf.train.AdamOptimizer(1e-4,
                                                                    0.5)),
        model_dir=MODEL_DIR,
        session_config=tf.ConfigProto())

    # Alternate 20 rounds of 1000 training iterations with evaluation;
    # eval() refers to an evaluation helper defined elsewhere in the original script.
    for i in range(20):
        opt.train(input_fn, MaxIteration(1000))
        eval()

    print("finished...")
    sc.stop()
Example #10
    feature_columns = []
    for feature_name in CATEGORICAL_COLUMNS:
        vocabulary = dftrain[feature_name].unique()
        feature_columns.append(
            tf.feature_column.categorical_column_with_vocabulary_list(
                feature_name, vocabulary))

    for feature_name in NUMERIC_COLUMNS:
        feature_columns.append(
            tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

    sc = init_nncontext()

    linear_est = tf.estimator.LinearClassifier(
        feature_columns=feature_columns,
        optimizer=ZooOptimizer(tf.train.FtrlOptimizer(0.2)),
        model_dir="/tmp/estimator/linear")
    zoo_est = TFEstimator(linear_est)
    train_input_fn = make_input_fn(dftrain,
                                   y_train,
                                   mode=tf.estimator.ModeKeys.TRAIN,
                                   batch_size=32)
    zoo_est.train(train_input_fn, steps=200)

    eval_input_fn = make_input_fn(dfeval,
                                  y_eval,
                                  mode=tf.estimator.ModeKeys.EVAL,
                                  batch_per_thread=8)
    eval_result = zoo_est.evaluate(eval_input_fn, ["acc"])
    print(eval_result)