Example #1
def get_wide_deep():
  # define column types
  races = ['White', 'Black', 'American Indian', 'Chinese', 
           'Japanese', 'Hawaiian', 'Filipino', 'Unknown',
           'Asian Indian', 'Korean', 'Samoan', 'Vietnamese']
  is_male,mother_age,mother_race,plurality,gestation_weeks,mother_married,cigarette_use,alcohol_use = \
   [ \
    tflayers.sparse_column_with_keys('is_male', keys=['True', 'False']),
    tflayers.real_valued_column('mother_age'),
    tflayers.sparse_column_with_keys('mother_race', keys=races),
    tflayers.real_valued_column('plurality'),
    tflayers.real_valued_column('gestation_weeks'),
    tflayers.sparse_column_with_keys('mother_married', keys=['True', 'False']),
    tflayers.sparse_column_with_keys('cigarette_use', keys=['True', 'False', 'None']),
    tflayers.sparse_column_with_keys('alcohol_use', keys=['True', 'False', 'None'])
    ]

  # which columns are wide (sparse, linear relationship to output) and which are deep (complex relationship to output)?
  wide = [is_male, mother_race, plurality, mother_married, cigarette_use, alcohol_use]
  deep = [\
                mother_age,
                gestation_weeks,
                tflayers.embedding_column(mother_race, 3)
               ]
  return wide, deep
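
The wide/deep split returned above is meant to feed the combined wide-and-deep estimators in tf.contrib.learn (the build_estimator example further down does the same thing for census data). A minimal sketch of wiring it up, assuming tflayers is tf.contrib.layers; the helper name and model directory are hypothetical, and the regressor variant is used here since these columns describe a numeric prediction task (the classifier variant takes the same column arguments):

from tensorflow.contrib import learn as tflearn

def build_wide_deep_estimator(model_dir):   # hypothetical helper
    wide, deep = get_wide_deep()
    # Sparse columns drive the linear ("wide") part; dense and embedded
    # columns feed the DNN ("deep") part.
    return tflearn.DNNLinearCombinedRegressor(
        model_dir=model_dir,
        linear_feature_columns=wide,
        dnn_feature_columns=deep,
        dnn_hidden_units=[64, 32])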
Example #2
    def testLinearlySeparableBinaryDataNoKernels(self):
        """Tests classifier w/o kernels (log. regression) for lin-separable data."""

        feature1 = layers.real_valued_column('feature1')
        feature2 = layers.real_valued_column('feature2')

        logreg_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[feature1, feature2])
        logreg_classifier.fit(input_fn=_linearly_separable_binary_input_fn,
                              steps=100)

        metrics = logreg_classifier.evaluate(
            input_fn=_linearly_separable_binary_input_fn, steps=1)
        # Since the data is linearly separable, the classifier should have small
        # loss and perfect accuracy.
        self.assertLess(metrics['loss'], 0.1)
        self.assertEqual(metrics['accuracy'], 1.0)

        # As a result, it should assign higher probability to class 1 for the 1st
        # and 3rd example and higher probability to class 0 for the second example.
        logreg_prob_predictions = list(
            logreg_classifier.predict_proba(
                input_fn=_linearly_separable_binary_input_fn))
        self.assertGreater(logreg_prob_predictions[0][1], 0.5)
        self.assertGreater(logreg_prob_predictions[1][0], 0.5)
        self.assertGreater(logreg_prob_predictions[2][1], 0.5)
  def testLinearlySeparableBinaryDataNoKernels(self):
    """Tests classifier w/o kernels (log. regression) for lin-separable data."""

    feature1 = layers.real_valued_column('feature1')
    feature2 = layers.real_valued_column('feature2')

    logreg_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[feature1, feature2])
    logreg_classifier.fit(
        input_fn=_linearly_separable_binary_input_fn, steps=100)

    metrics = logreg_classifier.evaluate(
        input_fn=_linearly_separable_binary_input_fn, steps=1)
    # Since the data is linearly separable, the classifier should have small
    # loss and perfect accuracy.
    self.assertLess(metrics['loss'], 0.1)
    self.assertEqual(metrics['accuracy'], 1.0)

    # As a result, it should assign higher probability to class 1 for the 1st
    # and 3rd example and higher probability to class 0 for the second example.
    logreg_prob_predictions = list(
        logreg_classifier.predict_proba(input_fn=
                                        _linearly_separable_binary_input_fn))
    self.assertGreater(logreg_prob_predictions[0][1], 0.5)
    self.assertGreater(logreg_prob_predictions[1][0], 0.5)
    self.assertGreater(logreg_prob_predictions[2][1], 0.5)
Example #4
def get_wide_deep():
  # define column types
  races = ['White', 'Black', 'American Indian', 'Chinese', 
           'Japanese', 'Hawaiian', 'Filipino', 'Unknown',
           'Asian Indian', 'Korean', 'Samoan', 'Vietnamese']
  is_male,mother_age,mother_race,plurality,gestation_weeks,mother_married,cigarette_use,alcohol_use = \
   [ \
    tflayers.sparse_column_with_keys('is_male', keys=['True', 'False']),
    tflayers.real_valued_column('mother_age'),
    tflayers.sparse_column_with_keys('mother_race', keys=races),
    tflayers.real_valued_column('plurality'),
    tflayers.real_valued_column('gestation_weeks'),
    tflayers.sparse_column_with_keys('mother_married', keys=['True', 'False']),
    tflayers.sparse_column_with_keys('cigarette_use', keys=['True', 'False', 'None']),
    tflayers.sparse_column_with_keys('alcohol_use', keys=['True', 'False', 'None'])
    ]

  # which columns are wide (sparse, linear relationship to output) and which are deep (complex relationship to output)?
  wide = [is_male, mother_race, plurality, mother_married, cigarette_use, alcohol_use]
  deep = [\
                mother_age,
                gestation_weeks,
                tflayers.embedding_column(mother_race, 3)
               ]
  return wide, deep
def build_estimator(model_dir, model_type):
    """build an estimator"""

    # base sparse feature process
    gender = layers.sparse_column_with_keys(column_name='gender', keys=['female', 'male'])
    education = layers.sparse_column_with_hash_bucket(column_name='education', hash_bucket_size=1000)
    relationship = layers.sparse_column_with_hash_bucket(column_name='relationship', hash_bucket_size=100)
    workclass = layers.sparse_column_with_hash_bucket(column_name='workclass', hash_bucket_size=100)
    occupation = layers.sparse_column_with_hash_bucket(column_name='occupation', hash_bucket_size=1000)
    native_country = layers.sparse_column_with_hash_bucket(column_name='native_country', hash_bucket_size=1000)

    # base continuous feature
    age = layers.real_valued_column(column_name='age')
    education_num = layers.real_valued_column(column_name='education_num')
    capital_gain = layers.real_valued_column(column_name='capital_gain')
    capital_loss = layers.real_valued_column(column_name='capital_loss')
    hours_per_week = layers.real_valued_column(column_name='hours_per_week')

    # transformation: bucketization converts the continuous variable into categorical buckets, which improves accuracy
    age_bucket = layers.bucketized_column(source_column=age,
                                          boundaries=[18, 25, 30, 35, 40, 45,50, 55, 60, 65])

    # wide columns and deep columns
    # features used by the deep model and features used by the wide model
    # the wide model only uses categorical features
    wide_columns = [gender, native_country, education, relationship, workclass, occupation, age_bucket,
                    layers.crossed_column(columns=[education, occupation], hash_bucket_size=int(1e4)),
                    layers.crossed_column(columns=[age_bucket, education, occupation], hash_bucket_size=int(1e6)),
                    layers.crossed_column(columns=[native_country, occupation], hash_bucket_size=int(1e4))]

    deep_columns = [layers.embedding_column(workclass, dimension=8),
                    layers.embedding_column(education, dimension=8),
                    layers.embedding_column(gender, dimension=8),
                    layers.embedding_column(relationship, dimension=8),
                    layers.embedding_column(native_country, dimension=8),
                    layers.embedding_column(occupation, dimension=8),
                    age, education_num, capital_gain, capital_loss, hours_per_week]

    if model_type == "wide":
        m=learn.LinearClassifier(feature_columns=wide_columns, model_dir=model_dir)
    elif model_type == "deep":
        m=learn.DNNClassifier(feature_columns=deep_columns, model_dir=model_dir, hidden_units=[100, 50])
    else:
        m=learn.DNNLinearCombinedClassifier(model_dir=model_dir,
                                            linear_feature_columns=wide_columns,
                                            dnn_feature_columns=deep_columns,
                                            dnn_hidden_units=[256, 128, 64],
                                            dnn_activation_fn=tf.nn.relu)
    return m
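
A rough sketch of how the returned estimator might be trained, assuming a pandas DataFrame train_df with the census columns used above plus a binary 'label' column (the DataFrame, label name, and model_type string are hypothetical); the input_fn follows the same constant/SparseTensor pattern as the part4 example later in this list:

import tensorflow as tf

CONTINUOUS = ['age', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week']
CATEGORICAL = ['gender', 'education', 'relationship', 'workclass', 'occupation', 'native_country']

def census_input_fn(df):
    # Dense features become constant tensors; categorical features become
    # single-column SparseTensors keyed by the column names above.
    features = {k: tf.constant(df[k].values) for k in CONTINUOUS}
    for k in CATEGORICAL:
        features[k] = tf.SparseTensor(
            indices=[[i, 0] for i in range(df[k].size)],
            values=df[k].values,
            dense_shape=[df[k].size, 1])
    labels = tf.constant(df['label'].values)
    return features, labels

m = build_estimator(model_dir='census_model', model_type='wide_n_deep')
m.fit(input_fn=lambda: census_input_fn(train_df), steps=200)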
  def testInvalidNumberOfClasses(self):
    """ValueError raised when the kernel mappers provided have invalid type."""

    feature = layers.real_valued_column('feature')
    with self.assertRaises(ValueError):
      _ = kernel_estimators.KernelLinearClassifier(
          feature_columns=[feature], n_classes=1)
Example #7
    def testInvalidNumberOfClasses(self):
        """ValueError raised when the kernel mappers provided have invalid type."""

        feature = layers.real_valued_column('feature')
        with self.assertRaises(ValueError):
            _ = kernel_estimators.KernelLinearClassifier(
                feature_columns=[feature], n_classes=1)
Example #8
    def testMulticlassDataWithAndWithoutKernels(self):
        """Tests classifier w/ and w/o kernels on multiclass data."""
        feature_column = layers.real_valued_column('feature', dimension=4)

        # Metrics for linear classifier (no kernels).
        linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[feature_column], n_classes=3)
        linear_classifier.fit(input_fn=test_data.iris_input_multiclass_fn,
                              steps=50)
        linear_metrics = linear_classifier.evaluate(
            input_fn=test_data.iris_input_multiclass_fn, steps=1)
        linear_loss = linear_metrics['loss']
        linear_accuracy = linear_metrics['accuracy']

        # Using kernel mappers makes it possible to discover non-linearities in the data
        # (via RBF kernel approximation), which reduces loss and increases accuracy.
        kernel_mappers = {
            feature_column: [
                RandomFourierFeatureMapper(input_dim=4,
                                           output_dim=50,
                                           stddev=1.0,
                                           name='rffm')
            ]
        }
        kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[], n_classes=3, kernel_mappers=kernel_mappers)
        kernel_linear_classifier.fit(
            input_fn=test_data.iris_input_multiclass_fn, steps=50)
        kernel_linear_metrics = kernel_linear_classifier.evaluate(
            input_fn=test_data.iris_input_multiclass_fn, steps=1)
        kernel_linear_loss = kernel_linear_metrics['loss']
        kernel_linear_accuracy = kernel_linear_metrics['accuracy']
        self.assertLess(kernel_linear_loss, linear_loss)
        self.assertGreater(kernel_linear_accuracy, linear_accuracy)
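
The RandomFourierFeatureMapper used above approximates an RBF kernel by randomly projecting inputs into a higher-dimensional space. A standalone sketch of the mapping itself (assuming the mapper comes from tf.contrib.kernel_methods, as in these tests; the input values are made up):

import tensorflow as tf
from tensorflow.contrib.kernel_methods import RandomFourierFeatureMapper

x = tf.constant([[1.0, -0.5, 2.0, 0.3]])        # a single 4-dimensional example
rffm = RandomFourierFeatureMapper(
    input_dim=4, output_dim=50, stddev=1.0, name='rffm')
mapped = rffm.map(x)                            # projected features, shape [1, 50]
with tf.Session() as sess:
    print(sess.run(mapped).shape)               # (1, 50)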
def main():
    # If the training and test sets aren't stored locally, download them.
    if not os.path.exists(IRIS_TRAINING):
        raw = urlopen(IRIS_TRAINING_URL).read()
        with open(IRIS_TRAINING, "wb") as f:
            f.write(raw)

    if not os.path.exists(IRIS_TEST):
        raw = urlopen(IRIS_TEST_URL).read()
        with open(IRIS_TEST, "wb") as f:
            f.write(raw)

    # Load datasets.
    training_set = load_csv_with_header(filename=IRIS_TRAINING,
                                        target_dtype=np.int,
                                        features_dtype=np.float32)
    test_set = load_csv_with_header(filename=IRIS_TEST,
                                    target_dtype=np.int,
                                    features_dtype=np.float32)

    # Specify that all features have real-value data
    feature_columns = [real_valued_column("", dimension=4)]

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    classifier = DNNClassifier(feature_columns=feature_columns,
                               hidden_units=[10, 20, 10],
                               n_classes=3,
                               model_dir="/tmp/iris_model")

    # Define the training inputs
    def get_train_inputs():
        x = tf.constant(training_set.data)
        y = tf.constant(training_set.target)

        return x, y

    # Fit model.
    classifier.fit(input_fn=get_train_inputs, steps=2000)

    # Define the test inputs
    def get_test_inputs():
        x = tf.constant(test_set.data)
        y = tf.constant(test_set.target)

        return x, y

    # Evaluate accuracy.
    accuracy_score = classifier.evaluate(input_fn=get_test_inputs,
                                         steps=1)["accuracy"]

    print("\nTest Accuracy: {0:f}\n".format(accuracy_score))

    # Classify two new flower samples.
    def new_samples():
        return np.array([[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]],
                        dtype=np.float32)

    predictions = list(classifier.predict(input_fn=new_samples))

    print("New Samples, Class Predictions:    {}\n".format(predictions))
def get_conv_classifier():
    n_classes = 5
    feature_columns = [layers.real_valued_column("", dimension=3)]

    # learning_rate = 1.0
    # optimizer = AdagradOptimizer(learning_rate)
    #
    # learning_rate = 1.0
    # optimizer = AdadeltaOptimizer(learning_rate=learning_rate)

    # ~ 62.55%
    learning_rate = 0.01
    optimizer = AdamOptimizer(learning_rate, epsilon=0.1)

    # learning_rate = 0.05
    # optimizer = GradientDescentOptimizer(learning_rate)

    # learning_rate = 0.1
    # optimizer = RMSPropOptimizer(learning_rate, momentum=0.1)

    # learning_rate = 0.1
    # optimizer = FtrlOptimizer(learning_rate)

    return SKCompat(Estimator(
        model_fn=get_conv_model,
        params={
            'head': head_lib._multi_class_head(  # pylint: disable=protected-access
                n_classes,
                enable_centered_bias=False),
            'feature_columns': feature_columns,
            'activation_fn': tf.nn.relu,
            'learning_rate': learning_rate,
            'optimizer': optimizer
        },
        model_dir='saved_model'))
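
SKCompat wraps the Estimator in a scikit-learn-style numpy interface, so the classifier above can be fit and scored on arrays directly. A minimal sketch, assuming get_conv_model and the other names referenced above are defined, and using made-up arrays shaped to match the (dimension=3) feature column:

import numpy as np

classifier = get_conv_classifier()
x = np.random.rand(128, 3).astype(np.float32)   # hypothetical feature matrix
y = np.random.randint(0, 5, size=128)           # hypothetical labels for 5 classes

classifier.fit(x=x, y=y, batch_size=32, steps=100)
print(classifier.score(x=x, y=y))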
Example #11
 def get_feature_columns(self):
   """Get a list of feature column names."""
   feature_columns = [
       'idx_{}.coef_{:.3f}'.format(i, self._coefficients[i])
       for i in range(self._num_feature)
   ]
   return [contrib_layers.real_valued_column(fc) for fc in feature_columns]
Example #12
def _add_bias_column(feature_columns, columns_to_tensors, bias_variable,
                     columns_to_variables):
  """Adds a fake bias feature column filled with all 1s."""
  # TODO(b/31008490): Move definition to a common constants place.
  bias_column_name = "tf_virtual_bias_column"
  if any(col.name == bias_column_name for col in feature_columns):
    raise ValueError("%s is a reserved column name." % bias_column_name)
  if not feature_columns:
    raise ValueError("feature_columns can't be empty.")

  # Loop through input tensors until we can figure out batch_size.
  batch_size = None
  for column in columns_to_tensors.values():
    if isinstance(column, tuple):
      column = column[0]
    if isinstance(column, sparse_tensor.SparseTensor):
      shape = tensor_util.constant_value(column.dense_shape)
      if shape is not None:
        batch_size = shape[0]
        break
    else:
      batch_size = array_ops.shape(column)[0]
      break
  if batch_size is None:
    raise ValueError("Could not infer batch size from input features.")

  bias_column = layers.real_valued_column(bias_column_name)
  columns_to_tensors[bias_column] = array_ops.ones([batch_size, 1],
                                                   dtype=dtypes.float32)
  columns_to_variables[bias_column] = [bias_variable]
  def testMulticlassDataWithAndWithoutKernels(self):
    """Tests classifier w/ and w/o kernels on multiclass data."""
    feature_column = layers.real_valued_column('feature', dimension=4)

    # Metrics for linear classifier (no kernels).
    linear_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[feature_column], n_classes=3)
    linear_classifier.fit(input_fn=test_data.iris_input_multiclass_fn, steps=50)
    linear_metrics = linear_classifier.evaluate(
        input_fn=test_data.iris_input_multiclass_fn, steps=1)
    linear_loss = linear_metrics['loss']
    linear_accuracy = linear_metrics['accuracy']

    # Using kernel mappers makes it possible to discover non-linearities in the data
    # (via RBF kernel approximation), which reduces loss and increases accuracy.
    kernel_mappers = {
        feature_column: [
            RandomFourierFeatureMapper(
                input_dim=4, output_dim=50, stddev=1.0, name='rffm')
        ]
    }
    kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[], n_classes=3, kernel_mappers=kernel_mappers)
    kernel_linear_classifier.fit(
        input_fn=test_data.iris_input_multiclass_fn, steps=50)
    kernel_linear_metrics = kernel_linear_classifier.evaluate(
        input_fn=test_data.iris_input_multiclass_fn, steps=1)
    kernel_linear_loss = kernel_linear_metrics['loss']
    kernel_linear_accuracy = kernel_linear_metrics['accuracy']
    self.assertLess(kernel_linear_loss, linear_loss)
    self.assertGreater(kernel_linear_accuracy, linear_accuracy)
Example #14
File: customer.py  Project: zjjott/html
def CustomerTrainTask(self, dataset_id, model_id, user_id, **kwargs):
    steps = 2000
    dataset = DatasetModel.query().get(dataset_id)
    csv_file = StringIO(dataset.data)
    training_set = load_dataset(csv_file)
    model_attrs = {}
    if "dimension" in kwargs:
        model_attrs["feature_columns"] = [
            real_valued_column("", dimension=kwargs['dimension'])
        ]
    if "hidden_units" in kwargs:
        model_attrs['hidden_units'] = map(int, kwargs['hidden_units'])
    if "n_classes" in kwargs:
        model_attrs['n_classes'] = kwargs['n_classes']
    model_db = MLMethod.query().get(model_id)
    model = loads(model_db.data)
    temp_folder = mkdtemp()
    fd, filepath = mkstemp(suffix="tar.gz")
    try:
        classifier = model(model_dir=temp_folder, **model_attrs)
        classifier = classifier.fit(x=training_set.data,
                                    y=training_set.target,
                                    steps=steps)
        tar = tarfile.open(filepath, "w:gz")
        tar.add(temp_folder, arcname="model")
        tar.close()
        with open(filepath, "rb") as fout:
            trained = MLMethod(
                user_id=user_id,
                name=model_db.name,
                description="%s 在 %s 上的模型" % (model_db.name, dataset.name),
                public=model_db.public,
                trained=True,
                data=dumps((CustomerPredictTask, model, kwargs, fout.read()),
                           HIGHEST_PROTOCOL))
        trained.save_object()
        MethodKwargs(
            model_id=trained.id,
            name="file",
            label="数据文件",
            description="数据文件和文本数据选其一即可",
            required=False,
            type="file",
        ).save_object()
        MethodKwargs(
            model_id=trained.id,
            name="data",
            label="数据文本",
            description="数据文件和文本数据选其一即可",
            required=False,
            type="str",
        ).save_object()
    finally:
        # Deleting this file raises an error, so just leave it alone.
        pass
        # os.unlink(filepath)
        # rmtree(temp_folder)
        # MethodKwargs(model_id)
    return trained.id
Example #15
def _add_bias_column(feature_columns, columns_to_tensors, bias_variable, targets, columns_to_variables):
    # TODO(b/31008490): Move definition to a common constants place.
    bias_column_name = "tf_virtual_bias_column"
    if any(col.name == bias_column_name for col in feature_columns):
        raise ValueError("%s is a reserved column name." % bias_column_name)
    bias_column = layers.real_valued_column(bias_column_name)
    columns_to_tensors[bias_column] = array_ops.ones_like(targets, dtype=dtypes.float32)
    columns_to_variables[bias_column] = [bias_variable]
Example #16
def get_feature_column(mode):
    feature_columns = []

    feature_columns.append(layers.real_valued_column(
        column_name='res', dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
    feature_columns.append(layers.real_valued_column(
        column_name='res_len', dimension=1, dtype=tf.int64))

    feature_columns.append(layers.real_valued_column(
        column_name='utters', dimension=TEXT_FEATURE_SIZE * TURN_FEATURE_SIZE, dtype=tf.int64))
    feature_columns.append(layers.real_valued_column(
        column_name='utters_len', dimension=TURN_FEATURE_SIZE, dtype=tf.int64))

    if mode == learn.ModeKeys.TRAIN:
        feature_columns.append(layers.real_valued_column(
            column_name='label', dimension=1, dtype=tf.int64))
    elif mode == learn.ModeKeys.EVAL:
        for i in xrange(DISTRACTOR_COUNT):
            feature_columns.append(layers.real_valued_column(
                column_name='distractor_{}'.format(i),
                dimension=TEXT_FEATURE_SIZE,
                dtype=tf.int64))
            feature_columns.append(layers.real_valued_column(
                column_name='distractor_{}_len'.format(i),
                dimension=1,
                dtype=tf.int64))

    #print('feature_columns=%s' % (feature_columns))
    return set(feature_columns)
Example #17
def get_wide_deep():
    # define column types
    
    StyleName, quantity, demand, org_ret_price, sell_price, margin, off_orig_retail, total_ots = \
    [ \
    tflayers.sparse_column_with_hash_bucket('Style_Name', hash_bucket_size = 1000),
    tflayers.real_valued_column('Quantity'),
    tflayers.real_valued_column('Demand'),
    tflayers.real_valued_column('Original_Retail_Price'),
    tflayers.real_valued_column('Selling_Price'),
    tflayers.real_valued_column('Margin'),
    tflayers.real_valued_column('off_Orig_Retail'),
    tflayers.real_valued_column('Total_OTS'),
    ]
    # which columns are wide (sparse, linear relationship to output) and which are deep (complex relationship to output)?
    wide = [StyleName, quantity, demand]
    deep = [\
               org_ret_price,
               sell_price,
               margin,
               off_orig_retail,
               total_ots,
               tflayers.embedding_column(StyleName, 3)
               ]
    return wide, deep
Example #18
def get_features():
    # Using three basic inputs
    real = {
      colname : tflayers.real_valued_column(colname) \
          for colname in \
            ('dep_delay,taxiout,distance').split(',')
    }
    sparse = {}
    return real, sparse
Example #19
def get_features_ch8():
    # Using the basic three inputs plus calculated time averages
    real = {
      colname: tflayers.real_valued_column(colname) \
      for colname in \
      ('dep_delay,taxiout,distance,avg_dep_delay,avg_arr_delay').split(',')
    }
    sparse = {}
    return real, sparse
Example #20
 def get_feature_columns(self):
   """Get a list of feature column names."""
   num_feature = self._num_pair * 2
   x1_col = ['xorpair_{}.idx_{}'.format(i, i) for i in range(self._num_pair)]
   x2_col = [
       'xorpair_{}.idx_{}'.format(i - self._num_pair, i)
       for i in range(self._num_pair, num_feature)
   ]
   return [contrib_layers.real_valued_column(fc) for fc in x1_col + x2_col]
Example #21
def get_features_ch7():
    """Using only the three inputs we originally used in Chapter 7"""
    real = {
      colname : tflayers.real_valued_column(colname) \
          for colname in \
            ('dep_delay,taxiout,distance').split(',')
    }
    sparse = {}
    return real, sparse
Example #22
def get_features_ch8():
    """Using the three inputs we originally used in Chapter 7, plus the time averages computed in Chapter 8"""
    real = {
      colname : tflayers.real_valued_column(colname) \
          for colname in \
            ('dep_delay,taxiout,distance,avg_dep_delay,avg_arr_delay').split(',')
    }
    sparse = {}
    return real, sparse
Example #23
def _add_bias_column(feature_columns, columns_to_tensors, bias_variable,
                     targets, columns_to_variables):
    # TODO(b/31008490): Move definition to a common constants place.
    bias_column_name = "tf_virtual_bias_column"
    if any(col.name == bias_column_name for col in feature_columns):
        raise ValueError("%s is a reserved column name." % bias_column_name)
    bias_column = layers.real_valued_column(bias_column_name)
    columns_to_tensors[bias_column] = array_ops.ones_like(targets,
                                                          dtype=dtypes.float32)
    columns_to_variables[bias_column] = [bias_variable]
Example #24
    def testInvalidKernelMapper(self):
        """ValueError raised when the kernel mappers provided have invalid type."""
        class DummyKernelMapper(object):
            def __init__(self):
                pass

        feature = layers.real_valued_column('feature')
        kernel_mappers = {feature: [DummyKernelMapper()]}
        with self.assertRaises(ValueError):
            _ = kernel_estimators.KernelLinearClassifier(
                feature_columns=[feature], kernel_mappers=kernel_mappers)
def get_classifier():
    # (kernel_size * kernel_size, 3)
    feature_columns = [layers.real_valued_column("", dimension=3)]
    return DNNClassifier(feature_columns=feature_columns,
                         hidden_units=[256, 128],
                         n_classes=5,
                         model_dir="saved_model",
                         # optimizer=AdadeltaOptimizer(learning_rate=0.1)
                         # optimizer=AdamOptimizer()
                         # dropout=0.5
                         )
Example #26
def _dnn_feature_columns(feature_columns):
    """Generate DNN feature columns."""
    dnn_columns = []
    for col in feature_columns:
        if isinstance(col, _SparseColumnKeys):
            # Sparse (categorical) columns must be one-hot encoded before they
            # can be fed to a DNN.
            dnn_columns.append(one_hot_column(col))
        else:
            dnn_columns.append(real_valued_column(col, dtype=tf.float64))
    return dnn_columns
Example #27
def get_classifier():
    # (kernel_size * kernel_size, 3)
    feature_columns = [layers.real_valued_column("", dimension=3)]
    return DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[256, 128],
        n_classes=5,
        model_dir="saved_model",
        # optimizer=AdadeltaOptimizer(learning_rate=0.1)
        # optimizer=AdamOptimizer()
        # dropout=0.5
    )
Example #28
 def get_feature_columns(self):
   """Get a list of feature column names."""
   out = []
   count, group = 0, 0
   for order in self._orders:
     for group_idx in range(self._num_group_per_order):
       for _ in range(order):
         out.append('mult_group_{}.idx_{}.order_{}.group_coef_{:.3}'.format(
             group, count, order,
             self._group_coefficients_by_order[order][group_idx]))
         count += 1
       group += 1
   return [contrib_layers.real_valued_column(fc) for fc in out]
  def testInvalidKernelMapper(self):
    """ValueError raised when the kernel mappers provided have invalid type."""

    class DummyKernelMapper(object):

      def __init__(self):
        pass

    feature = layers.real_valued_column('feature')
    kernel_mappers = {feature: [DummyKernelMapper()]}
    with self.assertRaises(ValueError):
      _ = kernel_estimators.KernelLinearClassifier(
          feature_columns=[feature], kernel_mappers=kernel_mappers)
Example #30
 def testExtractFeaturesWithTransformation(self):
     """Tests feature extraction."""
     with self.test_session():
         features = {}
         features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32)
         features["sparse_float"] = sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.float32),
             array_ops.zeros([2], dtypes.int64))
         features["sparse_categorical"] = sparse_tensor.SparseTensor(
             array_ops.zeros([2, 2], dtypes.int64),
             array_ops.zeros([2], dtypes.string),
             array_ops.zeros([2], dtypes.int64))
         feature_columns = set()
         feature_columns.add(layers.real_valued_column("dense_float"))
         feature_columns.add(
             layers.feature_column._real_valued_var_len_column(
                 "sparse_float", is_sparse=True))
         feature_columns.add(
             feature_column_lib.sparse_column_with_hash_bucket(
                 "sparse_categorical", hash_bucket_size=1000000))
         (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
          sparse_float_shapes, sparse_int_indices, sparse_int_values,
          sparse_int_shapes) = (gbdt_batch.extract_features(
              features, feature_columns))
         self.assertEqual(len(fc_names), 3)
         self.assertAllEqual(
             fc_names,
             ["dense_float", "sparse_float", "sparse_categorical"])
         self.assertEqual(len(dense_floats), 1)
         self.assertEqual(len(sparse_float_indices), 1)
         self.assertEqual(len(sparse_float_values), 1)
         self.assertEqual(len(sparse_float_shapes), 1)
         self.assertEqual(len(sparse_int_indices), 1)
         self.assertEqual(len(sparse_int_values), 1)
         self.assertEqual(len(sparse_int_shapes), 1)
         self.assertAllEqual(dense_floats[0].eval(),
                             features["dense_float"].eval())
         self.assertAllEqual(sparse_float_indices[0].eval(),
                             features["sparse_float"].indices.eval())
         self.assertAllEqual(sparse_float_values[0].eval(),
                             features["sparse_float"].values.eval())
         self.assertAllEqual(sparse_float_shapes[0].eval(),
                             features["sparse_float"].dense_shape.eval())
         self.assertAllEqual(sparse_int_indices[0].eval(),
                             features["sparse_categorical"].indices.eval())
         self.assertAllEqual(sparse_int_values[0].eval(), [397263, 397263])
         self.assertAllEqual(
             sparse_int_shapes[0].eval(),
             features["sparse_categorical"].dense_shape.eval())
Example #31
def get_features_raw():
    real = {
      colname : tflayers.real_valued_column(colname) \
          for colname in \
            ('dep_delay,taxiout,distance,avg_dep_delay,avg_arr_delay' + 
             ',dep_lat,dep_lon,arr_lat,arr_lon').split(',')
    }
    sparse = {
      'carrier': tflayers.sparse_column_with_keys('carrier',
                  keys='AS,VX,F9,UA,US,WN,HA,EV,MQ,DL,OO,B6,NK,AA'.split(',')),
      'origin' : tflayers.sparse_column_with_hash_bucket('origin', hash_bucket_size=1000), # FIXME
      'dest'   : tflayers.sparse_column_with_hash_bucket('dest', hash_bucket_size=1000) #FIXME
    }
    return real, sparse
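
The real/sparse dictionaries returned here can be combined into wide and deep column lists the same way the get_wide_deep examples above do, with small embeddings standing in for the sparse columns on the deep side. A rough sketch, assuming the same tflayers alias; the helper name and embedding size are hypothetical:

def get_wide_deep_from_raw():   # hypothetical helper
    real, sparse = get_features_raw()
    # Sparse (categorical) columns go to the wide/linear part of the model.
    wide = list(sparse.values())
    # Dense columns plus small embeddings of the sparse columns feed the DNN.
    deep = list(real.values()) + [
        tflayers.embedding_column(col, 10) for col in sparse.values()
    ]
    return wide, deep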
Example #32
def contrib_learn_classifier_test():
    """Test tf.contrib.learn.DNN_classifier."""
    language_column = layers.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=20)

    feature_columns = [
        layers.embedding_column(language_column, dimension=3),
        layers.real_valued_column("age", dtype=tf.int64)
    ]

    classifier = learn.DNNClassifier(
        n_classes=3,
        feature_columns=feature_columns,
        hidden_units=[100, 100],
        config=learn.RunConfig(tf_random_seed=1,
                               model_dir="../model_saver/estimators/"
                               "DNN_classifier_01"),
        # optimizer=optimizer_exp_decay
    )
    classifier.fit(input_fn=_input_fn, steps=10000)
    print("variables_names:\n", str(classifier.get_variable_names()))
    # scores = classifier.evaluate(input_fn=_input_fn,
    #                              steps=100)
    # print("scores:\n", str(scores))

    scores = classifier.evaluate(
        input_fn=_input_fn,
        steps=100,
        metrics={
            'my_accuracy':
            MetricSpec(metric_fn=metrics.streaming_accuracy,
                       prediction_key="classes"),
            'my_precision':
            MetricSpec(metric_fn=metrics.streaming_precision,
                       prediction_key="classes"),
            'my_recall':
            MetricSpec(metric_fn=metrics.streaming_recall,
                       prediction_key="classes"),
            'my_metric':
            MetricSpec(metric_fn=my_metric_op, prediction_key="classes")
        })
    print("scores:\n", str(scores))

    predictions = classifier.predict(input_fn=_input_fn,
                                     outputs=["classes", "probabilities"])
    print("predictions")
    for prediction in predictions:
        print(prediction)
Example #33
 def testExtractFeaturesWithTransformation(self):
   """Tests feature extraction."""
   with self.test_session():
     features = {}
     features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32)
     features["sparse_float"] = sparse_tensor.SparseTensor(
         array_ops.zeros([2, 2], dtypes.int64),
         array_ops.zeros([2], dtypes.float32),
         array_ops.zeros([2], dtypes.int64))
     features["sparse_categorical"] = sparse_tensor.SparseTensor(
         array_ops.zeros([2, 2], dtypes.int64),
         array_ops.zeros(
             [2], dtypes.string), array_ops.zeros([2], dtypes.int64))
     feature_columns = set()
     feature_columns.add(layers.real_valued_column("dense_float"))
     feature_columns.add(
         layers.feature_column._real_valued_var_len_column(
             "sparse_float", is_sparse=True))
     feature_columns.add(
         feature_column_lib.sparse_column_with_hash_bucket(
             "sparse_categorical", hash_bucket_size=1000000))
     (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
      sparse_float_shapes, sparse_int_indices, sparse_int_values,
      sparse_int_shapes) = (gbdt_batch.extract_features(
          features, feature_columns))
     self.assertEqual(len(fc_names), 3)
     self.assertAllEqual(fc_names,
                         ["dense_float", "sparse_float", "sparse_categorical"])
     self.assertEqual(len(dense_floats), 1)
     self.assertEqual(len(sparse_float_indices), 1)
     self.assertEqual(len(sparse_float_values), 1)
     self.assertEqual(len(sparse_float_shapes), 1)
     self.assertEqual(len(sparse_int_indices), 1)
     self.assertEqual(len(sparse_int_values), 1)
     self.assertEqual(len(sparse_int_shapes), 1)
     self.assertAllEqual(dense_floats[0].eval(),
                         features["dense_float"].eval())
     self.assertAllEqual(sparse_float_indices[0].eval(),
                         features["sparse_float"].indices.eval())
     self.assertAllEqual(sparse_float_values[0].eval(),
                         features["sparse_float"].values.eval())
     self.assertAllEqual(sparse_float_shapes[0].eval(),
                         features["sparse_float"].dense_shape.eval())
     self.assertAllEqual(sparse_int_indices[0].eval(),
                         features["sparse_categorical"].indices.eval())
     self.assertAllEqual(sparse_int_values[0].eval(), [397263, 397263])
     self.assertAllEqual(sparse_int_shapes[0].eval(),
                         features["sparse_categorical"].dense_shape.eval())
Example #34
def part4():
    global boston, x_data, y_data
    import pandas as pd
    import numpy as np
    N = 10000

    weight = np.random.randn(N) * 5 + 70
    spec_id = np.random.randint(0, 3, N)
    bias = [0.9, 1, 1.1]
    height = np.array(
        [weight[i] / 100 + bias[b] for i, b in enumerate(spec_id)])
    spec_name = ['Goblin', 'Human', 'ManBear']
    spec = [spec_name[s] for s in spec_id]

    df = pd.DataFrame({'Species': spec, 'Weight': weight, 'Height': height})

    from tensorflow.contrib import layers
    Weight = layers.real_valued_column("Weight")
    Species = layers.sparse_column_with_keys(column_name="Species",
                                             keys=spec_name)
    reg = learn.LinearRegressor(feature_columns=[Weight, Species])

    def input_fn(df):
        feature_cols = {}
        feature_cols['Weight'] = tf.constant(df['Weight'].values)

        feature_cols['Species'] = tf.SparseTensor(
            indices=[[i, 0] for i in range(df['Species'].size)],
            values=df['Species'].values,
            dense_shape=[df['Species'].size, 1])

        labels = tf.constant(df['Height'].values)

        return feature_cols, labels

    reg.fit(input_fn=lambda: input_fn(df), steps=50000)

    w_w = reg.get_variable_value('linear/Weight/weight')
    print(f"Estimation for Weight: {w_w}")

    v = reg.get_variable_names()
    print(f"Classes: {v}")

    s_w = reg.get_variable_value('linear/Species/weights')
    b = reg.get_variable_value('linear/bias_weight')
    print(f"Estimation for Species: {s_w + b}")
Example #35
def _maybe_add_bias_column(feature_columns, columns_to_tensors, bias_variable,
                           targets, enable_centered_bias,
                           columns_to_variables):
    train_feature_columns = list(feature_columns)  # Make a copy.
    if enable_centered_bias:
        # Adding a bias column.
        # TODO(b/31008490): Move definition to a common constants place.
        bias_column_name = "tf_virtual_bias_column"
        if any(col.name == bias_column_name for col in feature_columns):
            raise ValueError("%s is a reserved column name." %
                             bias_column_name)
        bias_column = layers.real_valued_column(bias_column_name)
        columns_to_tensors[bias_column] = array_ops.ones_like(
            targets, dtype=dtypes.float32)
        columns_to_variables[bias_column] = [bias_variable]
        train_feature_columns.append(bias_column)
    return train_feature_columns
Example #36
    def testClassifierWithAndWithoutKernelsNoRealValuedColumns(self):
        """Tests kernels have no effect for non-real valued columns ."""
        def input_fn():
            return {
                'price':
                constant_op.constant([[0.4], [0.6], [0.3]]),
                'country':
                sparse_tensor.SparseTensor(values=['IT', 'US', 'GB'],
                                           indices=[[0, 0], [1, 3], [2, 1]],
                                           dense_shape=[3, 5]),
            }, constant_op.constant([[1], [0], [1]])

        price = layers.real_valued_column('price')
        country = layers.sparse_column_with_hash_bucket('country',
                                                        hash_bucket_size=5)

        linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[price, country])
        linear_classifier.fit(input_fn=input_fn, steps=100)
        linear_metrics = linear_classifier.evaluate(input_fn=input_fn, steps=1)
        linear_loss = linear_metrics['loss']
        linear_accuracy = linear_metrics['accuracy']

        kernel_mappers = {
            country: [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
        }

        kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[price, country], kernel_mappers=kernel_mappers)
        kernel_linear_classifier.fit(input_fn=input_fn, steps=100)
        kernel_linear_metrics = kernel_linear_classifier.evaluate(
            input_fn=input_fn, steps=1)
        kernel_linear_loss = kernel_linear_metrics['loss']
        kernel_linear_accuracy = kernel_linear_metrics['accuracy']

        # The kernel mapping is applied to a non-real-valued feature column and so
        # it should have no effect on the model. The loss and accuracy of the
        # "kernelized" model should match the loss and accuracy of the initial model
        # (without kernels).
        self.assertAlmostEqual(linear_loss, kernel_linear_loss, delta=0.01)
        self.assertAlmostEqual(linear_accuracy,
                               kernel_linear_accuracy,
                               delta=0.01)
  def testClassifierWithAndWithoutKernelsNoRealValuedColumns(self):
    """Tests kernels have no effect for non-real valued columns ."""

    def input_fn():
      return {
          'price':
              constant_op.constant([[0.4], [0.6], [0.3]]),
          'country':
              sparse_tensor.SparseTensor(
                  values=['IT', 'US', 'GB'],
                  indices=[[0, 0], [1, 3], [2, 1]],
                  dense_shape=[3, 5]),
      }, constant_op.constant([[1], [0], [1]])

    price = layers.real_valued_column('price')
    country = layers.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)

    linear_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[price, country])
    linear_classifier.fit(input_fn=input_fn, steps=100)
    linear_metrics = linear_classifier.evaluate(input_fn=input_fn, steps=1)
    linear_loss = linear_metrics['loss']
    linear_accuracy = linear_metrics['accuracy']

    kernel_mappers = {
        country: [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
    }

    kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[price, country], kernel_mappers=kernel_mappers)
    kernel_linear_classifier.fit(input_fn=input_fn, steps=100)
    kernel_linear_metrics = kernel_linear_classifier.evaluate(
        input_fn=input_fn, steps=1)
    kernel_linear_loss = kernel_linear_metrics['loss']
    kernel_linear_accuracy = kernel_linear_metrics['accuracy']

    # The kernel mapping is applied to a non-real-valued feature column and so
    # it should have no effect on the model. The loss and accuracy of the
    # "kernelized" model should match the loss and accuracy of the initial model
    # (without kernels).
    self.assertAlmostEqual(linear_loss, kernel_linear_loss, delta=0.01)
    self.assertAlmostEqual(linear_accuracy, kernel_linear_accuracy, delta=0.01)
Example #38
    def testLinearlyInseparableBinaryDataWithAndWithoutKernels(self):
        """Tests classifier w/ and w/o kernels on non-linearly-separable data."""
        multi_dim_feature = layers.real_valued_column('multi_dim_feature',
                                                      dimension=2)

        # Data points are non-linearly separable so there will be at least one
        # mis-classified sample (accuracy < 0.8). In fact, the loss is minimized for
        # w1=w2=0.0, in which case each example incurs a loss of ln(2). The overall
        # (average) loss should then be ln(2) and the logits should be approximately
        # 0.0 for each sample.
        logreg_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[multi_dim_feature])
        logreg_classifier.fit(input_fn=_linearly_inseparable_binary_input_fn,
                              steps=50)
        logreg_metrics = logreg_classifier.evaluate(
            input_fn=_linearly_inseparable_binary_input_fn, steps=1)
        logreg_loss = logreg_metrics['loss']
        logreg_accuracy = logreg_metrics['accuracy']
        logreg_predictions = logreg_classifier.predict(
            input_fn=_linearly_inseparable_binary_input_fn, as_iterable=False)
        self.assertAlmostEqual(logreg_loss, np.log(2), places=3)
        self.assertLess(logreg_accuracy, 0.8)
        self.assertAllClose(logreg_predictions['logits'],
                            [[0.0], [0.0], [0.0], [0.0]])

        # Using kernel mappers makes it possible to discover non-linearities in the
        # data. Mapping the data to a higher-dimensional feature space using
        # approximate RBF kernels substantially reduces the loss and leads to
        # perfect classification accuracy.
        kernel_mappers = {
            multi_dim_feature:
            [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
        }
        kernelized_logreg_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[], kernel_mappers=kernel_mappers)
        kernelized_logreg_classifier.fit(
            input_fn=_linearly_inseparable_binary_input_fn, steps=50)
        kernelized_logreg_metrics = kernelized_logreg_classifier.evaluate(
            input_fn=_linearly_inseparable_binary_input_fn, steps=1)
        kernelized_logreg_loss = kernelized_logreg_metrics['loss']
        kernelized_logreg_accuracy = kernelized_logreg_metrics['accuracy']
        self.assertLess(kernelized_logreg_loss, 0.2)
        self.assertEqual(kernelized_logreg_accuracy, 1.0)
Example #39
def DNNClassifierTrainTask(self, datasource, train_path, test_path, **kwargs):
    steps = kwargs.pop("steps", 2000)
    if datasource == 'system':  # data from system
        training_set = load_system_dataset(train_path)
        if test_path:
            test_set = load_system_dataset(test_path)
        feature_columns = [real_valued_column("", dimension=4)]
        classifier = DNNClassifier(feature_columns=feature_columns,
                                   **kwargs
                                   # hidden_units=[10, 20, 10],
                                   # n_classes=3
                                   )
        if test_path:
            classifier.fit(x=training_set.data,
                           y=training_set.target,
                           steps=steps)
            accuracy_score = classifier.evaluate(x=test_set.data,
                                                 y=test_set.target)["accuracy"]
            return accuracy_score
Example #40
    def testVariablesWithAndWithoutKernels(self):
        """Tests variables w/ and w/o kernel."""
        multi_dim_feature = layers.real_valued_column('multi_dim_feature',
                                                      dimension=2)

        linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[multi_dim_feature])
        linear_classifier.fit(input_fn=_linearly_inseparable_binary_input_fn,
                              steps=50)
        linear_variables = linear_classifier.get_variable_names()
        self.assertIn('linear/multi_dim_feature/weight', linear_variables)
        self.assertIn('linear/bias_weight', linear_variables)
        linear_weights = linear_classifier.get_variable_value(
            'linear/multi_dim_feature/weight')
        linear_bias = linear_classifier.get_variable_value(
            'linear/bias_weight')

        kernel_mappers = {
            multi_dim_feature:
            [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
        }
        kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
            feature_columns=[], kernel_mappers=kernel_mappers)
        kernel_linear_classifier.fit(
            input_fn=_linearly_inseparable_binary_input_fn, steps=50)
        kernel_linear_variables = kernel_linear_classifier.get_variable_names()
        self.assertIn('linear/multi_dim_feature_MAPPED/weight',
                      kernel_linear_variables)
        self.assertIn('linear/bias_weight', kernel_linear_variables)
        kernel_linear_weights = kernel_linear_classifier.get_variable_value(
            'linear/multi_dim_feature_MAPPED/weight')
        kernel_linear_bias = kernel_linear_classifier.get_variable_value(
            'linear/bias_weight')

        # The feature column used for linear classification (no kernels) has
        # dimension 2 so the model will learn a 2-dimension weights vector (and a
        # scalar for the bias). In the kernelized model, the features are mapped to
        # a 30-dimensional feature space and so the weights variable will also have
        # dimension 30.
        self.assertEqual(2, len(linear_weights))
        self.assertEqual(1, len(linear_bias))
        self.assertEqual(30, len(kernel_linear_weights))
        self.assertEqual(1, len(kernel_linear_bias))
  def testLinearlyInseparableBinaryDataWithAndWithoutKernels(self):
    """Tests classifier w/ and w/o kernels on non-linearly-separable data."""
    multi_dim_feature = layers.real_valued_column(
        'multi_dim_feature', dimension=2)

    # Data points are non-linearly separable so there will be at least one
    # mis-classified sample (accuracy < 0.8). In fact, the loss is minimized for
    # w1=w2=0.0, in which case each example incurs a loss of ln(2). The overall
    # (average) loss should then be ln(2) and the logits should be approximately
    # 0.0 for each sample.
    logreg_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[multi_dim_feature])
    logreg_classifier.fit(
        input_fn=_linearly_inseparable_binary_input_fn, steps=50)
    logreg_metrics = logreg_classifier.evaluate(
        input_fn=_linearly_inseparable_binary_input_fn, steps=1)
    logreg_loss = logreg_metrics['loss']
    logreg_accuracy = logreg_metrics['accuracy']
    logreg_predictions = logreg_classifier.predict(
        input_fn=_linearly_inseparable_binary_input_fn, as_iterable=False)
    self.assertAlmostEqual(logreg_loss, np.log(2), places=3)
    self.assertLess(logreg_accuracy, 0.8)
    self.assertAllClose(logreg_predictions['logits'], [[0.0], [0.0], [0.0],
                                                       [0.0]])

    # Using kernel mappers makes it possible to discover non-linearities in the
    # data. Mapping the data to a higher-dimensional feature space using
    # approximate RBF kernels substantially reduces the loss and leads to
    # perfect classification accuracy.
    kernel_mappers = {
        multi_dim_feature: [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
    }
    kernelized_logreg_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[], kernel_mappers=kernel_mappers)
    kernelized_logreg_classifier.fit(
        input_fn=_linearly_inseparable_binary_input_fn, steps=50)
    kernelized_logreg_metrics = kernelized_logreg_classifier.evaluate(
        input_fn=_linearly_inseparable_binary_input_fn, steps=1)
    kernelized_logreg_loss = kernelized_logreg_metrics['loss']
    kernelized_logreg_accuracy = kernelized_logreg_metrics['accuracy']
    self.assertLess(kernelized_logreg_loss, 0.2)
    self.assertEqual(kernelized_logreg_accuracy, 1.0)
  def testVariablesWithAndWithoutKernels(self):
    """Tests variables w/ and w/o kernel."""
    multi_dim_feature = layers.real_valued_column(
        'multi_dim_feature', dimension=2)

    linear_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[multi_dim_feature])
    linear_classifier.fit(
        input_fn=_linearly_inseparable_binary_input_fn, steps=50)
    linear_variables = linear_classifier.get_variable_names()
    self.assertIn('linear/multi_dim_feature/weight', linear_variables)
    self.assertIn('linear/bias_weight', linear_variables)
    linear_weights = linear_classifier.get_variable_value(
        'linear/multi_dim_feature/weight')
    linear_bias = linear_classifier.get_variable_value('linear/bias_weight')

    kernel_mappers = {
        multi_dim_feature: [RandomFourierFeatureMapper(2, 30, 0.6, 1, 'rffm')]
    }
    kernel_linear_classifier = kernel_estimators.KernelLinearClassifier(
        feature_columns=[], kernel_mappers=kernel_mappers)
    kernel_linear_classifier.fit(
        input_fn=_linearly_inseparable_binary_input_fn, steps=50)
    kernel_linear_variables = kernel_linear_classifier.get_variable_names()
    self.assertIn('linear/multi_dim_feature_MAPPED/weight',
                  kernel_linear_variables)
    self.assertIn('linear/bias_weight', kernel_linear_variables)
    kernel_linear_weights = kernel_linear_classifier.get_variable_value(
        'linear/multi_dim_feature_MAPPED/weight')
    kernel_linear_bias = kernel_linear_classifier.get_variable_value(
        'linear/bias_weight')

    # The feature column used for linear classification (no kernels) has
    # dimension 2 so the model will learn a 2-dimension weights vector (and a
    # scalar for the bias). In the kernelized model, the features are mapped to
    # a 30-dimensional feature space and so the weights variable will also have
    # dimension 30.
    self.assertEqual(2, len(linear_weights))
    self.assertEqual(1, len(linear_bias))
    self.assertEqual(30, len(kernel_linear_weights))
    self.assertEqual(1, len(kernel_linear_bias))
 def test_savedmodel_state_override(self):
   random_model = RandomStateSpaceModel(
       state_dimension=5,
       state_noise_dimension=4,
       configuration=state_space_model.StateSpaceModelConfiguration(
           exogenous_feature_columns=[layers.real_valued_column("exogenous")],
           dtype=dtypes.float64, num_features=1))
   estimator = estimators.StateSpaceRegressor(
       model=random_model,
       optimizer=gradient_descent.GradientDescentOptimizer(0.1))
   combined_input_fn = input_pipeline.WholeDatasetInputFn(
       input_pipeline.NumpyReader({
           feature_keys.FilteringFeatures.TIMES: [1, 2, 3, 4],
           feature_keys.FilteringFeatures.VALUES: [1., 2., 3., 4.],
           "exogenous": [-1., -2., -3., -4.]
       }))
   estimator.train(combined_input_fn, steps=1)
   export_location = estimator.export_savedmodel(
       self.get_temp_dir(),
       estimator.build_raw_serving_input_receiver_fn())
   with ops.Graph().as_default() as graph:
     random_model.initialize_graph()
     with self.session(graph=graph) as session:
       variables.global_variables_initializer().run()
       evaled_start_state = session.run(random_model.get_start_state())
   evaled_start_state = [
       state_element[None, ...] for state_element in evaled_start_state]
   with ops.Graph().as_default() as graph:
     with self.session(graph=graph) as session:
       signatures = loader.load(
           session, [tag_constants.SERVING], export_location)
       first_split_filtering = saved_model_utils.filter_continuation(
           continue_from={
               feature_keys.FilteringResults.STATE_TUPLE: evaled_start_state},
           signatures=signatures,
           session=session,
           features={
               feature_keys.FilteringFeatures.TIMES: [1, 2],
               feature_keys.FilteringFeatures.VALUES: [1., 2.],
               "exogenous": [[-1.], [-2.]]})
       second_split_filtering = saved_model_utils.filter_continuation(
           continue_from=first_split_filtering,
           signatures=signatures,
           session=session,
           features={
               feature_keys.FilteringFeatures.TIMES: [3, 4],
               feature_keys.FilteringFeatures.VALUES: [3., 4.],
               "exogenous": [[-3.], [-4.]]
           })
       combined_filtering = saved_model_utils.filter_continuation(
           continue_from={
               feature_keys.FilteringResults.STATE_TUPLE: evaled_start_state},
           signatures=signatures,
           session=session,
           features={
               feature_keys.FilteringFeatures.TIMES: [1, 2, 3, 4],
               feature_keys.FilteringFeatures.VALUES: [1., 2., 3., 4.],
               "exogenous": [[-1.], [-2.], [-3.], [-4.]]
           })
       split_predict = saved_model_utils.predict_continuation(
           continue_from=second_split_filtering,
           signatures=signatures,
           session=session,
           steps=1,
           exogenous_features={
               "exogenous": [[[-5.]]]})
       combined_predict = saved_model_utils.predict_continuation(
           continue_from=combined_filtering,
           signatures=signatures,
           session=session,
           steps=1,
           exogenous_features={
               "exogenous": [[[-5.]]]})
   for state_key, combined_state_value in combined_filtering.items():
     if state_key == feature_keys.FilteringResults.TIMES:
       continue
     self.assertAllClose(
         combined_state_value, second_split_filtering[state_key])
   for prediction_key, combined_value in combined_predict.items():
     self.assertAllClose(combined_value, split_predict[prediction_key])
def _dnn_tree_combined_model_fn(
    features,
    labels,
    mode,
    head,
    dnn_hidden_units,
    dnn_feature_columns,
    tree_learner_config,
    num_trees,
    tree_examples_per_layer,
    config=None,
    dnn_optimizer="Adagrad",
    dnn_activation_fn=nn.relu,
    dnn_dropout=None,
    dnn_input_layer_partitioner=None,
    dnn_input_layer_to_tree=True,
    dnn_steps_to_train=10000,
    predict_with_tree_only=False,
    tree_feature_columns=None,
    tree_center_bias=False,
    dnn_to_tree_distillation_param=None,
    use_core_versions=False,
    output_type=model.ModelBuilderOutputType.MODEL_FN_OPS):
  """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    predict_with_tree_only: Whether to use only the tree model output as the
      final prediction.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.
    dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the
      float defines the weight of the distillation loss, and the loss_fn, for
      computing distillation loss, takes dnn_logits, tree_logits and weight
      tensor. If the entire tuple is None, no distillation will be applied. If
      only the loss_fn is None, we will take the sigmoid/softmax cross entropy
      loss by default. When distillation is applied, `predict_with_tree_only`
      will be set to True.
    use_core_versions: Whether feature columns and loss are from the core (as
      opposed to contrib) version of tensorflow.
    output_type: Whether to construct `ModelFnOps` (contrib interface) or an
      `EstimatorSpec` (core interface) from this model_fn.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
  if not isinstance(features, dict):
    raise ValueError("features should be a dictionary of `Tensor`s. "
                     "Given type: {}".format(type(features)))

  if not dnn_feature_columns:
    raise ValueError("dnn_feature_columns must be specified")

  if dnn_to_tree_distillation_param:
    if not predict_with_tree_only:
      logging.warning("update predict_with_tree_only to True since distillation"
                      "is specified.")
      predict_with_tree_only = True

  # Build DNN Logits.
  dnn_parent_scope = "dnn"
  dnn_partitioner = dnn_input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=config.num_ps_replicas, min_slice_size=64 << 20))

  if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and
      not use_core_versions):
    raise ValueError("You must use core versions with Estimator Spec")

  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as input_layer_scope:
      if use_core_versions:
        input_layer = feature_column_lib.input_layer(
            features=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope])
      else:
        input_layer = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope],
            scope=input_layer_scope)
    previous_layer = input_layer
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(previous_layer,)) as hidden_layer_scope:
        net = layers.fully_connected(
            previous_layer,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=hidden_layer_scope)
        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)
      previous_layer = net
    with variable_scope.variable_scope(
        "logits", values=(previous_layer,)) as logits_scope:
      dnn_logits = layers.fully_connected(
          previous_layer,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(dnn_logits, logits_scope.name)

    def _dnn_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_DNN_LEARNING_RATE,
          optimizer=_get_optimizer(dnn_optimizer),
          name=dnn_parent_scope,
          variables=ops.get_collection(
              ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope),
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

  # Build Tree Logits.
  global_step = training_util.get_global_step()
  with ops.device(global_step.device):
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0,
        tree_ensemble_config="",  # Initialize an empty ensemble.
        name="ensemble_model")

  tree_features = features.copy()
  if dnn_input_layer_to_tree:
    tree_features["dnn_input_layer"] = input_layer
    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
      is_chief=config.is_chief,
      num_ps_replicas=config.num_ps_replicas,
      ensemble_handle=ensemble_handle,
      center_bias=tree_center_bias,
      examples_per_layer=tree_examples_per_layer,
      learner_config=tree_learner_config,
      feature_columns=tree_feature_columns,
      logits_dimension=head.logits_dimension,
      features=tree_features,
      use_core_columns=use_core_versions)

  with ops.name_scope("gbdt"):
    predictions_dict = gbdt_model.predict(mode)
    tree_logits = predictions_dict["predictions"]

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      if dnn_to_tree_distillation_param:
        loss_weight, loss_fn = dnn_to_tree_distillation_param
        weight_tensor = head_lib._weight_tensor(  # pylint: disable=protected-access
            features, head.weight_column_name)
        dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)

        if loss_fn is None:
          # Default to a cross-entropy loss_fn, mirroring the loss previously
          # used by multi_class_head.
          n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension
          loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn(
              n_classes)

        dnn_to_tree_distillation_loss = loss_weight * loss_fn(
            dnn_logits_fixed, tree_logits, weight_tensor)
        summary.scalar("dnn_to_tree_distillation_loss",
                       dnn_to_tree_distillation_loss)
        loss += dnn_to_tree_distillation_loss

      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

  if predict_with_tree_only:
    if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER:
      tree_train_logits = tree_logits
    else:
      tree_train_logits = control_flow_ops.cond(
          global_step > dnn_steps_to_train,
          lambda: tree_logits,
          lambda: dnn_logits)
  else:
    tree_train_logits = dnn_logits + tree_logits

  def _no_train_op_fn(loss):
    """Returns a no-op."""
    del loss
    return control_flow_ops.no_op()

  if tree_center_bias:
    num_trees += 1
  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

  if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS:
    if use_core_versions:
      model_fn_ops = head.create_estimator_spec(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_no_train_op_fn,
          logits=tree_train_logits)
      dnn_train_op = head.create_estimator_spec(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_dnn_train_op_fn,
          logits=dnn_logits)
      dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
          dnn_train_op).train_op

      tree_train_op = head.create_estimator_spec(
          features=tree_features,
          mode=mode,
          labels=labels,
          train_op_fn=_tree_train_op_fn,
          logits=tree_train_logits)
      tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
          tree_train_op).train_op

      model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops(
          model_fn_ops)
    else:
      model_fn_ops = head.create_model_fn_ops(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_no_train_op_fn,
          logits=tree_train_logits)
      dnn_train_op = head.create_model_fn_ops(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_dnn_train_op_fn,
          logits=dnn_logits).train_op
      tree_train_op = head.create_model_fn_ops(
          features=tree_features,
          mode=mode,
          labels=labels,
          train_op_fn=_tree_train_op_fn,
          logits=tree_train_logits).train_op

    # Add the hooks
    model_fn_ops.training_hooks.extend([
        trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train,
                                    tree_train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees)
    ])
    return model_fn_ops

  elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC:
    fusion_spec = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_no_train_op_fn,
        logits=tree_train_logits)
    dnn_spec = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_dnn_train_op_fn,
        logits=dnn_logits)
    tree_spec = head.create_estimator_spec(
        features=tree_features,
        mode=mode,
        labels=labels,
        train_op_fn=_tree_train_op_fn,
        logits=tree_train_logits)

    training_hooks = [
        trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train,
                                    tree_spec.train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees)
    ]
    fusion_spec = fusion_spec._replace(training_hooks=training_hooks +
                                       list(fusion_spec.training_hooks))
    return fusion_spec
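
A minimal usage sketch (not part of the original source): the combined model_fn above is normally wrapped by the contrib DNNBoostedTreeCombinedClassifier/Regressor estimators, but its hyperparameters can also be bound by hand and the result handed to a generic contrib Estimator. Names such as my_head, my_learner_config and my_columns below are placeholders.

import functools
import tensorflow as tf

def make_combined_estimator(model_dir, my_head, my_learner_config, my_columns):
  # Bind everything except (features, labels, mode, config), which the
  # Estimator supplies when it calls the model_fn.
  bound_model_fn = functools.partial(
      _dnn_tree_combined_model_fn,
      head=my_head,
      dnn_hidden_units=[128, 64],
      dnn_feature_columns=my_columns,
      tree_learner_config=my_learner_config,
      num_trees=100,
      tree_examples_per_layer=1000,
      dnn_steps_to_train=10000)
  return tf.contrib.learn.Estimator(model_fn=bound_model_fn, model_dir=model_dir)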
Example #45
0
from os.path import dirname, realpath
from tempfile import mkdtemp
from tensorflow.contrib.layers import bucketized_column, crossed_column, embedding_column, sparse_column_with_keys, sparse_column_with_hash_bucket, real_valued_column

PATH_TO_DIRECTORY_OF_THIS_FILE = dirname(realpath(__file__))
PATH_TO_DIRECTORY_OF_INPUT_DATA = PATH_TO_DIRECTORY_OF_THIS_FILE + "/data/input"
MODEL_DIR = PATH_TO_DIRECTORY_OF_THIS_FILE + "/classifier"

CATEGORICAL_COLUMNS = ["admin_level", "country_code", "edit_distance", "has_mpoly", "has_pcode", "is_country", "is_highest_population", "is_lowest_admin_level", "matches_topic"]
CONTINUOUS_COLUMNS = ["cluster_frequency", "country_rank", "median_distance", "population", "popularity"]
LABEL_COLUMN = "correct"
COLUMNS = sorted(CATEGORICAL_COLUMNS + CONTINUOUS_COLUMNS) + [LABEL_COLUMN]
print "COLUMNS:", COLUMNS


admin_level = sparse_column_with_keys(column_name="admin_level", keys=["None","0","1","2","3","4","5","6"]) # I've never seen admin 6, but you never know!
cluster_frequency = real_valued_column("cluster_frequency")
cluster_frequency_buckets = bucketized_column(cluster_frequency, boundaries=[0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1])
country_code = sparse_column_with_hash_bucket("country_code", hash_bucket_size=500)
country_rank = real_valued_column("country_rank")
edit_distance = sparse_column_with_keys(column_name="edit_distance", keys=["0", "1", "2"])
has_pcode = sparse_column_with_keys(column_name="has_pcode", keys=["True", "False"])
has_mpoly = sparse_column_with_keys(column_name="has_mpoly", keys=["True", "False"])
is_country = sparse_column_with_keys(column_name="is_country", keys=["True", "False"])
is_lowest_admin_level = sparse_column_with_keys(column_name="is_lowest_admin_level", keys=["True", "False"])
is_highest_population = sparse_column_with_keys(column_name="is_highest_population", keys=["True", "False"])
matches_topic = sparse_column_with_keys(column_name="matches_topic", keys=["True", "False"])
median_distance = real_valued_column("median_distance")
median_distance_buckets = bucketized_column(median_distance, boundaries=[10,50,100,200,300])
population = real_valued_column("population")
population_buckets = bucketized_column(population, boundaries=[0, 1, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000])
popularity = real_valued_column("popularity")
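
A hedged sketch (not part of the original snippet) of how the columns defined above could feed a simple linear model written to MODEL_DIR; the exact column mix used by the original project is not shown here.

import tensorflow as tf

feature_columns = [admin_level, country_code, edit_distance, has_mpoly, has_pcode,
                   is_country, is_highest_population, is_lowest_admin_level,
                   matches_topic, cluster_frequency_buckets,
                   median_distance_buckets, population_buckets,
                   country_rank, popularity]
classifier = tf.contrib.learn.LinearClassifier(
    feature_columns=feature_columns, model_dir=MODEL_DIR)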
Example #46
0
def build_feature_cols():
  # Sparse base columns.
  gender = tf.contrib.layers.sparse_column_with_keys(
            column_name="gender",
            keys=["female", "male"])
  race = tf.contrib.layers.sparse_column_with_keys(
            column_name="race",
            keys=["Amer-Indian-Eskimo",
                  "Asian-Pac-Islander",
                  "Black", "Other",
                  "White"])

  education = tf.contrib.layers.sparse_column_with_hash_bucket(
      "education", hash_bucket_size=1000)
  marital_status = tf.contrib.layers.sparse_column_with_hash_bucket(
      "marital_status", hash_bucket_size=100)
  relationship = tf.contrib.layers.sparse_column_with_hash_bucket(
      "relationship", hash_bucket_size=100)
  workclass = tf.contrib.layers.sparse_column_with_hash_bucket(
      "workclass", hash_bucket_size=100)
  occupation = tf.contrib.layers.sparse_column_with_hash_bucket(
      "occupation", hash_bucket_size=1000)
  native_country = tf.contrib.layers.sparse_column_with_hash_bucket(
      "native_country", hash_bucket_size=1000)

  # Continuous base columns.
  age = tf.contrib.layers.real_valued_column("age")
  education_num = tf.contrib.layers.real_valued_column("education_num")
  capital_gain = tf.contrib.layers.real_valued_column("capital_gain")
  capital_loss = tf.contrib.layers.real_valued_column("capital_loss")
  hours_per_week = tf.contrib.layers.real_valued_column("hours_per_week")

  # Transformations.
  age_buckets = tf.contrib.layers.bucketized_column(
      age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
  education_occupation = tf.contrib.layers.crossed_column(
      [education, occupation], hash_bucket_size=int(1e4))
  age_race_occupation = tf.contrib.layers.crossed_column(
      [age_buckets, race, occupation], hash_bucket_size=int(1e6))
  country_occupation = tf.contrib.layers.crossed_column(
      [native_country, occupation], hash_bucket_size=int(1e4))

  # Wide columns and deep columns.
  wide_columns = [gender, native_country, education, 
                  occupation, workclass, race, 
                  marital_status, relationship, 
                  age_buckets,
                  education_occupation, 
                  age_race_occupation,
                  country_occupation]

  deep_columns = [
      tf.contrib.layers.embedding_column(gender, dimension=8),
      tf.contrib.layers.embedding_column(native_country, dimension=8),
      tf.contrib.layers.embedding_column(education, dimension=8),
      tf.contrib.layers.embedding_column(occupation, dimension=8),
      tf.contrib.layers.embedding_column(workclass, dimension=8),
      tf.contrib.layers.embedding_column(race, dimension=8),
      tf.contrib.layers.embedding_column(marital_status, dimension=8),
      tf.contrib.layers.embedding_column(relationship, dimension=8),
      tf.contrib.layers.embedding_column(age_buckets, dimension=8),
      tf.contrib.layers.embedding_column(education_occupation, dimension=8),
      tf.contrib.layers.embedding_column(age_race_occupation, dimension=8),
      tf.contrib.layers.embedding_column(country_occupation, dimension=8),
      age,
      education_num,
      capital_gain,
      capital_loss,
      hours_per_week,
  ]

  return wide_columns, deep_columns
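
A minimal sketch (hypothetical, not part of the original snippet) of feeding the returned columns into the contrib wide-and-deep estimator; train_input_fn is assumed to exist elsewhere.

import tempfile
import tensorflow as tf

wide_columns, deep_columns = build_feature_cols()
m = tf.contrib.learn.DNNLinearCombinedClassifier(
    model_dir=tempfile.mkdtemp(),
    linear_feature_columns=wide_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[100, 50])
# m.fit(input_fn=train_input_fn, steps=200)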
Example #47
0
tf.logging.set_verbosity(tf.logging.INFO)

CSV_COLUMNS = 'fare_amount,dayofweek,hourofday,pickuplon,pickuplat,dropofflon,dropofflat,passengers,key'.split(',')
SCALE_COLUMNS = ['pickuplon','pickuplat','dropofflon','dropofflat','passengers']
LABEL_COLUMN = 'fare_amount'
KEY_FEATURE_COLUMN = 'key'
DEFAULTS = [[0.0], ['Sun'], [0], [-74.0], [40.0], [-74.0], [40.7], [1.0], ['nokey']]

# These are the raw input columns, and will be provided for prediction also
INPUT_COLUMNS = [
    # define features
    layers.sparse_column_with_keys('dayofweek', keys=['Sun', 'Mon', 'Tues', 'Wed', 'Thu', 'Fri', 'Sat']),
    layers.sparse_column_with_integerized_feature('hourofday', bucket_size=24),

    # engineered features that are created in the input_fn (see the sketch
    # after this list)
    layers.real_valued_column('latdiff'),
    layers.real_valued_column('londiff'),
    layers.real_valued_column('euclidean'),

    # real_valued_column
    layers.real_valued_column('pickuplon'),
    layers.real_valued_column('pickuplat'),
    layers.real_valued_column('dropofflat'),
    layers.real_valued_column('dropofflon'),
    layers.real_valued_column('passengers'),
]
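
One plausible implementation (a hedged sketch; the original input_fn is not shown in this excerpt) of the engineered features referenced in the list above: coordinate differences plus their Euclidean distance, added to the features dict before the columns are consumed. The helper name add_engineered is illustrative.

def add_engineered(features):
  # Coordinate deltas between pickup and dropoff.
  latdiff = features['pickuplat'] - features['dropofflat']
  londiff = features['pickuplon'] - features['dropofflon']
  features['latdiff'] = latdiff
  features['londiff'] = londiff
  # Straight-line distance in degree space.
  features['euclidean'] = tf.sqrt(latdiff * latdiff + londiff * londiff)
  return features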

def build_estimator(model_dir, nbuckets, hidden_units):
  """
     Build an estimator starting from INPUT COLUMNS.
     These include feature transformations and synthetic features.
  """

X_train = X_train.copy()
X_test = X_test.copy()
categorical_var_encoders = {}
for var in categorical_vars:
  le = LabelEncoder().fit(X_train[var])
  X_train[var + '_ids'] = le.transform(X_train[var])
  X_test[var + '_ids'] = le.transform(X_test[var])
  X_train.pop(var)
  X_test.pop(var)
  categorical_var_encoders[var] = le

### Note: Feature Columns currently (2016/10/22) not working, update is coming.
# Setup feature columns.
CATEGORICAL_EMBED_SIZE = 10 # Note, you can customize this per variable.
feature_columns = [
  layers.real_valued_column(var) for var in continues_vars
] + [
  layers.embedding_column(
     layers.sparse_column_with_integerized_feature(
       var + '_ids', len(categorical_var_encoders[var].classes_)), 
     CATEGORICAL_EMBED_SIZE) for var in
  categorical_vars
]


# Linear classifier.
random.seed(42)
tflr = learn.LinearClassifier(n_classes=2,
    feature_columns=feature_columns,
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.05))
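
A hedged continuation sketch: the fragment above stops after constructing the classifier. With the old contrib API, training and evaluation would look roughly like the following, where train_input_fn and eval_input_fn are hypothetical helpers that turn the encoded DataFrames into tensors.

tflr.fit(input_fn=train_input_fn, steps=1000)
print(tflr.evaluate(input_fn=eval_input_fn, steps=1))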
def _dnn_tree_combined_model_fn(
    features, labels, mode, head, dnn_hidden_units,
    dnn_feature_columns, tree_learner_config, num_trees,
    tree_examples_per_layer,
    config=None, dnn_optimizer="Adagrad",
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    dnn_input_layer_partitioner=None,
    dnn_input_layer_to_tree=True, dnn_steps_to_train=10000,
    tree_feature_columns=None,
    tree_center_bias=True):
  """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
  if not isinstance(features, dict):
    raise ValueError("features should be a dictionary of `Tensor`s. "
                     "Given type: {}".format(type(features)))

  if not dnn_feature_columns:
    raise ValueError("dnn_feature_columns must be specified")

  # Build DNN Logits.
  dnn_parent_scope = "dnn"
  dnn_partitioner = dnn_input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=config.num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as input_layer_scope:
      input_layer = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=input_layer_scope)
    previous_layer = input_layer
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(previous_layer,)) as hidden_layer_scope:
        net = layers.fully_connected(
            previous_layer,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=hidden_layer_scope)
        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)
      previous_layer = net
    with variable_scope.variable_scope(
        "logits",
        values=(previous_layer,)) as logits_scope:
      dnn_logits = layers.fully_connected(
          previous_layer,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(dnn_logits, logits_scope.name)

    def _dnn_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_DNN_LEARNING_RATE,
          optimizer=_get_optimizer(dnn_optimizer),
          name=dnn_parent_scope,
          variables=ops.get_collection(
              ops.GraphKeys.TRAINABLE_VARIABLES,
              scope=dnn_parent_scope),
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

  # Build Tree Logits.
  global_step = training_util.get_global_step()
  with ops.device(global_step.device):
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0,
        tree_ensemble_config="",  # Initialize an empty ensemble.
        name="ensemble_model")

  tree_features = features.copy()
  if dnn_input_layer_to_tree:
    tree_features["dnn_input_layer"] = input_layer
    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
      is_chief=config.is_chief,
      num_ps_replicas=config.num_ps_replicas,
      ensemble_handle=ensemble_handle,
      center_bias=tree_center_bias,
      examples_per_layer=tree_examples_per_layer,
      learner_config=tree_learner_config,
      feature_columns=tree_feature_columns,
      logits_dimension=head.logits_dimension,
      features=tree_features)

  with ops.name_scope("gbdt"):
    predictions_dict = gbdt_model.predict(mode)
    tree_logits = predictions_dict["predictions"]

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

  tree_train_logits = dnn_logits + tree_logits

  def _no_train_op_fn(loss):
    """Returns a no-op."""
    del loss
    return control_flow_ops.no_op()

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_no_train_op_fn,
      logits=tree_train_logits)
  dnn_train_op = head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_dnn_train_op_fn,
      logits=dnn_logits).train_op
  tree_train_op = head.create_model_fn_ops(
      features=tree_features,
      mode=mode,
      labels=labels,
      train_op_fn=_tree_train_op_fn,
      logits=tree_train_logits).train_op

  if tree_center_bias:
    num_trees += 1
  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

  model_fn_ops.training_hooks.extend([
      trainer_hooks.SwitchTrainOp(
          dnn_train_op, dnn_steps_to_train, tree_train_op),
      trainer_hooks.StopAfterNTrees(
          num_trees, attempted_trees, finalized_trees)])

  return model_fn_ops