Example #1
def _get_tfbt(output_dir):
    """Configures TF Boosted Trees estimator based on flags."""
    learner_config = learner_pb2.LearnerConfig()

    num_classes = 10

    learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
    learner_config.num_classes = num_classes
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer
    learner_config.constraints.max_tree_depth = FLAGS.depth

    growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
    learner_config.growing_mode = growing_mode
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    learner_config.multi_class_strategy = (
        learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

    # Create a TF Boosted trees estimator that can take in custom loss.
    estimator = GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        n_classes=num_classes,
        examples_per_layer=FLAGS.examples_per_layer,
        model_dir=output_dir,
        num_trees=FLAGS.num_trees,
        center_bias=False,
        config=run_config)
    return estimator
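A minimal usage sketch (not part of the original example): training the returned estimator with a NumPy input function. The output directory, data arrays, feature key, batch size and step count below are placeholder assumptions.

estimator = _get_tfbt("/tmp/tfbt_model")  # hypothetical output directory
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': train_images},  # train_images/train_labels: NumPy arrays, assumed defined elsewhere
    y=train_labels,
    batch_size=4096,
    num_epochs=None,
    shuffle=True)
estimator.fit(input_fn=train_input_fn, max_steps=10000)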
Example #2
def _get_tfbt(output_dir, feature_cols):
    """Configures TF Boosted Trees estimator based on flags."""
    learner_config = learner_pb2.LearnerConfig()

    learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
    learner_config.regularization.l1 = 0.0
    # Set the regularization per instance in such a way that
    # regularization for the full training data is equal to l2 flag.
    learner_config.regularization.l2 = FLAGS.l2 / FLAGS.batch_size
    learner_config.constraints.max_tree_depth = FLAGS.depth
    learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER

    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=30)

    # Create a TF Boosted Trees binary classification estimator.
    estimator = GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        examples_per_layer=FLAGS.examples_per_layer,
        n_classes=2,
        num_trees=FLAGS.num_trees,
        feature_columns=feature_cols,
        model_dir=output_dir,
        config=run_config,
        center_bias=False)
    return estimator
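A sketch of how the feature_cols argument might be constructed for this tf.contrib.learn-style estimator, assuming purely numeric inputs; the column names and output directory are placeholders.

# Hypothetical numeric feature columns.
feature_cols = [
    tf.contrib.layers.real_valued_column('feature_%d' % i) for i in range(10)
]
estimator = _get_tfbt('/tmp/tfbt_model', feature_cols)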
Example #3
    def set_parameter(self, param):
        for name in self.default_param:
            if name not in param:
                param[name] = self.default_param[name]

        self.build_model()

        self.learner_config = learner_pb2.LearnerConfig()

        self.learner_config.learning_rate_tuner.fixed.learning_rate = float(
            param['learning_rate'])

        self.learner_config.regularization.l1 = 0.0
        self.learner_config.regularization.l2 = float(param['L2']) / int(
            param['examples_per_layer'])
        self.learner_config.constraints.max_tree_depth = int(param['depth'])

        self.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
        self.learner_config.growing_mode = self.growing_mode
        self.run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
        self.model_path = param['model_path']
        self.class_num = int(param['class_num'])
        if param['objective'] == "multiclass":
            self.learner_config.num_classes = self.class_num
            self.learner_config.multi_class_strategy = (
                learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

            # Create a TF Boosted trees estimator that can take in custom loss.
            self.estimator = GradientBoostedDecisionTreeClassifier(
                learner_config=self.learner_config,
                n_classes=int(self.class_num),
                examples_per_layer=int(param['examples_per_layer']),
                model_dir=self.model_path,
                num_trees=int(param['num_trees']),
                center_bias=False,
                config=self.run_config)
        else:
            pass

        self.batch_size = int(param["batch_size"])
        self.eval_batch_size = int(param['eval_batch_size'])
        self.num_epochs = param["num_epochs"]
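An illustrative parameter dictionary for set_parameter, using only the keys the method reads above; the concrete values and the wrapper-object name are assumptions.

param = {
    'learning_rate': 0.1,
    'L2': 1.0,
    'examples_per_layer': 1000,
    'depth': 6,
    'model_path': '/tmp/tfbt_model',  # hypothetical path
    'class_num': 10,
    'objective': 'multiclass',
    'num_trees': 10,
    'batch_size': 4096,
    'eval_batch_size': 4096,
    'num_epochs': 1,
}
model.set_parameter(param)  # `model`: an instance of the wrapper class that defines set_parameter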
Example #4
learner_config = gbdt_learner.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
learner_config.growing_mode = growing_mode
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
learner_config.multi_class_strategy = (
    gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)

# Create a TensorFlow GBDT estimator
gbdt_model = GradientBoostedDecisionTreeClassifier(
    model_dir=None,  # No save directory specified
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config)

# Display TF info logs
tf.logging.set_verbosity(tf.logging.INFO)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(x={'images': mnist.train.images},
                                              y=mnist.train.labels,
                                              batch_size=batch_size,
                                              num_epochs=None,
                                              shuffle=True)
# Train the Model
gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)
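The example above stops after training; an evaluation step in the style of Example #5 below, reusing the mnist, batch_size and gbdt_model objects already defined, would look like this.

test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images},
    y=mnist.test.labels,
    batch_size=batch_size,
    shuffle=False)
e = gbdt_model.evaluate(input_fn=test_input_fn)
print("Testing Accuracy:", e['accuracy'])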
Example #5
def test2():

    import tensorflow as tf
    from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
    from tensorflow.contrib.boosted_trees.proto import learner_pb2 as gbdt_learner

    # Ignore all GPUs (current TF GBDT does not support GPU).
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # Set the logging level
    tf.logging.set_verbosity(tf.logging.ERROR)
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets(
        "MNIST_data",
        one_hot=False,
        source_url='http://yann.lecun.com/exdb/mnist/')

    # Parameters
    batch_size = 4096  # batch size
    num_classes = 10  # number of classes
    num_features = 784  # number of features
    max_steps = 10000  # maximum number of training steps

    # GBDT Parameters
    learning_rate = 0.1
    l1_regul = 0.
    l2_regul = 1.
    examples_per_layer = 1000
    num_trees = 10
    max_depth = 16

    # Configure the GBDT learner
    learner_config = gbdt_learner.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
    learner_config.regularization.l1 = l1_regul
    learner_config.regularization.l2 = l2_regul / examples_per_layer
    learner_config.constraints.max_tree_depth = max_depth
    growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
    learner_config.growing_mode = growing_mode
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
    learner_config.multi_class_strategy = (
        gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)

    # Create the model
    gbdt_model = GradientBoostedDecisionTreeClassifier(
        model_dir=None,  # No save directory specified
        learner_config=learner_config,
        n_classes=num_classes,
        examples_per_layer=examples_per_layer,
        num_trees=num_trees,
        center_bias=False,
        config=run_config)

    # Display TF info logs
    tf.logging.set_verbosity(tf.logging.INFO)

    # Define the input function for training
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': mnist.train.images},
        y=mnist.train.labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)

    # Train the model
    gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)

    # Evaluate on the test set
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': mnist.test.images},
        y=mnist.test.labels,
        batch_size=batch_size,
        shuffle=False)
    e = gbdt_model.evaluate(input_fn=input_fn)

    print("Testing Accuracy:", e['accuracy'])
Example #6
learner_config = gbdt_learner.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
learner_config.growing_mode = growing_mode
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
learner_config.multi_class_strategy = (
    gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)

# Create a TensorFlow GBDT Estimator
gbdt_model = GradientBoostedDecisionTreeClassifier(
    model_dir=None, # No save directory specified
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config)

# Display TF info logs
tf.logging.set_verbosity(tf.logging.INFO)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images}, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)
# Train the Model
gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)

# Evaluate the Model
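# The source snippet stops here; a sketch of the evaluation step, following the
# same pattern as Example #5 (mnist, batch_size and gbdt_model are defined above):
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
e = gbdt_model.evaluate(input_fn=input_fn)
print("Testing Accuracy:", e['accuracy'])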
Example #7
# -*- coding: utf-8 -*-
# @Time    : 2019/8/7 17:28
# @Author  : skydm
# @Email   : [email protected]
# @File    : gbdt.py
# @Software: PyCharm

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "5"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
import tensorflow.contrib.boosted_trees.proto.learner_pb2 as gbdt_learner

# Set verbosity to display errors only (Remove this line for showing warnings)
tf.logging.set_verbosity(tf.logging.ERROR)

mnist = input_data.read_data_sets("./data", one_hot=False)

# Hyperparameters (values mirror the other examples in this document)
num_classes = 10
learning_rate = 0.1
l1_regul = 0.
l2_regul = 1.
examples_per_layer = 1000
num_trees = 10
max_depth = 16

# Configure the GBDT learner
learner_config = gbdt_learner.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
learner_config.growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
learner_config.multi_class_strategy = (
    gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

# Create the GBDT estimator
gbdt_model = GradientBoostedDecisionTreeClassifier(
    model_dir=None,  # no save directory specified
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config)
Example #8
def run():
    # Ignore all GPUs (the current TF GBDT implementation does not support GPU)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    # Show only TensorFlow error logs
    tf.logging.set_verbosity(tf.logging.ERROR)
    # Import the MNIST dataset
    mnist = input_data.read_data_sets(
        "/tmp/data/",
        one_hot=False,
        source_url='http://yann.lecun.com/exdb/mnist/')

    # Hyperparameters
    batch_size = 4096  # size of each training batch
    num_classes = 10  # total number of classes
    num_features = 784  # number of input features: each image is 28 * 28 pixels
    max_steps = 10000

    # GBDT hyperparameters
    learning_rate = 0.1  # learning rate
    l1_regul = 0.  # L1 regularization
    l2_regul = 1.  # L2 regularization
    examples_per_layer = 1000
    num_trees = 10  # number of trees
    max_depth = 16  # maximum tree depth

    # Configure the GBDT learner
    learner_config = gbdt_learner.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
    learner_config.regularization.l1 = l1_regul
    learner_config.regularization.l2 = l2_regul / examples_per_layer
    learner_config.constraints.max_tree_depth = max_depth
    growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
    learner_config.growing_mode = growing_mode
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
    learner_config.multi_class_strategy = (
        gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)

    # Create the TensorFlow GBDT model
    gbdt_model = GradientBoostedDecisionTreeClassifier(
        model_dir=None,  # no save directory specified
        learner_config=learner_config,
        n_classes=num_classes,
        examples_per_layer=examples_per_layer,
        num_trees=num_trees,
        center_bias=False,
        config=run_config)

    # Display TF info logs
    tf.logging.set_verbosity(tf.logging.INFO)

    # Define the input function for training
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': mnist.train.images},
        y=mnist.train.labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)

    # Fit the model
    gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)

    # Evaluate the fitted model
    # Define the input function for evaluation
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': mnist.test.images},
        y=mnist.test.labels,
        batch_size=batch_size,
        shuffle=False)

    # Run evaluation with the estimator's evaluate method
    e = gbdt_model.evaluate(input_fn=input_fn)
    print("Testing Accuracy:", e['accuracy'])

    return
Example #9
learner_config = gbdt_learner.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
learner_config.growing_mode = growing_mode
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
learner_config.multi_class_strategy = (
    gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)

# Define the classifier
gbdt_model = GradientBoostedDecisionTreeClassifier(
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config)

# Mini-batch input; train the model
input_fn = tf.estimator.inputs.numpy_input_fn(x={'images': mnist.train.images},
                                              y=mnist.train.labels,
                                              batch_size=batch_size,
                                              num_epochs=None,
                                              shuffle=True)
gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)

# Evaluate on the test set
input_fn = tf.estimator.inputs.numpy_input_fn(x={'images': mnist.test.images},
                                              y=mnist.test.labels,
                                              batch_size=batch_size,
                                              shuffle=False)
e = gbdt_model.evaluate(input_fn=input_fn)
print("Testing Accuracy:", e['accuracy'])
Example #10
learner_config = gbdt_learner.LearnerConfig()
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
learner_config.growing_mode = growing_mode
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
learner_config.multi_class_strategy = gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN
print("-0-----------------------------------------------------------")

print(mnist.train.images.dtype, mnist.train.labels.dtype)

# Create a TensorFlow GBDT Estimator
gbdt_model = GradientBoostedDecisionTreeClassifier(
    model_dir=None,  # No save directory specified
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'im': np.asarray(mnist.train.images)},
    y=np.asarray(mnist.train.labels),
    batch_size=batch_size,
    num_epochs=None,
    shuffle=False)
# Train the Model
# gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)

# Evaluate the Model
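# The source snippet stops here; a sketch of the evaluation step, reusing the
# 'im' feature key and NumPy arrays defined above (mirrors Example #5's evaluation):
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'im': np.asarray(mnist.test.images)},
    y=np.asarray(mnist.test.labels),
    batch_size=batch_size,
    shuffle=False)
e = gbdt_model.evaluate(input_fn=eval_input_fn)
print("Testing Accuracy:", e['accuracy'])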