Exemplo n.º 1
0
def train(_):
    """Fit a multi-class XGBoost model and copy it into the training set.

    The dataset is read from ``FLAGS.training_set/FLAGS.train_subdir``. The
    trained booster is first saved as ``iterator.model`` under
    ``FLAGS.local_dir`` and then copied (overwriting any existing file) to
    ``xgboost/iterator.model`` under ``FLAGS.training_set``.

    Args:
        _: Unused positional argument.
    """
    data_dir = pjoin(FLAGS.training_set, FLAGS.train_subdir)
    context = Datasets.get_context(data_dir)

    # Only the label group is used below; the feature group is discarded.
    _unused_features, label_names = context.multispec_feature_groups

    raw_dataset = Datasets.dict.read_dataset(data_dir)
    features, labels = transform_dataset(context, raw_dataset)

    # One output class per label name.
    class_count = len(label_names)
    booster_params = {
        'objective': 'multi:softprob',
        'verbose': False,
        'num_class': class_count,
        'max_depth': 6,
        'nthread': 4,
        'silent': 1
    }

    train_matrix = xgb.DMatrix(feature_train_data := features, label=labels) \
        if False else xgb.DMatrix(features, label=labels)
    booster = xgb.train(booster_params, train_matrix,
                        num_boost_round=FLAGS.rounds)

    # Save locally first, then publish the saved file next to the training set.
    local_model = pjoin(FLAGS.local_dir, "iterator.model")
    booster.save_model(local_model)

    published_model = pjoin(FLAGS.training_set, "xgboost/iterator.model")
    file_io.copy(local_model, published_model, overwrite=True)
Exemplo n.º 2
0
def main(_):
    """Run ``Trainer.run`` with a linear classifier on the example data.

    Builds one numeric feature column per feature name found in the context
    of the "train" data directory, and uses the first label name as the
    label when splitting parsed examples.

    Args:
        _: Unused positional argument.
    """
    from examples_utils import get_data_dir
    import tempfile

    # The run config is rooted in a freshly created temporary directory.
    run_config = Trainer.get_default_run_config(tempfile.mkdtemp())

    context = Datasets.get_context(get_data_dir("train"))
    feature_names, label_names = context.multispec_feature_groups
    feature_columns = list(map(tf.feature_column.numeric_column, feature_names))

    def split_features_label_fn(parsed):
        """Pop the first label out of ``parsed`` and return ``(features, label)``."""
        target = parsed.pop(label_names[0])
        return parsed, target

    estimator = tf.estimator.LinearClassifier(feature_columns=feature_columns,
                                              config=run_config)

    train_dir = get_data_dir("train")
    eval_dir = get_data_dir("eval")
    Trainer.run(estimator=estimator,
                training_data_dir=train_dir,
                eval_data_dir=eval_dir,
                split_features_label_fn=split_features_label_fn,
                run_config=run_config)
Exemplo n.º 3
0
    def test_trainer_shouldnt_crash(self):
        """Smoke test: ``Trainer.run`` completes on the test resources.

        The same resource directory is used for both training and evaluation,
        and the split function asserts that exactly one label name exists.
        """
        resources = self.test_resources_dir
        ctx = Datasets.get_context(resources)
        names, label_names = ctx.multispec_feature_groups
        columns = [tf.feature_column.numeric_column(col) for col in names]

        # Run config rooted in a freshly created temporary job directory.
        run_config = Trainer.get_default_run_config(job_dir=tempfile.mkdtemp())

        classifier = tf.estimator.LinearClassifier(
            feature_columns=columns,
            config=run_config,
        )

        def split_features_label_fn(example):
            """Pop the single label out of ``example``; return ``(features, label)``."""
            self.assertEqual(len(label_names), 1)
            target = example.pop(label_names[0])
            return example, target

        Trainer.run(
            classifier,
            training_data_dir=resources,
            eval_data_dir=resources,
            split_features_label_fn=split_features_label_fn,
            run_config=run_config,
        )