def train(_):
    """Train an XGBoost multi-class model and publish the saved booster.

    Reads the dataset found under ``FLAGS.training_set`` joined with
    ``FLAGS.train_subdir``, trains a ``multi:softprob`` booster for
    ``FLAGS.rounds`` rounds, saves it as ``iterator.model`` inside
    ``FLAGS.local_dir`` and finally copies that file to
    ``xgboost/iterator.model`` under ``FLAGS.training_set``, overwriting
    any file already there.

    Args:
        _: Ignored. Presumably the argv list supplied by the app runner
            that invokes this function -- confirm against the entry point.
    """
    train_data_path = pjoin(FLAGS.training_set, FLAGS.train_subdir)
    context = Datasets.get_context(train_data_path)
    # Only the label names are used below: they fix the number of classes.
    _feature_names, label_names = context.multispec_feature_groups
    raw_dataset = Datasets.dict.read_dataset(train_data_path)
    features, labels = transform_dataset(context, raw_dataset)

    booster_params = dict(
        objective='multi:softprob',
        verbose=False,
        num_class=len(label_names),
        max_depth=6,
        nthread=4,
        silent=1,
    )
    train_matrix = xgb.DMatrix(features, label=labels)
    booster = xgb.train(booster_params, train_matrix, FLAGS.rounds)

    # Save locally first, then copy to the (possibly remote) training set.
    local_model_path = pjoin(FLAGS.local_dir, "iterator.model")
    booster.save_model(local_model_path)
    published_model_path = pjoin(FLAGS.training_set, "xgboost/iterator.model")
    file_io.copy(local_model_path, published_model_path, overwrite=True)
def main(_):
    """Run ``Trainer`` with a linear classifier over the example data.

    Builds numeric feature columns from the feature names reported by the
    "train" dataset context, creates a ``tf.estimator.LinearClassifier``
    with a default run config rooted at a fresh temporary directory, and
    hands the "train" and "eval" data directories to ``Trainer.run``.

    Args:
        _: Ignored. Presumably the argv list supplied by the app runner
            that invokes this function -- confirm against the entry point.
    """
    from examples_utils import get_data_dir
    import tempfile

    run_config = Trainer.get_default_run_config(tempfile.mkdtemp())
    context = Datasets.get_context(get_data_dir("train"))
    feature_names, label_names = context.multispec_feature_groups
    feature_columns = [
        tf.feature_column.numeric_column(feature_name)
        for feature_name in feature_names
    ]

    def split_features_label_fn(spec):
        """Pop the first label out of ``spec``; return ``(spec, label)``."""
        target = spec.pop(label_names[0])
        return spec, target

    classifier = tf.estimator.LinearClassifier(feature_columns, config=run_config)

    training_dir = get_data_dir("train")
    evaluation_dir = get_data_dir("eval")
    Trainer.run(
        estimator=classifier,
        training_data_dir=training_dir,
        eval_data_dir=evaluation_dir,
        split_features_label_fn=split_features_label_fn,
        run_config=run_config,
    )
def test_trainer_shouldnt_crash(self):
    """Smoke test: ``Trainer.run`` completes with a linear classifier.

    The test resources directory is used as both the training and the
    evaluation data directory. The label-splitting callback also asserts
    that the dataset context reports exactly one label name.
    """
    dataset_context = Datasets.get_context(self.test_resources_dir)
    feature_names, label_names = dataset_context.multispec_feature_groups
    columns = list(map(tf.feature_column.numeric_column, feature_names))
    run_config = Trainer.get_default_run_config(job_dir=tempfile.mkdtemp())
    classifier = tf.estimator.LinearClassifier(
        feature_columns=columns,
        config=run_config,
    )

    def split_features_label_fn(parsed_features):
        """Pop the single label out of ``parsed_features``."""
        self.assertEqual(len(label_names), 1)
        target = parsed_features.pop(label_names[0])
        return parsed_features, target

    Trainer.run(
        classifier,
        training_data_dir=self.test_resources_dir,
        eval_data_dir=self.test_resources_dir,
        split_features_label_fn=split_features_label_fn,
        run_config=run_config,
    )