示例#1
0
    def testFitAndEvaluateDontThrowExceptionWithCoreForRegressor(self):
        """Smoke test: GBDT regressor with core feature columns fits, evaluates, exports."""
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        # Fresh temp dir so checkpoints don't collide with other tests.
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        regressor = estimator.GradientBoostedDecisionTreeRegressor(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")],
            use_core_libs=True)

        # No return values are checked; any raised exception fails the test.
        regressor.fit(input_fn=_train_input_fn, steps=15)
        regressor.evaluate(input_fn=_eval_input_fn, steps=1)
        regressor.export(self._export_dir_base)
    def test_train_starts_server(self, mock_server):
        """train() on a CLOUD worker must create and start a tf server.

        NOTE(review): `mock_server` is presumably injected by a mock.patch
        decorator outside this view — confirm at the class/method definition.
        """
        # Arrange: emulate worker index 1 of a cloud cluster via TF_CONFIG.
        tf_config = {
            'cluster': self._cluster_spec(),
            'environment': run_config_lib.Environment.CLOUD,
            'task': {
                'type': run_config_lib.TaskType.WORKER,
                'index': 1
            }
        }
        with test.mock.patch.dict('os.environ',
                                  {'TF_CONFIG': json.dumps(tf_config)}):
            config = run_config_lib.RunConfig(master='host4:2222',
                                              num_cores=15,
                                              gpu_memory_fraction=0.314)

        for est in self._estimators_for_tests(config):
            ex = experiment.Experiment(est,
                                       train_input_fn='train_input',
                                       eval_input_fn='eval_input')

            # Act.
            # We want to make sure we discount the time it takes to start the server
            # in our accounting of the delay, so we set a small delay here.
            sheep = SheepCounter()
            with test.mock.patch.object(time, 'time', sheep.time):
                with test.mock.patch.object(time, 'sleep', sheep.sleep):
                    ex.train(delay_secs=1)
                    # Ensure that the delay takes into account the time to start server.
                    self.assertAlmostEqual(1, sheep.time(), delta=1e-4)

            # Assert: server built with the session config derived from RunConfig
            # (num_cores=15 maps onto both thread pools), but not auto-started...
            expected_config_proto = config_pb2.ConfigProto()
            expected_config_proto.inter_op_parallelism_threads = 15
            expected_config_proto.intra_op_parallelism_threads = 15
            expected_config_proto.gpu_options.per_process_gpu_memory_fraction = 0.314
            mock_server.assert_called_with(
                config.cluster_spec,
                job_name=run_config_lib.TaskType.WORKER,
                task_index=1,
                config=expected_config_proto,
                start=False)
            # ...and then explicitly started.
            mock_server.assert_has_calls([test.mock.call().start()])
 def test_train_raises_if_job_name_is_missing(self):
     """train() must raise ValueError when TF_CONFIG's task has no type."""
     # 'task' deliberately omits 'type', so the job name cannot be resolved.
     tf_config = {
         'cluster': self._cluster_spec(),
         'environment': run_config_lib.Environment.CLOUD,
         'task': {
             'index': 1
         }
     }
     with test.mock.patch.dict('os.environ',
                               {'TF_CONFIG': json.dumps(tf_config)
                                }), self.assertRaises(ValueError):
         config = run_config_lib.RunConfig(
             master='host3:2222'  # Normally selected by task type.
         )
         for est in self._estimators_for_tests(config):
             ex = experiment.Experiment(est,
                                        train_input_fn='train_input',
                                        eval_input_fn='eval_input')
             ex.train()
示例#4
0
  def test_replace(self):
    """replace() returns a modified copy and rejects immutable/unknown keys."""
    config = run_config_lib.RunConfig(
        tf_random_seed=RANDOM_SEED, model_dir=TEST_DIR)
    self.assertEqual(TEST_DIR, config.model_dir)
    self.assertEqual(RANDOM_SEED, config.tf_random_seed)

    new_config = config.replace(model_dir=ANOTHER_TEST_DIR)
    self.assertEqual(ANOTHER_TEST_DIR, new_config.model_dir)
    self.assertEqual(RANDOM_SEED, new_config.tf_random_seed)

    # The original config must be left untouched by replace().
    self.assertEqual(TEST_DIR, config.model_dir)
    self.assertEqual(RANDOM_SEED, config.tf_random_seed)

    with self.assertRaises(ValueError):
      # tf_random_seed is not allowed to be replaced.
      config.replace(tf_random_seed=RANDOM_SEED)

    with self.assertRaises(ValueError):
      # Unknown properties are rejected as well.
      config.replace(some_undefined_property=RANDOM_SEED)
示例#5
0
    def setUp(self):
        """Builds a reproducible random point set and a KMeans estimator.

        NOTE(review): relies on self.batch_size, self.use_mini_batch, and the
        make_random_* helpers being defined elsewhere (subclass or mixin) —
        confirm before reuse.
        """
        np.random.seed(3)
        self.num_centers = 5
        self.num_dims = 2
        self.num_points = 10000
        self.true_centers = self.make_random_centers(self.num_centers,
                                                     self.num_dims)
        self.points, _, self.scores = self.make_random_points(
            self.true_centers, self.num_points)
        # Reference total score to compare fitted models against.
        self.true_score = np.add.reduce(self.scores)

        self.kmeans = KMeans(self.num_centers,
                             initial_clusters=kmeans_ops.RANDOM_INIT,
                             batch_size=self.batch_size,
                             use_mini_batch=self.use_mini_batch,
                             steps=30,
                             continue_training=True,
                             config=run_config.RunConfig(tf_random_seed=14),
                             random_seed=12)
示例#6
0
    def benchmarkLogisticNpMatrixData(self):
        """Benchmarks binary classification with iris data passed as numpy arrays."""
        classifier = dnn.DNNClassifier(
            feature_columns=(feature_column.real_valued_column('',
                                                               dimension=4), ),
            hidden_units=(3, 3),
            config=run_config.RunConfig(tf_random_seed=1))
        iris = test_data.prepare_iris_data_for_logistic_regression()
        train_x = iris.data
        train_y = iris.target
        steps = 100
        metrics = classifier.fit(x=train_x, y=train_y,
                                 steps=steps).evaluate(x=train_x,
                                                       y=train_y,
                                                       steps=1)
        # global_step should land within [steps, steps + 5] after fit().
        estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
                                             metrics)
        estimator_test_utils.assert_in_range(0.8, 1.0, 'accuracy', metrics)

        self._report_metrics(metrics)
示例#7
0
    def testDoesNotOverrideGlobalSteps(self):
        """Requesting 15 fit steps must not inflate the checkpointed global_step."""
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 2
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=False)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        # When no override of global steps, 5 steps were used.
        self._assert_checkpoint(classifier.model_dir, global_step=5)
示例#8
0
  def test_random_input_large(self):
    """GMM fitted on a large random input must not produce NaN clusters."""
    # Five iterations are enough to reveal divergence.
    iterations = 5
    np.random.seed(5)
    num_classes = 20
    # Same RNG draw order as the per-element nested loop: NumPy fills
    # a (num_classes, 100) array row-major from the same stream.
    x = np.random.random((num_classes, 100)).astype(np.float32)

    # skflow version.
    gmm = gmm_lib.GMM(num_classes,
                      covariance_type='full',
                      config=run_config.RunConfig(tf_random_seed=2))

    def make_input_fn(data):
      def _input_fn():
        return constant_op.constant(data.astype(np.float32)), None
      return _input_fn

    gmm.fit(input_fn=make_input_fn(x), steps=iterations)
    self.assertFalse(np.isnan(gmm.clusters()).any())
示例#9
0
 def _fit(self, num_iters=10):
   """Times num_iters independent TF KMeans fit/score runs and reports them.

   Args:
     num_iters: number of independent runs; each uses a different random_seed.
   """
   scores = []
   start = time.time()
   for i in range(num_iters):
     print('Starting tensorflow KMeans: %d' % i)
     tf_kmeans = kmeans_lib.KMeansClustering(
         self.num_clusters,
         initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT,
         # More retries for larger cluster counts, per kmeans++ practice.
         kmeans_plus_plus_num_retries=int(math.log(self.num_clusters) + 2),
         random_seed=i * 42,
         relative_tolerance=1e-6,
         config=run_config.RunConfig(tf_random_seed=3))
     tf_kmeans.fit(
         input_fn=lambda: (constant_op.constant(self.points), None), steps=50)
     # Materialize clusters so their computation is included in the timing.
     _ = tf_kmeans.clusters()
     scores.append(
         tf_kmeans.score(
             input_fn=lambda: (constant_op.constant(self.points), None),
             steps=1))
   self._report(num_iters, start, time.time(), scores)
示例#10
0
    def benchmarkMultiClassMatrixData(self):
        """Tests multi-class classification using matrix data as input."""
        classifier = dnn.DNNClassifier(
            n_classes=3,
            feature_columns=(feature_column.real_valued_column('feature',
                                                               dimension=4), ),
            hidden_units=(3, 3),
            config=run_config.RunConfig(tf_random_seed=1))

        input_fn = test_data.iris_input_multiclass_fn
        steps = 500
        metrics = classifier.fit(input_fn=input_fn,
                                 steps=steps).evaluate(input_fn=input_fn,
                                                       steps=1)
        # global_step should land within [steps, steps + 5] after fit().
        estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
                                             metrics)
        estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
        estimator_test_utils.assert_in_range(0.0, 0.4, 'loss', metrics)

        self._report_metrics(metrics)
示例#11
0
 def test_fit_with_cosine_distance(self):
     """KMeans with cosine distance must recover unit-vector centers."""
     # Create points on y=x and y=1.5x lines to check the cosine similarity.
     # Note that euclidean distance will give different results in this case.
     points = np.array([[9, 9], [0.5, 0.5], [10, 15], [0.4, 0.6]])
     # true centers are the unit vectors on lines y=x and y=1.5x
     true_centers = np.array([[0.70710678, 0.70710678],
                              [0.5547002, 0.83205029]])
     kmeans = KMeans(2,
                     initial_clusters=kmeans_ops.RANDOM_INIT,
                     distance_metric=kmeans_ops.COSINE_DISTANCE,
                     use_mini_batch=self.use_mini_batch,
                     batch_size=4,
                     steps=30,
                     continue_training=True,
                     config=run_config.RunConfig(tf_random_seed=2),
                     random_seed=12)
     kmeans.fit(x=points)
     # Normalize and sort so the comparison is order-independent.
     centers = normalize(kmeans.clusters())
     self.assertAllClose(np.sort(centers, axis=0),
                         np.sort(true_centers, axis=0))
示例#12
0
  def test_is_chief_from_cloud_tf_config(self):
    """A "cloud" master task at index 0 must report is_chief.

    The non-master case is covered by test_values_from_tf_config.
    """
    cluster = {
        run_config_lib.TaskType.PS: ["host1:1", "host2:2"],
        run_config_lib.TaskType.MASTER: ["host3:3"],
        run_config_lib.TaskType.WORKER: ["host4:4", "host5:5", "host6:6"]
    }
    task = {
        "type": run_config_lib.TaskType.MASTER,
        "index": 0
    }
    tf_config = {"cluster": cluster, "task": task, "environment": "cloud"}
    with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
      config = run_config_lib.RunConfig()

    self.assertTrue(config.is_chief)
示例#13
0
    def benchmarkLogisticTensorData(self):
        """Benchmarks binary classification fed from dense + sparse tensors."""
        def _input_fn(num_epochs=None):
            # One dense 'age' column and one sparse 'language' column; labels
            # are int32 class ids for three examples.
            features = {
                'age':
                input_lib.limit_epochs(constant_op.constant(
                    ((.8, ), (0.2, ), (.1, ))),
                                       num_epochs=num_epochs),
                'language':
                sparse_tensor.SparseTensor(values=input_lib.limit_epochs(
                    ('en', 'fr', 'zh'), num_epochs=num_epochs),
                                           indices=((0, 0), (0, 1), (2, 0)),
                                           dense_shape=(3, 2))
            }
            return features, constant_op.constant(((1, ), (0, ), (0, )),
                                                  dtype=dtypes.int32)

        lang_column = feature_column.sparse_column_with_hash_bucket(
            'language', hash_bucket_size=20)
        classifier = dnn.DNNClassifier(
            feature_columns=(feature_column.embedding_column(lang_column,
                                                             dimension=1),
                             feature_column.real_valued_column('age')),
            hidden_units=(3, 3),
            config=run_config.RunConfig(tf_random_seed=1))
        steps = 100
        metrics = classifier.fit(input_fn=_input_fn,
                                 steps=steps).evaluate(input_fn=_input_fn,
                                                       steps=1)
        # global_step should land within [steps, steps + 5] after fit().
        estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
                                             metrics)
        estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
        estimator_test_utils.assert_in_range(0.0, 0.3, 'loss', metrics)

        self._report_metrics(metrics)
        # Predictions use a single epoch so the input terminates.
        self._report_predictions(classifier=classifier,
                                 input_fn=functools.partial(_input_fn,
                                                            num_epochs=1),
                                 iters=metrics['global_step'],
                                 n_examples=3,
                                 n_classes=2,
                                 expected_classes=(1, 0, 0))
    def benchmarkPartitionedVariables(self):
        """Benchmarks a wide-and-deep model whose embeddings are partitioned."""
        def _input_fn():
            features = {
                'language':
                sparse_tensor.SparseTensor(values=('en', 'fr', 'zh'),
                                           indices=((0, 0), (0, 1), (2, 0)),
                                           dense_shape=(3, 2))
            }
            labels = constant_op.constant(((1, ), (0, ), (0, )))
            return features, labels

        # The given hash_bucket_size results in variables larger than the
        # default min_slice_size attribute, so the variables are partitioned.
        sparse_feature = feature_column.sparse_column_with_hash_bucket(
            'language', hash_bucket_size=2e7)
        embedding_feature = feature_column.embedding_column(sparse_feature,
                                                            dimension=1)

        # Two fake parameter servers in TF_CONFIG force a partitioned setup.
        tf_config = {
            'cluster': {
                run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
            }
        }
        with test.mock.patch.dict('os.environ',
                                  {'TF_CONFIG': json.dumps(tf_config)}):
            config = run_config.RunConfig()
            # Because we did not start a distributed cluster, we need to pass an
            # empty ClusterSpec, otherwise the device_setter will look for
            # distributed jobs, such as "/job:ps" which are not present.
            config._cluster_spec = server_lib.ClusterSpec({})

        classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
            linear_feature_columns=(sparse_feature, ),
            dnn_feature_columns=(embedding_feature, ),
            dnn_hidden_units=(3, 3),
            config=config)

        metrics = classifier.fit(input_fn=_input_fn,
                                 steps=_ITERS).evaluate(input_fn=_input_fn,
                                                        steps=100)
        self._assertCommonMetrics(metrics)
示例#15
0
    def testThatLeafIndexIsInPredictions(self):
        """With output_leaf_index=True, every prediction must carry leaf indices.

        Trains a one-tree GBDT classifier and checks that each prediction dict
        exposes both "leaf_index" and "logits".
        """
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=True)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        result_iter = classifier.predict(input_fn=_eval_input_fn)
        for prediction_dict in result_iter:
            # assertIn gives a clearer failure message than assertTrue(x in d).
            self.assertIn("leaf_index", prediction_dict)
            self.assertIn("logits", prediction_dict)
示例#16
0
    def test_infer(self):
        """Fits a GMM, then checks assignments and score on a fresh point set."""
        gmm = GMM(self.num_centers,
                  initial_clusters=self.initial_means,
                  batch_size=self.batch_size,
                  steps=40,
                  continue_training=True,
                  random_seed=4,
                  config=run_config.RunConfig(tf_random_seed=2))
        gmm.fit(x=self.points, steps=60)
        clusters = gmm.clusters()

        # Make a small test set
        points, true_assignments, true_offsets = (self.make_random_points(
            clusters, 40))

        assignments = np.ravel(gmm.predict(points))
        self.assertAllEqual(true_assignments, assignments)

        # Test score
        score = gmm.score(points)
        self.assertNear(score, np.sum(true_offsets), 4.05)
示例#17
0
    def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
        """KMeans++ with cosine distance must find sparsely populated centers."""
        # Most points are concentrated near one center. KMeans++ is likely to find
        # the less populated centers.
        points = np.array([[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3],
                           [-3.1, -3.2], [-2.8, -3.],
                           [-2.9, -3.1], [-3., -3.1], [-3., -3.1], [-3.2, -3.],
                           [-3., -3.]]).astype(np.float32)
        # Expected centers: normalized means of each hand-picked group.
        true_centers = np.array([
            normalize(np.mean(normalize(points)[0:2, :], axis=0,
                              keepdims=True))[0],
            normalize(np.mean(normalize(points)[2:4, :], axis=0,
                              keepdims=True))[0],
            normalize(np.mean(normalize(points)[4:, :], axis=0,
                              keepdims=True))[0]
        ])
        true_assignments = [0] * 2 + [1] * 2 + [2] * 8
        # Cosine score: count minus total similarity to assigned centers.
        true_score = len(points) - np.tensordot(normalize(points),
                                                true_centers[true_assignments])

        kmeans = KMeans(3,
                        initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT,
                        distance_metric=kmeans_ops.COSINE_DISTANCE,
                        use_mini_batch=self.use_mini_batch,
                        batch_size=12,
                        continue_training=True,
                        config=run_config.RunConfig(tf_random_seed=3))
        kmeans.fit(x=points, steps=30)

        # Sorted comparison keeps the check order-independent.
        centers = normalize(kmeans.clusters())
        self.assertAllClose(sorted(centers.tolist()),
                            sorted(true_centers.tolist()),
                            atol=1e-2)

        assignments = kmeans.predict(points)
        self.assertAllClose(centers[assignments],
                            true_centers[true_assignments],
                            atol=1e-2)

        score = kmeans.score(points)
        self.assertAllClose(score, true_score, atol=1e-2)
示例#18
0
  def test_values_from_tf_config(self):
    """RunConfig must derive master/task/replica counts from TF_CONFIG."""
    tf_config = {
        "cluster": {
            run_config_lib.TaskType.PS: ["host1:1", "host2:2"],
            run_config_lib.TaskType.WORKER: ["host3:3", "host4:4", "host5:5"]
        },
        "task": {
            "type": run_config_lib.TaskType.WORKER,
            "index": 1
        }
    }
    with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
      config = run_config_lib.RunConfig()

    # Worker index 1 resolves to the second worker host.
    self.assertEqual(config.master, "grpc://host4:4")
    self.assertEqual(config.task_id, 1)
    self.assertEqual(config.num_ps_replicas, 2)
    self.assertEqual(config.num_worker_replicas, 3)
    self.assertEqual(config.cluster_spec.as_dict(), tf_config["cluster"])
    self.assertEqual(config.task_type, run_config_lib.TaskType.WORKER)
    self.assertFalse(config.is_chief)
    self.assertEqual(config.evaluation_master, "")
    def testLinearRegression(self):
        """Two runs with identical seeds must yield identical weights and scores."""
        my_seed = 42
        config = run_config.RunConfig(tf_random_seed=my_seed)
        boston = base.load_boston()
        columns = [feature_column.real_valued_column('', dimension=13)]

        # We train with

        # Each run builds its own graph with the same python and graph seeds.
        with ops.Graph().as_default() as g1:
            random.seed(my_seed)
            g1.seed = my_seed
            variables.create_global_step()
            regressor1 = linear.LinearRegressor(optimizer=_NULL_OPTIMIZER,
                                                feature_columns=columns,
                                                config=config)
            regressor1.fit(x=boston.data, y=boston.target, steps=1)

        with ops.Graph().as_default() as g2:
            random.seed(my_seed)
            g2.seed = my_seed
            variables.create_global_step()
            regressor2 = linear.LinearRegressor(optimizer=_NULL_OPTIMIZER,
                                                feature_columns=columns,
                                                config=config)
            regressor2.fit(x=boston.data, y=boston.target, steps=1)

        variable_names = regressor1.get_variable_names()
        # 'linear//weight': double slash presumably from the empty column
        # name '' above — confirm against feature_column naming rules.
        self.assertIn('linear//weight', variable_names)
        self.assertIn('linear/bias_weight', variable_names)
        regressor1_weights = regressor1.get_variable_value('linear//weight')
        regressor2_weights = regressor2.get_variable_value('linear//weight')
        regressor1_bias = regressor1.get_variable_value('linear/bias_weight')
        regressor2_bias = regressor2.get_variable_value('linear/bias_weight')
        self.assertAllClose(regressor1_weights, regressor2_weights)
        self.assertAllClose(regressor1_bias, regressor2_bias)
        self.assertAllClose(
            list(regressor1.predict_scores(boston.data, as_iterable=True)),
            list(regressor2.predict_scores(boston.data, as_iterable=True)),
            atol=1e-05)
    def testFitAndEvaluateDontThrowException(self):
        """Smoke test: DNN+tree combined classifier fits and evaluates cleanly."""
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.DNNBoostedTreeCombinedClassifier(
            dnn_hidden_units=[1],
            dnn_feature_columns=[feature_column.real_valued_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            n_classes=2,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=False,
            tree_feature_columns=[feature_column.real_valued_column("x")])

        # Any raised exception fails the test; no return values are checked.
        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
示例#21
0
    def testTrainEvaluateInferDoesNotThrowError(self):
        """Smoke test: core GBDT estimator trains, evaluates, predicts cleanly."""
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        est = estimator.CoreGradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")])

        # Train for a few steps.
        est.train(input_fn=_train_input_fn, steps=1000)
        est.evaluate(input_fn=_eval_input_fn, steps=1)
        est.predict(input_fn=_eval_input_fn)
    def testTaskIsSetOnWorkerWhenJobNameIsSet(self):
        """Variables must land on ps task 0; ops on the TF_CONFIG worker task."""
        tf_config = {
            'cluster': {
                run_config.TaskType.PS: ['fake_ps_0']
            },
            'task': {
                'type': run_config.TaskType.WORKER,
                'index': 3
            }
        }
        with test.mock.patch.dict('os.environ',
                                  {'TF_CONFIG': json.dumps(tf_config)}):
            config = run_config.RunConfig()

        with ops.device(estimator._get_replica_device_setter(config)):
            v = variables_lib.Variable([1, 2])
            w = variables_lib.Variable([2, 1])
            a = v + w
        # Variables (and their initializers) go to the single ps task...
        self.assertDeviceEqual('/job:ps/task:0', v.device)
        self.assertDeviceEqual('/job:ps/task:0', v.initializer.device)
        self.assertDeviceEqual('/job:ps/task:0', w.device)
        self.assertDeviceEqual('/job:ps/task:0', w.initializer.device)
        # ...while compute ops stay on the worker from TF_CONFIG's task index.
        self.assertDeviceEqual('/job:worker/task:3', a.device)
示例#23
0
File: train.py  Project: xysmlx/seq2seq
def get_run_config():
    """Builds a learn RunConfig from FLAGS.

    For cloud runs, exports a TF_CONFIG environment variable describing the
    ps/worker cluster and this process's task before the config is created.

    Returns:
        A run_config.RunConfig carrying checkpoint and GPU options from FLAGS.
    """
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Truthiness test instead of `is True`, which would silently fail for
    # truthy non-bool flag values.
    if FLAGS.cloud:
        os.environ["environment"] = run_config.Environment.CLOUD
        cluster = {"ps": ps_hosts, "worker": worker_hosts}
        os.environ["TF_CONFIG"] = json.dumps({
            "cluster":
            cluster,
            "environment":
            os.environ["environment"],
            "task": {
                "type": FLAGS.job_name,
                "index": FLAGS.task_index
            }
        })
    else:
        os.environ["environment"] = run_config.Environment.LOCAL
    tf.logging.info("tf_config:{}".format(os.environ.get("TF_CONFIG", None)))

    config = run_config.RunConfig(
        tf_random_seed=FLAGS.tf_random_seed,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    # Session-level options are attached to the config's session proto.
    config.tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth
    config.tf_config.log_device_placement = FLAGS.log_device_placement
    config.tf_config.allow_soft_placement = FLAGS.allow_soft_placement

    # NOTE(review): writes a private attribute; presumably required because
    # RunConfig offers no public chief override here — confirm.
    if Is_chief(config):
        config._is_chief = True

    return config
示例#24
0
def do_training(id, X_train, X_test, y_train, y_test):
  """Trains a DNNRegressor on the training set and evaluates on the test set.

  Args:
    id: identifier used to name the model directory. (Shadows the builtin
        `id`; the name is kept for caller compatibility.)
    X_train, X_test: feature matrices, one example per row.
    y_train, y_test: target vectors.

  Returns:
    The result of evaluate(classifier, X_test, y_test).
  """
  global batch_step
  global training_set_size

  model_dir = "model_" + str(id)

  # Start from a clean model directory so old checkpoints don't leak in.
  if os.path.isdir(model_dir):
    print("Removing old model dir...")
    shutil.rmtree(model_dir)

  # Specify that all features have real-value data.
  feature_columns = [tf.contrib.layers.real_valued_column(
                      "", dimension=X_train.shape[1])]
  batch_step = 0
  training_set_size = PARAMS['training_set_size']
  # Implicit continuation inside the call replaces the old backslash style.
  config = run_config.RunConfig(log_device_placement=False,
                                save_checkpoints_secs=5)
  # activation_fn: tf.nn.relu by default.
  classifier = tf.contrib.learn.DNNRegressor(
      feature_columns=feature_columns,
      hidden_units=PARAMS['layers'],
      model_dir=model_dir,
      optimizer=tf.train.AdamOptimizer(learning_rate=PARAMS['learning_rate'],
                                       epsilon=0.8),
      dropout=PARAMS['dropout'],
      config=config)

  monitor = RegressionMonitor(x=X_test, y=y_test)
  classifier.fit(input_fn=lambda: input_fn(X_train, y_train),
                 steps=PARAMS['max_steps'], monitors=[monitor])

  # Evaluate accuracy.
  errors = eval_test(classifier)
  for err in errors:
    print(err)

  return evaluate(classifier, X_test, y_test)
示例#25
0
    def benchmarkLogisticMatrixDataLabels1D(self):
        """Benchmarks binary classification with 1-D (shape (100,)) labels."""
        def _input_fn():
            iris = test_data.prepare_iris_data_for_logistic_regression()
            return {
                'feature': constant_op.constant(iris.data,
                                                dtype=dtypes.float32)
            }, constant_op.constant(iris.target,
                                    shape=(100, ),
                                    dtype=dtypes.int32)

        classifier = dnn.DNNClassifier(
            feature_columns=(feature_column.real_valued_column('feature',
                                                               dimension=4), ),
            hidden_units=(3, 3),
            config=run_config.RunConfig(tf_random_seed=1))
        steps = 1000
        metrics = classifier.fit(input_fn=_input_fn,
                                 steps=steps).evaluate(input_fn=_input_fn,
                                                       steps=1)
        # global_step should land within [steps, steps + 5] after fit().
        estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
                                             metrics)
        estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)

        self._report_metrics(metrics)
示例#26
0
    def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
        """Smoke test: GBDT estimator with a core head fits, evaluates, exports."""
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        # Use core head
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

        model = estimator.GradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")],
            use_core_libs=True)

        # Any raised exception fails the test; no return values are checked.
        model.fit(input_fn=_train_input_fn, steps=15)
        model.evaluate(input_fn=_eval_input_fn, steps=1)
        model.export(self._export_dir_base)
def _create_experiment_fn(output_dir):  # pylint: disable=unused-argument
  """Experiment creation function."""
  (columns, label_column, wide_columns, deep_columns, categorical_columns,
   continuous_columns) = census_model_config()

  census_data_source = CensusDataSource(FLAGS.data_dir,
                                        TRAIN_DATA_URL, TEST_DATA_URL,
                                        columns, label_column,
                                        categorical_columns,
                                        continuous_columns)

  # Advertise fake parameter servers so the model variables get partitioned;
  # the task entry deliberately carries only this worker's index.
  os.environ["TF_CONFIG"] = json.dumps({
      "cluster": {
          tf.contrib.learn.TaskType.PS: ["fake_ps"] *
                                        FLAGS.num_parameter_servers
      },
      "task": {
          "index": FLAGS.worker_index
      }
  })
  config = run_config.RunConfig(master=FLAGS.master_grpc_url)

  estimator = tf.contrib.learn.DNNLinearCombinedClassifier(
      model_dir=FLAGS.model_dir,
      linear_feature_columns=wide_columns,
      dnn_feature_columns=deep_columns,
      dnn_hidden_units=[5],
      config=config)

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=census_data_source.input_train_fn,
      eval_input_fn=census_data_source.input_test_fn,
      train_steps=FLAGS.train_steps,
      eval_steps=FLAGS.eval_steps
  )
示例#28
0
  def test_explicitly_specified_values(self):
    """Constructor arguments must override values derived from TF_CONFIG."""
    cluster_spec = {
        run_config_lib.TaskType.PS: ["localhost:9990"],
        "my_job_name": ["localhost:9991", "localhost:9992", "localhost:0"]
    }
    tf_config = {
        "cluster": cluster_spec,
        "task": {
            "type": run_config_lib.TaskType.WORKER,
            "index": 2
        }
    }
    with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
      config = run_config_lib.RunConfig(
          master="localhost:0", evaluation_master="localhost:9991")

    self.assertEqual(config.master, "localhost:0")
    self.assertEqual(config.task_id, 2)
    self.assertEqual(config.num_ps_replicas, 1)
    # Cluster has no TaskType.WORKER entry, so worker replicas count as 0.
    self.assertEqual(config.num_worker_replicas, 0)
    self.assertEqual(config.cluster_spec, server_lib.ClusterSpec(cluster_spec))
    self.assertEqual(config.task_type, run_config_lib.TaskType.WORKER)
    self.assertFalse(config.is_chief)
    self.assertEqual(config.evaluation_master, "localhost:9991")
示例#29
0
  def testModelFnArgs(self):
    """Estimator must invoke a 5-arg model_fn with params and config."""
    features = {'x': 42., 'y': 43.}
    labels = 44.
    expected_params = {'some_param': 'some_value'}
    expected_config = run_config.RunConfig()
    # Marker attribute so the model_fn could verify it received this config.
    expected_config.i_am_test = True

    # TODO(ptucker): We have to roll our own mock since Estimator._get_arguments
    # doesn't work with mock fns.
    model_fn_call_count = [0]

    # `features` and `labels` are passed by position, `arg0` and `arg1` here.
    # The commented asserts below are intentionally disabled (see TODO above).
    def _model_fn(arg0, arg1, mode, params, config):
      model_fn_call_count[0] += 1
      # self.assertItemsEqual(features.keys(), arg0.keys())
      # self.assertEqual(model_fn.ModeKeys.TRAIN, mode)
      # self.assertEqual(expected_params, params)
      # self.assertTrue(config.i_am_test)
      return _model_fn_ops(features, labels, arg0, arg1, mode)

    est = estimator.Estimator(
        model_fn=_model_fn, params=expected_params, config=expected_config)
    # self.assertEqual(0, model_fn_call_count[0])
    est.fit(input_fn=_make_input_fn(features, labels), steps=1)
    def testRegression_TensorData(self):
        """Tests regression using tensor data as input."""
        def _input_fn(num_epochs=None):
            # One dense 'age' column and one sparse 'language' column; labels
            # are float targets for three examples.
            features = {
                'age':
                input_lib.limit_epochs(constant_op.constant([[.8], [.15],
                                                             [0.]]),
                                       num_epochs=num_epochs),
                'language':
                sparse_tensor.SparseTensor(values=input_lib.limit_epochs(
                    ['en', 'fr', 'zh'], num_epochs=num_epochs),
                                           indices=[[0, 0], [0, 1], [2, 0]],
                                           dense_shape=[3, 2])
            }
            return features, constant_op.constant([1., 0., 0.2],
                                                  dtype=dtypes.float32)

        regressor = debug.DebugRegressor(config=run_config.RunConfig(
            tf_random_seed=1))

        regressor.fit(input_fn=_input_fn, steps=200)

        # Only verifies evaluation produced a loss; no threshold is asserted.
        scores = regressor.evaluate(input_fn=_input_fn, steps=1)
        self.assertIn('loss', scores)