def main():
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    # Configure Spark and initialize the NNContext according to the cluster mode.
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn mode. Please " \
                            "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.driver.memory", "2g")
        if cluster_mode == "yarn-client":
            sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf)
        else:
            sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf)
    else:
        sc = init_nncontext()

    def model_fn(features, labels, mode):
        # LeNet defined via TF-Slim; ZooOptimizer wraps the optimizer for
        # distributed training.
        from nets import lenet
        slim = tf.contrib.slim
        with slim.arg_scope(lenet.lenet_arg_scope()):
            logits, end_points = lenet.lenet(features, num_classes=10, is_training=True)

        if mode == tf.estimator.ModeKeys.EVAL or mode == tf.estimator.ModeKeys.TRAIN:
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
            optimizer = ZooOptimizer(tf.train.AdamOptimizer())
            train_op = optimizer.minimize(loss)
            return tf.estimator.EstimatorSpec(mode, predictions=logits,
                                              loss=loss, train_op=train_op)
        else:
            return tf.estimator.EstimatorSpec(mode, predictions=logits)

    def input_fn(mode):
        # Training takes a fixed global batch_size; evaluation and prediction
        # take a per-partition batch_per_thread.
        if mode == tf.estimator.ModeKeys.TRAIN:
            training_data = get_data("train")
            dataset = TFDataset.from_ndarrays(training_data, batch_size=320)
        elif mode == tf.estimator.ModeKeys.EVAL:
            testing_data = get_data("test")
            dataset = TFDataset.from_ndarrays(testing_data, batch_per_thread=80)
        else:
            images, _ = get_data("test")
            dataset = TFDataset.from_ndarrays(images, batch_per_thread=80)
        return dataset

    estimator = TFEstimator.from_model_fn(model_fn, model_dir="/tmp/estimator")

    estimator.train(input_fn, steps=10)

    metrics = estimator.evaluate(input_fn, ["acc"])
    print(metrics)

    predictions = estimator.predict(input_fn)
    print(predictions.first())
    print("finished...")
    sc.stop()
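# The example above relies on a get_data helper that is not shown here. A minimal
# sketch of one possible implementation, assuming the MNIST arrays are fetched
# through tf.keras.datasets (the original may load the data differently):
import numpy as np
import tensorflow as tf

def get_data(split):
    # Return a (images, labels) tuple of ndarrays for the requested split;
    # TFDataset.from_ndarrays consumes such tuples directly.
    (train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()
    images, labels = (train_x, train_y) if split == "train" else (test_x, test_y)
    # Scale to float32 in [0, 1] and add the channel dimension LeNet expects.
    images = (images / 255.0).astype(np.float32).reshape((-1, 28, 28, 1))
    return images, labels.astype(np.int32)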
def test_estimator_for_imageset(self):
    model_fn = self.create_model_fn()
    input_fn = self.create_imageset_input_fn()
    estimator = TFEstimator.from_model_fn(model_fn)
    estimator.train(input_fn, steps=1)
    estimator.evaluate(input_fn, ["acc"])
    results = estimator.predict(input_fn).get_predict().collect()
    assert all(r[1] is not None for r in results)
def test_estimator_without_batch(self):
    def model_fn(features, labels, mode):
        assert features.shape.ndims == 1
        if labels is not None:
            assert labels.shape.ndims == 0

        features = tf.expand_dims(features, axis=0)
        h1 = tf.layers.dense(features, 64, activation=tf.nn.relu)
        h2 = tf.layers.dense(h1, 64, activation=tf.nn.relu)
        logits = tf.layers.dense(h2, 10)

        if mode == tf.estimator.ModeKeys.EVAL or mode == tf.estimator.ModeKeys.TRAIN:
            labels = tf.expand_dims(labels, axis=0)
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
            train_op = ZooOptimizer(tf.train.AdamOptimizer()).minimize(loss)
            return tf.estimator.EstimatorSpec(mode, train_op=train_op,
                                              predictions=logits, loss=loss)
        else:
            return tf.estimator.EstimatorSpec(mode, predictions=logits)

    def input_fn(mode):
        np.random.seed(20)
        x = np.random.rand(20, 10)
        y = np.random.randint(0, 10, (20,))
        rdd_x = self.sc.parallelize(x)
        rdd_y = self.sc.parallelize(y)
        rdd = rdd_x.zip(rdd_y)
        if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
            dataset = TFDataset.from_rdd(rdd,
                                         features=(tf.float32, [10]),
                                         labels=(tf.int32, []))
        else:
            dataset = TFDataset.from_rdd(rdd_x, features=(tf.float32, [10]))
        return dataset

    estimator = TFEstimator.from_model_fn(model_fn)

    self.intercept(
        lambda: estimator.train(input_fn, steps=1),
        "The batch_size of TFDataset must be specified when used for training.")

    estimator.evaluate(input_fn, ["acc"])
    estimator.predict(input_fn).collect()
def test_init_TFDataset_from_ndarrays(self):
    model_fn = self.create_model_fn()

    def input_fn(mode):
        x = np.random.rand(20, 10)
        y = np.random.randint(0, 10, (20,))
        if mode == tf.estimator.ModeKeys.TRAIN:
            return TFDataset.from_ndarrays((x, y), batch_size=8)
        elif mode == tf.estimator.ModeKeys.EVAL:
            return TFDataset.from_ndarrays((x, y), batch_per_thread=1)
        else:
            return TFDataset.from_ndarrays(x, batch_per_thread=1)

    estimator = TFEstimator.from_model_fn(model_fn)
    estimator.train(input_fn, 10)
    estimator.evaluate(input_fn, ["acc"])
    estimator.predict(input_fn)
def main(option):
    batch_size = 16 if not option.batch_size else int(option.batch_size)
    cluster_mode = option.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn mode. Please " \
                            "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.driver.memory", "2g")
        if cluster_mode == "yarn-client":
            sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf)
        else:
            sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf)
    else:
        sc = init_nncontext()

    def input_fn(mode, params):
        if mode == tf.estimator.ModeKeys.TRAIN:
            image_set = ImageSet.read(params["image_path"], sc=sc,
                                      with_label=True, one_based_label=False)
            # Preprocessing pipeline: decode, resize, random crop and flip,
            # channel-normalize, convert to an NHWC tensor, then to Sample.
            train_transformer = ChainedPreprocessing([
                ImageBytesToMat(),
                ImageResize(256, 256),
                ImageRandomCrop(224, 224),
                ImageRandomPreprocessing(ImageHFlip(), 0.5),
                ImageChannelNormalize(0.485, 0.456, 0.406, 0.229, 0.224, 0.225),
                ImageMatToTensor(to_RGB=True, format="NHWC"),
                ImageSetToSample(input_keys=["imageTensor"], target_keys=["label"])
            ])
            feature_set = FeatureSet.image_frame(image_set.to_image_frame())
            feature_set = feature_set.transform(train_transformer)
            feature_set = feature_set.transform(ImageFeatureToSample())
            dataset = TFDataset.from_feature_set(feature_set,
                                                 features=(tf.float32, [224, 224, 3]),
                                                 labels=(tf.int32, [1]),
                                                 batch_size=batch_size)
        else:
            raise NotImplementedError

        return dataset

    def model_fn(features, labels, mode, params):
        from nets import inception
        slim = tf.contrib.slim
        labels = tf.squeeze(labels, axis=1)
        with slim.arg_scope(inception.inception_v1_arg_scope()):
            logits, end_points = inception.inception_v1(
                features, num_classes=int(params["num_classes"]), is_training=True)

        if mode == tf.estimator.ModeKeys.TRAIN:
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
            train_op = ZooOptimizer(tf.train.AdamOptimizer()).minimize(loss)
            return tf.estimator.EstimatorSpec(mode, train_op=train_op,
                                              predictions=logits, loss=loss)
        else:
            raise NotImplementedError

    estimator = TFEstimator.from_model_fn(model_fn,
                                          params={"image_path": option.image_path,
                                                  "num_classes": option.num_classes,
                                                  "batch_size": option.batch_size})
    estimator.train(input_fn, steps=100)
    print("finished...")
    sc.stop()
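# "option" above is the parsed command-line arguments object; the parser itself
# is not shown in this example. A plausible sketch, with flag names inferred
# from the attributes used in main() (hypothetical, not from the original source):
from optparse import OptionParser

def parse_options():
    parser = OptionParser()
    parser.add_option("--image-path", dest="image_path",
                      help="path to the training images (local folder or HDFS)")
    parser.add_option("--num-classes", dest="num_classes", default="1000",
                      help="number of output classes for Inception v1")
    parser.add_option("--batch-size", dest="batch_size", default=None,
                      help="global batch size; main() falls back to 16 when unset")
    parser.add_option("--cluster-mode", dest="cluster_mode", default="local",
                      help="local, yarn-client or yarn-cluster")
    option, _ = parser.parse_args()
    return option

if __name__ == "__main__":
    main(parse_options())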
def test_estimator_for_feature_set(self):
    model_fn = self.create_model_fn()
    input_fn = self.create_train_feature_set_input_fn()
    estimator = TFEstimator.from_model_fn(model_fn)
    estimator.train(input_fn, steps=1)
def test_predict(self):
    model_fn = self.create_model_fn()
    input_fn = self.create_input_fn()
    estimator = TFEstimator.from_model_fn(model_fn)
    results = estimator.predict(input_fn).collect()
def test_evaluating(self):
    model_fn = self.create_model_fn()
    input_fn = self.create_input_fn()
    estimator = TFEstimator.from_model_fn(model_fn)
    eval_results = estimator.evaluate(input_fn, ["acc"])
    assert len(eval_results) > 0
def test_training(self):
    model_fn = self.create_model_fn()
    input_fn = self.create_input_fn()
    estimator = TFEstimator.from_model_fn(model_fn)
    # 60000 samples / global batch size 320 -> one epoch worth of steps.
    estimator.train(input_fn, steps=60000 // 320)
# Build feature columns for the categorical and numeric inputs; feature_columns,
# CATEGORICAL_COLUMNS and NUMERIC_COLUMNS are defined earlier in the example.
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = dftrain[feature_name].unique()
    feature_columns.append(
        tf.feature_column.categorical_column_with_vocabulary_list(
            feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(
        tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

sc = init_nncontext()

# Wrap a pre-made TF LinearClassifier in a tfpark TFEstimator; ZooOptimizer
# makes the wrapped optimizer usable for distributed training.
linear_est = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    optimizer=ZooOptimizer(tf.train.FtrlOptimizer(0.2)),
    model_dir="/tmp/estimator/linear")
zoo_est = TFEstimator(linear_est)

train_input_fn = make_input_fn(dftrain, y_train,
                               mode=tf.estimator.ModeKeys.TRAIN,
                               batch_size=32)
zoo_est.train(train_input_fn, steps=200)

eval_input_fn = make_input_fn(dfeval, y_eval,
                              mode=tf.estimator.ModeKeys.EVAL,
                              batch_per_thread=8)
eval_result = zoo_est.evaluate(eval_input_fn, ["acc"])
print(eval_result)

pred_input_fn = make_input_fn(dfeval, y_eval,
                              mode=tf.estimator.ModeKeys.PREDICT,
                              batch_per_thread=8)
predictions = zoo_est.predict(pred_input_fn)
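# make_input_fn, dftrain, dfeval, y_train and y_eval come from earlier in the
# example (pandas DataFrames plus label Series). One plausible definition of
# the helper, assuming TFDataset.from_tf_data_dataset is used to wrap a tf.data
# pipeline (a sketch; the original helper may be built differently):
def make_input_fn(data_df, label_df, mode, batch_size=None, batch_per_thread=None):
    def input_fn():
        # (feature dict, label) pairs, one per DataFrame row.
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if mode == tf.estimator.ModeKeys.TRAIN:
            ds = ds.shuffle(1000)
        # batch_size drives training; batch_per_thread drives evaluation/prediction.
        return TFDataset.from_tf_data_dataset(ds, batch_size=batch_size,
                                              batch_per_thread=batch_per_thread)
    return input_fn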