def model_fn(sync, num_replicas):
    # These variables are used later in the training function train_fn(),
    # so they are declared as global variables here.
    global input_images, loss, labels, optimizer, train_op, accuracy
    global mnist, global_step

    # Build the inference model.
    input_images = tf.placeholder(tf.float32, [None, 784], name='image')
    W = tf.Variable(tf.zeros([784, 10]), name='weights')
    tf.summary.histogram("weights", W)
    b = tf.Variable(tf.zeros([10]), name='bias')
    tf.summary.histogram("bias", b)
    logits = tf.matmul(input_images, W) + b
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Define loss and optimizer.
    labels = tf.placeholder(tf.float32, [None, 10], name='labels')
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    loss = tf.reduce_mean(cross_entropy, name='loss')
    tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

    # Create the optimizer that computes the gradients.
    optimizer = tf.train.AdagradOptimizer(0.01)
    if sync:
        num_workers = num_replicas
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_workers,
            total_num_replicas=num_workers,
            name="mnist_sync_replicas")
    train_op = optimizer.minimize(cross_entropy, global_step=global_step)

    # Test the trained model.
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def accuracy_evaluate_fn(session):
        return session.run(accuracy,
                           feed_dict={
                               input_images: mnist.validation.images,
                               labels: mnist.validation.labels
                           })

    # Configure model export.
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={"image": input_images},
        output_tensors={"logits": logits})

    # Define how the model metric (accuracy) is computed.
    model_metric_ops = {"accuracy": accuracy_evaluate_fn}

    return dist_base.ModelFnHandler(global_step=global_step,
                                    optimizer=optimizer,
                                    model_metric_ops=model_metric_ops,
                                    model_export_spec=model_export_spec)
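The globals above are consumed by the training function. A minimal sketch of such a train_fn is shown below; the (session, num_global_step) signature and the convention that the runner keeps calling it until it signals a stop are assumptions about the TaaS SDK, and the batch size and logging interval are illustrative.

def train_fn(session, num_global_step):
    # mnist is assumed to have been loaded elsewhere, e.g. via
    # tensorflow.examples.tutorials.mnist.input_data.read_data_sets().
    batch_xs, batch_ys = mnist.train.next_batch(100)
    _, loss_value = session.run(
        [train_op, loss],
        feed_dict={input_images: batch_xs, labels: batch_ys})
    if num_global_step % 100 == 0:
        print("Step %d, loss: %f" % (num_global_step, loss_value))
    # Returning False keeps training; the surrounding runner decides
    # when to stop based on its own configuration (assumed behavior).
    return False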
def model_fn(sync, num_replicas):
    global _train_op, _infer, _user_batch, _item_batch, _rate_batch, _rmse, _cost, _global_step

    _user_batch = tf.placeholder(tf.int32, shape=[None], name="user")
    _item_batch = tf.placeholder(tf.int32, shape=[None], name="item")
    _rate_batch = tf.placeholder(tf.float32, shape=[None], name="rate")

    _infer = inference(_user_batch, _item_batch, FLAGS.embedding_dim)
    _global_step = tf.contrib.framework.get_or_create_global_step()

    _cost = tf.square(_infer - _rate_batch)
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    if sync:
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_replicas,
            total_num_replicas=num_replicas,
            name="recommendation_sync_replicas")
    # Clip gradients by global norm to keep the updates stable.
    gradients, variables = zip(*optimizer.compute_gradients(_cost))
    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
    _train_op = optimizer.apply_gradients(zip(gradients, variables),
                                          global_step=_global_step)
    _rmse = tf.sqrt(tf.reduce_mean(_cost))

    def rmse_evaluate_fn(session):
        return session.run(_rmse,
                           feed_dict={
                               _user_batch: _test["user"],
                               _item_batch: _test["item"],
                               _rate_batch: _test["rate"]
                           })

    # Configure model export.
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={
            "user": _user_batch,
            "item": _item_batch
        },
        output_tensors={"infer": _infer})

    # Define how the model metric (RMSE) is computed.
    model_metric_ops = {"rmse": rmse_evaluate_fn}

    return dist_base.ModelFnHandler(global_step=_global_step,
                                    optimizer=optimizer,
                                    model_metric_ops=model_metric_ops,
                                    model_export_spec=model_export_spec,
                                    summary_op=None)
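The inference() helper is not shown above. A minimal matrix-factorization sketch is below: it predicts a rating as the dot product of a user embedding and an item embedding. USER_NUM and ITEM_NUM are assumed dataset constants, and the initializer settings are illustrative, not the example's actual implementation.

def inference(user_batch, item_batch, dim):
    # One dim-sized embedding per user and per item (hypothetical shapes).
    w_user = tf.get_variable(
        "embd_user", shape=[USER_NUM, dim],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    w_item = tf.get_variable(
        "embd_item", shape=[ITEM_NUM, dim],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    embd_user = tf.nn.embedding_lookup(w_user, user_batch)
    embd_item = tf.nn.embedding_lookup(w_item, item_batch)
    # Predicted rating: elementwise product summed over the embedding axis.
    return tf.reduce_sum(tf.multiply(embd_user, embd_item), 1)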
def model_fn(sync, num_replicas):
    #####################
    # Build the model   #
    #####################
    #
    # Build a linear regression inference model:
    #   y = 0.5*x + 2
    a = tf.Variable(0.5, name="a")
    b = tf.Variable(2.0, name="b")
    x = tf.placeholder(tf.float32, name="x")
    y = tf.add(tf.multiply(a, x), b, name="y")
    global_step = tf.Variable(0, name='global_step', trainable=False)

    #####################
    # Add an asset file #
    #####################
    #
    # Asset files are treated as part of the model during export and import.
    # Typical use case: some operations of the model need external files,
    # e.g. for initialization.
    # When the model is exported, asset files are copied into the assets
    # directory under the model export path.
    original_assets_directory = "/tmp/original/export/assets"
    original_assets_filename = "foo.txt"
    original_assets_filepath = _write_assets(original_assets_directory,
                                             original_assets_filename)
    assets_filepath = tf.constant(original_assets_filepath)
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, assets_filepath)
    filename_tensor = tf.Variable(original_assets_filename,
                                  name="filename_tensor",
                                  trainable=False,
                                  collections=[])
    assign_filename_op = filename_tensor.assign(original_assets_filename)

    # Configure model export.
    if os.path.exists(FLAGS.export_dir):
        print("The export path already exists, trying to delete it...")
        shutil.rmtree(FLAGS.export_dir)
        print("The export path has been deleted.")
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={'x': x},
        output_tensors={'y': y},
        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
        legacy_init_op=tf.group(assign_filename_op))

    return dist_base.ModelFnHandler(global_step=global_step,
                                    model_export_spec=model_export_spec)
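The _write_assets() helper referenced above is not shown. One plausible implementation, mirroring the helper used in TensorFlow's own SavedModel examples, simply writes the file to disk and returns its path; the file contents here are placeholders.

import os

def _write_assets(assets_directory, assets_filename):
    # Create the directory on first use, then write a small text file
    # whose path will be registered in the ASSET_FILEPATHS collection.
    if not os.path.exists(assets_directory):
        os.makedirs(assets_directory)
    path = os.path.join(assets_directory, assets_filename)
    with open(path, "w") as f:
        f.write("asset-file-contents")
    return path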
def model_fn(sync, num_replicas):
    #####################
    # Build the model   #
    #####################
    a1 = tf.Variable(0.5, name="a1")
    b1 = tf.Variable(2.0, name="b1")
    a2 = tf.Variable(2.0, name="a2")
    b2 = tf.Variable(3.0, name="b2")
    a3 = tf.Variable(4.0, name="a3")
    b3 = tf.Variable(5.0, name="b3")

    # y1 = 0.5*x1 + 2
    x1 = tf.placeholder(tf.float32, name="x1")
    y1 = tf.add(tf.multiply(a1, x1), b1, name="y1")

    # y2 = 2*x1 + 3
    y2 = tf.add(tf.multiply(a2, x1), b2, name="y2")

    # y3 = 4*x2 + 5
    x2 = tf.placeholder(tf.float32, name="x2")
    y3 = tf.add(tf.multiply(a3, x2), b3, name="y3")

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Configure model export.
    if os.path.exists(FLAGS.export_dir):
        print("The export path already exists, trying to delete it...")
        shutil.rmtree(FLAGS.export_dir)
        print("The export path has been deleted.")
    input_tensors = {
        'x1': x1,
        'x2': x2,
    }
    output_tensors = {
        'y1': y1,
        'y2': y2,
        'y3': y3,
    }
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors=input_tensors,
        output_tensors=output_tensors)

    return dist_base.ModelFnHandler(
        global_step=global_step,
        model_export_spec=model_export_spec)
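Assuming the exporter writes a standard TensorFlow SavedModel under FLAGS.export_dir with the SERVING tag (an assumption about model_exporter, not confirmed by the listing), the multiple inputs and outputs can later be fetched by the tensor names given in model_fn:

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], FLAGS.export_dir)
    # y1 and y2 both depend only on x1; y3 depends only on x2.
    y1_val, y2_val = sess.run(["y1:0", "y2:0"], feed_dict={"x1:0": 10.0})
    y3_val = sess.run("y3:0", feed_dict={"x2:0": 10.0})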
def model_fn(sync, num_replicas):
    global _train_op, _infer, _user_batch, _item_batch, _rate_batch, _rmse, _cost, _global_step

    _user_batch = tf.placeholder(tf.int32, shape=[None], name="user")
    _item_batch = tf.placeholder(tf.int32, shape=[None], name="item")
    _rate_batch = tf.placeholder(tf.float32, shape=[None], name="rate")

    _infer = inference(_user_batch, _item_batch, FLAGS.embedding_dim)
    _global_step = tf.contrib.framework.get_or_create_global_step()

    _cost = tf.square(_infer - _rate_batch)
    optimizer = tf.train.AdamOptimizer(0.001)
    _train_op = optimizer.minimize(_cost, global_step=_global_step)
    _rmse = tf.sqrt(tf.reduce_mean(_cost))

    def rmse_evaluate_fn(session):
        return session.run(_rmse,
                           feed_dict={
                               _user_batch: _test["user"],
                               _item_batch: _test["item"],
                               _rate_batch: _test["rate"]
                           })

    # Configure model export.
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={"user": _user_batch, "item": _item_batch},
        output_tensors={"infer": _infer})

    # Define how the model metric (RMSE) is computed.
    model_metric_ops = {"rmse": rmse_evaluate_fn}

    return dist_base.ModelFnHandler(
        global_step=_global_step,
        optimizer=optimizer,
        model_metric_ops=model_metric_ops,
        model_export_spec=model_export_spec,
        summary_op=None)
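The _test DataFrame fed to rmse_evaluate_fn is prepared outside model_fn. A hypothetical get_data() helper is sketched below, assuming MovieLens-style "user::item::rate::timestamp" records; the file path, split ratio, and column handling are all illustrative assumptions.

import pandas as pd

def get_data(path="/tmp/movielens/ratings.dat"):
    df = pd.read_csv(path, sep="::", engine="python",
                     names=["user", "item", "rate", "timestamp"])
    # Shift 1-based MovieLens ids to 0-based indices for embedding lookups.
    df["user"] -= 1
    df["item"] -= 1
    # Shuffle, then hold out the last 10% as the test set.
    df = df.sample(frac=1).reset_index(drop=True)
    split = int(len(df) * 0.9)
    return df[:split], df[split:]

_train, _test = get_data()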
def input_fn():
    global _input_tensors, _output_tensor
    _input_tensors = {
        k: tf.placeholder(dtype=tf.float64, shape=[None], name=k)
        for k in FEATURES
    }
    _output_tensor = tf.placeholder(dtype=tf.float64, shape=[None], name=LABEL)
    return _input_tensors, _output_tensor

def feed_fn(data_set):
    global _input_tensors, _output_tensor
    feed_dict = {_input_tensors[k]: data_set[k].values for k in FEATURES}
    feed_dict[_output_tensor] = data_set[LABEL].values
    return feed_dict

train_monitors = [tf.train.FeedFnHook(lambda: feed_fn(training_set))]
eval_hooks = [tf.train.FeedFnHook(lambda: feed_fn(test_set))]

model_export_spec = None
if FLAGS.export_dir is not None:
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        features=feature_cols)

exp = dist_base.Experiment(
    estimator=regressor,
    train_input_fn=input_fn,
    eval_input_fn=input_fn,
    train_monitors=train_monitors,
    eval_hooks=eval_hooks,
    eval_steps=1,
    model_export_spec=model_export_spec)
exp.run()
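The regressor, feature_cols, training_set, and test_set referenced above are built elsewhere. One way they might be constructed, sketched after the canonical tf.contrib.learn DNNRegressor tutorial (the column names, CSV files, and hidden-unit sizes are illustrative assumptions):

import pandas as pd
import tensorflow as tf

FEATURES = ["crim", "zn", "indus", "nox", "rm", "dis", "tax", "ptratio"]
LABEL = "medv"

training_set = pd.read_csv("boston_train.csv", skipinitialspace=True)
test_set = pd.read_csv("boston_test.csv", skipinitialspace=True)

# One real-valued feature column per input feature.
feature_cols = [tf.contrib.layers.real_valued_column(k) for k in FEATURES]
regressor = tf.contrib.learn.DNNRegressor(
    feature_columns=feature_cols, hidden_units=[10, 10])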
def model_fn(sync, num_replicas):
    # These variables are used later in the training function train_fn(),
    # so they are declared as global variables here.
    global _input_images, _loss, _labels, _train_op, _accuracy
    global _mnist, _global_step, _summary_op, _summary_writer

    # Build the inference model.
    _input_images = tf.placeholder(tf.float32, [None, 784], name='image')
    W = tf.Variable(tf.zeros([784, 10]), name='weights')
    tf.summary.histogram("weights", W)
    b = tf.Variable(tf.zeros([10]), name='bias')
    tf.summary.histogram("bias", b)
    logits = tf.matmul(_input_images, W) + b
    _global_step = tf.Variable(0, name='global_step', trainable=False)

    # Define loss and optimizer.
    _labels = tf.placeholder(tf.float32, [None, 10], name='labels')
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=_labels))
    tf.summary.scalar("cross_entropy", cross_entropy)
    _loss = tf.reduce_mean(cross_entropy, name='loss')
    tf.add_to_collection(tf.GraphKeys.LOSSES, _loss)

    # Create the optimizer that computes the gradients.
    optimizer = tf.train.AdagradOptimizer(0.01)
    if sync:
        num_workers = num_replicas
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_workers,
            total_num_replicas=num_workers,
            name="mnist_sync_replicas")
    _train_op = optimizer.minimize(cross_entropy, global_step=_global_step)

    # Custom Operation that computes the model summaries, plus a FileWriter
    # for saving them. dist_base.cfg.logdir is the training-log path
    # parameter configured on the TaaS platform.
    _summary_op = tf.summary.merge_all()
    _summary_writer = tf.summary.FileWriter(dist_base.cfg.logdir)

    # Test the trained model.
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(_labels, 1))
    _accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def accuracy_evaluate_fn(session):
        return session.run(_accuracy,
                           feed_dict={
                               _input_images: _mnist.validation.images,
                               _labels: _mnist.validation.labels
                           })

    # Configure model export.
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={"image": _input_images},
        output_tensors={"logits": logits})

    # Define how the model metric (accuracy) is computed.
    model_metric_ops = {"accuracy": accuracy_evaluate_fn}

    # The graph computes tf.summary.scalar("cross_entropy"), and evaluating
    # that summary requires feeding _input_images and _labels. summary_op is
    # therefore set to None, which turns off TaaS's automatic computation and
    # saving of model summaries; train_fn computes and collects the graph's
    # summaries itself.
    return dist_base.ModelFnHandler(global_step=_global_step,
                                    optimizer=optimizer,
                                    model_metric_ops=model_metric_ops,
                                    model_export_spec=model_export_spec,
                                    summary_op=None)
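With automatic summaries disabled, the matching train_fn has to run _summary_op itself and hand the result to _summary_writer. A minimal sketch follows; the (session, num_global_step) signature is an assumption about the TaaS SDK, and the batch size and summary interval are illustrative.

def train_fn(session, num_global_step):
    batch_xs, batch_ys = _mnist.train.next_batch(100)
    feed_dict = {_input_images: batch_xs, _labels: batch_ys}
    _, loss_value = session.run([_train_op, _loss], feed_dict=feed_dict)
    if num_global_step % 100 == 0:
        # Summaries such as cross_entropy need the same feeds as the
        # training step, which is why automatic summaries were disabled.
        summary = session.run(_summary_op, feed_dict=feed_dict)
        _summary_writer.add_summary(summary, num_global_step)
    return False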