def main(argv):
    """Train the estimator and evaluate it after every training cycle.

    The model directory is wiped first, so every run starts from scratch.
    Training data is read from ``adult.data`` and evaluation data from
    ``adult.test``, both located under ``flags.data_dir``.

    Args:
        argv: Command-line arguments. ``argv[0]`` is skipped and the rest is
            parsed by ``WideDeepArgParser``.
    """
    parser = WideDeepArgParser()
    flags = parser.parse_args(args=argv[1:])

    # Clean up the model directory if present
    shutil.rmtree(flags.model_dir, ignore_errors=True)
    model = build_estimator(flags.model_dir, flags.model_type)

    train_file = os.path.join(flags.data_dir, 'adult.data')
    test_file = os.path.join(flags.data_dir, 'adult.test')

    # The value passed to input_fn here must come from the same flag that
    # sizes the loop below (`flags.epochs_between_evals`); mixing it with
    # `flags.epochs_per_eval` made the amount of training per cycle disagree
    # with the epoch count that is computed and reported.
    def train_input_fn():
        return input_fn(
            train_file, flags.epochs_between_evals, True, flags.batch_size)

    def eval_input_fn():
        return input_fn(test_file, 1, False, flags.batch_size)

    train_hooks = hooks_helper.get_train_hooks(
        flags.hooks, batch_size=flags.batch_size,
        tensors_to_log={'average_loss': 'head/truediv',
                        'loss': 'head/weighted_loss/Sum'})

    # Train and evaluate the model every `flags.epochs_between_evals` epochs.
    for n in range(flags.train_epochs // flags.epochs_between_evals):
        model.train(input_fn=train_input_fn, hooks=train_hooks)
        results = model.evaluate(input_fn=eval_input_fn)

        # Display evaluation metrics
        print('Results at epoch', (n + 1) * flags.epochs_between_evals)
        print('-' * 60)

        for key in sorted(results):
            print('%s: %s' % (key, results[key]))
def dan_main(flags, model_function, input_function):
    """Build an Estimator from ``model_function`` and run it in ``flags.mode``.

    ``flags.mode`` is compared against ``tf.estimator.ModeKeys``:

    * PREDICT: shows the ``'img'`` entry of every prediction in an OpenCV
      window, then returns.
    * EVAL: runs one evaluation and prints the result.
    * TRAIN: alternates training and evaluation for
      ``flags.train_epochs // flags.epochs_per_eval`` cycles and prints each
      evaluation result.

    Args:
        flags: Parsed options object; the attributes read are the ones
            referenced in the body (``mode``, ``model_dir``, ``data_dir``,
            ``data_dir_test``, ``batch_size``, ``multi_gpu``, ...).
        model_function: Passed to the Estimator as ``model_fn``. When
            ``flags.multi_gpu`` is set it is first wrapped with
            ``tf.contrib.estimator.replicate_model_fn``.
        input_function: In PREDICT mode it is handed to
            ``estimator.predict`` unchanged. Otherwise it is called with six
            positional arguments: True (train) or False (eval), the data
            directory, the batch size, ``flags.epochs_per_eval`` (train) or
            1 (eval), ``flags.num_parallel_calls`` and ``flags.multi_gpu``.
    """
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)
    estimator = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params={
            'dan_stage': flags.dan_stage,
            'num_lmark': flags.num_lmark,
            'data_format': flags.data_format,
            'batch_size': flags.batch_size,
            'multi_gpu': flags.multi_gpu,
        })

    if flags.mode == tf.estimator.ModeKeys.PREDICT:
        # Imported lazily so OpenCV is only needed for prediction.
        import cv2
        for x in estimator.predict(input_function):
            cv2.imshow('t', x['img'])
            cv2.waitKey(30)
        return

    def input_fn_eval():
        # Prefer the dedicated test directory and fall back to the training
        # directory only when none was given. The original condition was
        # inverted and handed None to input_function in the fallback case.
        eval_dir = (flags.data_dir_test
                    if flags.data_dir_test is not None else flags.data_dir)
        return input_function(False, eval_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_per_eval,
                              flags.num_parallel_calls, flags.multi_gpu)

    if flags.mode == tf.estimator.ModeKeys.EVAL:
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)

    if flags.mode == tf.estimator.ModeKeys.TRAIN:
        for _ in range(flags.train_epochs // flags.epochs_per_eval):
            # NOTE(review): these hooks are built but never handed to
            # estimator.train(); confirm whether `hooks=train_hooks` was
            # intended before wiring them in.
            train_hooks = hooks_helper.get_train_hooks(
                ["LoggingTensorHook"], batch_size=flags.batch_size)

            print('Starting a training cycle.')
            estimator.train(input_fn=input_fn_train,
                            max_steps=flags.max_train_steps)

            print('Starting to evaluate.')
            eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                              steps=flags.max_train_steps)
            print(eval_results)
def main(argv):
    """Parse flags, rebuild the model from scratch, then train and evaluate.

    Args:
        argv: Command-line arguments; everything after ``argv[0]`` is parsed
            by ``WideDeepArgParser``.
    """
    flags = WideDeepArgParser().parse_args(args=argv[1:])

    # Start from an empty model directory.
    shutil.rmtree(flags.model_dir, ignore_errors=True)
    estimator = build_estimator(flags.model_dir, flags.model_type)

    def train_input_fn():
        train_path = os.path.join(flags.data_dir, 'adult.data')
        return input_fn(
            train_path, flags.epochs_between_evals, True, flags.batch_size)

    def eval_input_fn():
        test_path = os.path.join(flags.data_dir, 'adult.test')
        return input_fn(test_path, 1, False, flags.batch_size)

    logged_tensors = {
        'average_loss': 'head/truediv',
        'loss': 'head/weighted_loss/Sum',
    }
    train_hooks = hooks_helper.get_train_hooks(
        flags.hooks, batch_size=flags.batch_size,
        tensors_to_log=logged_tensors)

    # One cycle = train, then evaluate and report.
    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for cycle in range(1, num_cycles + 1):
        estimator.train(input_fn=train_input_fn, hooks=train_hooks)
        metrics = estimator.evaluate(input_fn=eval_input_fn)

        # Display evaluation metrics in sorted key order.
        print('Results at epoch', cycle * flags.epochs_between_evals)
        print('-' * 60)
        for name in sorted(metrics):
            print('%s: %s' % (name, metrics[name]))
def main(_):
    """Train and evaluate the MNIST estimator, optionally exporting it.

    All settings are read from the module-level ``FLAGS`` object. The single
    positional argument is ignored.
    """
    # There are two steps required if using multi-GPU: (1) wrap the model_fn,
    # and (2) wrap the optimizer. The first happens here, and (2) happens
    # in the model_fn itself when the optimizer is defined.
    if FLAGS.multi_gpu:
        validate_batch_size_for_multi_gpu(FLAGS.batch_size)
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_fn, loss_reduction=tf.losses.Reduction.MEAN)
    else:
        model_function = model_fn

    data_format = FLAGS.data_format
    if data_format is None:
        # No explicit choice: pick based on whether TF was built with CUDA.
        if tf.test.is_built_with_cuda():
            data_format = 'channels_first'
        else:
            data_format = 'channels_last'

    estimator_params = {
        'data_format': data_format,
        'multi_gpu': FLAGS.multi_gpu,
    }
    mnist_classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=FLAGS.model_dir,
        params=estimator_params)

    def train_input_fn():
        """Prepare data for training."""
        # A larger shuffle buffer gives better randomness at the cost of
        # memory; the buffer used here is 50000 elements.
        shuffled = dataset.train(FLAGS.data_dir).cache().shuffle(
            buffer_size=50000)
        batched = shuffled.batch(FLAGS.batch_size)
        # Iterate through the dataset `epochs_between_evals` times during
        # each training session.
        return batched.repeat(FLAGS.epochs_between_evals)

    def eval_input_fn():
        batched = dataset.test(FLAGS.data_dir).batch(FLAGS.batch_size)
        iterator = batched.make_one_shot_iterator()
        return iterator.get_next()

    # Hooks are selected by name through FLAGS.hooks.
    train_hooks = hooks_helper.get_train_hooks(
        FLAGS.hooks, batch_size=FLAGS.batch_size)

    # Train and evaluate model.
    num_cycles = FLAGS.train_epochs // FLAGS.epochs_between_evals
    for _cycle in range(num_cycles):
        mnist_classifier.train(input_fn=train_input_fn, hooks=train_hooks)
        eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
        print('\nEvaluation results:\n\t%s\n' % eval_results)

    # Export the model
    if FLAGS.export_dir is None:
        return
    image = tf.placeholder(tf.float32, [None, 28, 28])
    serving_input_fn = (
        tf.estimator.export.build_raw_serving_input_receiver_fn(
            {'image': image}))
    mnist_classifier.export_savedmodel(FLAGS.export_dir, serving_input_fn)
def main(unused_argv):
    """Rebuild the model from scratch, then train and evaluate it in cycles.

    All settings come from the module-level ``FLAGS`` object; the positional
    argument is ignored.
    """
    # Start from an empty model directory.
    shutil.rmtree(FLAGS.model_dir, ignore_errors=True)
    estimator = build_estimator(FLAGS.model_dir, FLAGS.model_type)

    train_file = os.path.join(FLAGS.data_dir, 'adult.data')
    test_file = os.path.join(FLAGS.data_dir, 'adult.test')

    logged_tensors = {
        'average_loss': 'head/truediv',
        'loss': 'head/weighted_loss/Sum',
    }
    train_hooks = hooks_helper.get_train_hooks(
        FLAGS.hooks,
        batch_size=FLAGS.batch_size,
        tensors_to_log=logged_tensors)

    def train_input_fn():
        return input_fn(
            train_file, FLAGS.epochs_between_evals, True, FLAGS.batch_size)

    def eval_input_fn():
        return input_fn(test_file, 1, False, FLAGS.batch_size)

    # Train and evaluate the model every `FLAGS.epochs_between_evals` epochs.
    num_cycles = FLAGS.train_epochs // FLAGS.epochs_between_evals
    for cycle in range(1, num_cycles + 1):
        estimator.train(input_fn=train_input_fn, hooks=train_hooks)
        metrics = estimator.evaluate(input_fn=eval_input_fn)

        # Display evaluation metrics in sorted key order.
        print('Results at epoch', cycle * FLAGS.epochs_between_evals)
        print('-' * 60)
        for name in sorted(metrics):
            print('%s: %s' % (name, metrics[name]))
def validate_train_hook_name(self, test_hook_name, expected_hook_name,
                             **kwargs):
    """Check that one hook name resolves to exactly one hook of the right class.

    Args:
        test_hook_name: Name handed (as a one-element list) to
            ``hooks_helper.get_train_hooks``.
        expected_hook_name: Lower-cased class name the returned hook must
            have.
        **kwargs: Forwarded unchanged to ``hooks_helper.get_train_hooks``.
    """
    hooks = hooks_helper.get_train_hooks([test_hook_name], **kwargs)
    self.assertEqual(len(hooks), 1)

    hook = hooks[0]
    self.assertIsInstance(hook, tf.train.SessionRunHook)

    actual_name = hook.__class__.__name__.lower()
    self.assertEqual(actual_name, expected_hook_name)
def resnet_main(flags, model_function, input_function):
    """Alternate training and evaluation of an Estimator built from the flags.

    Args:
        flags: Parsed options object; the attributes read are the ones
            referenced in the body.
        model_function: Passed to the Estimator as ``model_fn`` (wrapped with
            ``replicate_model_fn`` first when ``flags.multi_gpu`` is set).
        input_function: Called with six positional arguments to produce the
            training (first argument True) and evaluation (first argument
            False) inputs.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # Multi-GPU needs two wrappers: the model_fn (done here) and the
        # optimizer (done inside the model_fn where it is defined).
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    # allow_soft_placement=True is required for multi-GPU and not harmful
    # for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # Set up a RunConfig to save checkpoint and set session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'multi_gpu': flags.multi_gpu,
        'version': flags.version,
    }
    estimator = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params=model_params)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_per_eval,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_per_eval
    for _cycle in range(num_cycles):
        # Hooks are rebuilt for every cycle.
        cycle_hooks = hooks_helper.get_train_hooks(
            flags.hooks, batch_size=flags.batch_size)

        print('Starting a training cycle.')
        estimator.train(input_fn=input_fn_train, hooks=cycle_hooks)

        print('Starting to evaluate.')
        # Evaluate the model and print results
        eval_results = estimator.evaluate(input_fn=input_fn_eval)
        print(eval_results)
def dan_main(flags, model_function, input_function, file_path=None):
    """Build an Estimator from ``model_function`` and run it in ``flags.mode``.

    ``flags.mode`` is compared against ``tf.estimator.ModeKeys``:

    * PREDICT: draws the predicted landmarks on each predicted image and
      writes ``./results/<name>_pred.png`` and ``./results/<name>_pred.pts``,
      where ``<name>`` is the base name of ``file_path`` without its
      extension. Then returns.
    * EVAL: runs one evaluation and prints the result.
    * TRAIN: alternates training and evaluation for
      ``flags.train_epochs // flags.epochs_per_eval`` cycles and prints each
      evaluation result.

    Args:
        flags: Parsed options object; the attributes read are the ones
            referenced in the body.
        model_function: Passed to the Estimator as ``model_fn``. When
            ``flags.multi_gpu`` is set it is first wrapped with
            ``tf.contrib.estimator.replicate_model_fn``.
        input_function: In PREDICT mode it is handed to
            ``estimator.predict`` unchanged. Otherwise it is called with six
            positional arguments: True (train) or False (eval), the data
            directory, the batch size, ``flags.epochs_per_eval`` (train) or
            1 (eval), ``flags.num_parallel_calls`` and ``flags.multi_gpu``.
        file_path: Path of the image being predicted; only used, and then
            required, in PREDICT mode. Both ``\\`` and ``/`` separators are
            accepted.

    Raises:
        ValueError: If ``flags.mode`` is PREDICT and ``file_path`` is None.
    """
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)
    estimator = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params={
            'dan_stage': flags.dan_stage,
            'num_lmark': flags.num_lmark,
            'data_format': flags.data_format,
            'batch_size': flags.batch_size,
            'multi_gpu': flags.multi_gpu,
        })

    if flags.mode == tf.estimator.ModeKeys.PREDICT:
        if file_path is None:
            raise ValueError(
                'file_path is required when flags.mode is PREDICT')

        print('**********************')
        # Strip directories and the extension. Backslashes are normalised
        # first so Windows-style paths also work on POSIX hosts; the former
        # rfind("\\") / rfind(".png") slicing mangled paths that used "/"
        # or an extension other than ".png".
        base_name = os.path.basename(file_path.replace('\\', '/'))
        file_name = os.path.splitext(base_name)[0]
        print(file_name)

        # Imported lazily so OpenCV is only needed for prediction.
        import cv2

        # Make sure the output directory exists before anything is written.
        if not os.path.isdir('./results'):
            os.makedirs('./results')

        predict_results = estimator.predict(input_function)
        for x in predict_results:
            landmark = x['s2_ret']
            img = cv2.cvtColor(x['img'], cv2.COLOR_GRAY2RGB)
            for lm in landmark:
                cv2.circle(img, (lm[0], lm[1]), 1, (0, 0, 255), -1)
            print(file_path)
            # NOTE(review): the output name does not depend on the loop
            # variable, so with more than one prediction the last one wins.
            cv2.imwrite('./results/' + file_name + '_pred.png', img)
            np.savetxt('./results/' + file_name + '_pred.pts', landmark,
                       delimiter=" ", fmt='%i')
        return

    def input_fn_eval():
        # Prefer the dedicated test directory and fall back to the training
        # directory only when none was given. The original condition was
        # inverted and handed None to input_function in the fallback case.
        eval_dir = (flags.data_dir_test
                    if flags.data_dir_test is not None else flags.data_dir)
        return input_function(False, eval_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_per_eval,
                              flags.num_parallel_calls, flags.multi_gpu)

    if flags.mode == tf.estimator.ModeKeys.EVAL:
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)

    if flags.mode == tf.estimator.ModeKeys.TRAIN:
        for _ in range(flags.train_epochs // flags.epochs_per_eval):
            # NOTE(review): these hooks are built but never handed to
            # estimator.train(); confirm whether `hooks=train_hooks` was
            # intended before wiring them in.
            train_hooks = hooks_helper.get_train_hooks(
                ["LoggingTensorHook"], batch_size=flags.batch_size)

            print('************** Starting a training cycle.')
            estimator.train(input_fn=input_fn_train,
                            max_steps=flags.max_train_steps)

            print('************** Starting to evaluate.')
            eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                              steps=flags.max_train_steps)
            print(eval_results)
def resnet_main(flags, model_function, input_function, shape=None):
    """Shared train/evaluate/export loop for ResNet models.

    Args:
        flags: FLAGS object holding the run parameters. See ResnetArgParser
            for the created flags.
        model_function: Function that instantiates the model and builds the
            ops for train/eval; handed to the Estimator as ``model_fn``.
        input_function: Function that processes the dataset and returns a
            dataset the Estimator can consume. It is wrapped here with the
            relevant flags before being passed to the Estimator.
        shape: List of ints giving the shape of the training images. Only
            used when ``flags.export_dir`` is set.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # Multi-GPU needs two wrappers: the model_fn (done here) and the
        # optimizer (done inside the model_fn where it is defined).
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    # allow_soft_placement=True is required for multi-GPU and not harmful
    # for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # Set up a RunConfig to save checkpoint and set session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'multi_gpu': flags.multi_gpu,
        'version': flags.version,
    }
    estimator = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params=model_params)

    benchmark_logger = None
    if flags.benchmark_log_dir is not None:
        benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
        benchmark_logger.log_run_info("resnet")

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for _cycle in range(num_cycles):
        # Hooks are rebuilt for every cycle.
        cycle_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')
        estimator.train(input_fn=input_fn_train, hooks=cycle_hooks,
                        max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # flags.max_train_steps is generally associated with testing and
        # profiling, which frequently run on synthetic data that iterates
        # forever. Passing steps=flags.max_train_steps lets the evaluation
        # terminate in that case. Note that eval runs for max_train_steps
        # in each loop, regardless of the global_step count.
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)

        if benchmark_logger:
            benchmark_logger.log_estimator_evaluation_result(eval_results)

    if flags.export_dir is None:
        return

    warn_on_multi_gpu_export(flags.multi_gpu)

    # Exports a saved model for the given classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags.batch_size)
    estimator.export_savedmodel(flags.export_dir, input_receiver_fn)
def resnet_main(flags, model_function, input_function, shape=None):
    """Shared train/evaluate/export loop for ResNet models.

    Args:
        flags: FLAGS object holding the run parameters. See ResnetArgParser
            for the created flags.
        model_function: Function that instantiates the model and builds the
            ops for train/eval; handed to the Estimator as ``model_fn``.
        input_function: Function that processes the dataset and returns a
            dataset the Estimator can consume. It is wrapped here with the
            relevant flags before being passed to the Estimator.
        shape: List of ints giving the shape of the training images. Only
            used when ``flags.export_dir`` is set.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # allow_soft_placement=True is required for multi-GPU and not harmful
    # for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    if ALLOW_MULTIPLE_MODELS:
        session_config.gpu_options.allow_growth = True

    # Set up a RunConfig to save checkpoint and set session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=5 * 60,  # Save checkpoints every X minutes.
        keep_checkpoint_max=1000,  # Retain the 1000 most recent checkpoints.
        #tf_random_seed = 5739,  # Set random seed for "reproducible" results
        save_summary_steps=10000,  # Number of steps between summaries
        session_config=session_config)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'multi_gpu': flags.multi_gpu,
        'version': flags.version,
        'ncmmethod': flags.ncmmethod,
        'ncmparam': flags.ncmparam,
        'initial_learning_scale': flags.initial_learning_scale
    }
    estimator = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params=model_params)

    benchmark_logger = None
    if flags.benchmark_log_dir is not None:
        benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
        benchmark_logger.log_run_info("resnet")

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for _cycle in range(num_cycles):
        # Hooks are rebuilt for every cycle.
        cycle_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        #tensors_to_log = {"iter": "m_iter","deep-cnt": "m_cnt", "deep-sum": "m_sum"}
        #logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=1)

        print('Starting a training cycle.')
        estimator.train(input_fn=input_fn_train, hooks=cycle_hooks,
                        max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # flags.max_train_steps is generally associated with testing and
        # profiling, which frequently run on synthetic data that iterates
        # forever. Passing steps=flags.max_train_steps lets the evaluation
        # terminate in that case. Note that eval runs for max_train_steps
        # in each loop, regardless of the global_step count.
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)

        if benchmark_logger:
            benchmark_logger.log_estimator_evaluation_result(eval_results)

    if flags.export_dir is None:
        return

    # Exports a saved model for the given classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags.batch_size)
    estimator.export_savedmodel(flags.export_dir, input_receiver_fn)
def main(argv):
    """Train and evaluate the MNIST estimator, optionally exporting it.

    Args:
        argv: Command-line arguments; everything after ``argv[0]`` is parsed
            by ``MNISTArgParser``.
    """
    flags = MNISTArgParser().parse_args(args=argv[1:])

    # There are two steps required if using multi-GPU: (1) wrap the model_fn,
    # and (2) wrap the optimizer. The first happens here, and (2) happens
    # in the model_fn itself when the optimizer is defined.
    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_fn, loss_reduction=tf.losses.Reduction.MEAN)
    else:
        model_function = model_fn

    data_format = flags.data_format
    if data_format is None:
        # No explicit choice: pick based on whether TF was built with CUDA.
        if tf.test.is_built_with_cuda():
            data_format = 'channels_first'
        else:
            data_format = 'channels_last'

    estimator_params = {
        'data_format': data_format,
        'multi_gpu': flags.multi_gpu,
    }
    mnist_classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        params=estimator_params)

    def train_input_fn():
        """Prepare data for training."""
        # A larger shuffle buffer gives better randomness at the cost of
        # memory; the buffer used here is 50000 elements.
        shuffled = dataset.train(flags.data_dir).cache().shuffle(
            buffer_size=50000)
        batched = shuffled.batch(flags.batch_size)
        # Iterate through the dataset `epochs_between_evals` times during
        # each training session.
        return batched.repeat(flags.epochs_between_evals)

    def eval_input_fn():
        batched = dataset.test(flags.data_dir).batch(flags.batch_size)
        iterator = batched.make_one_shot_iterator()
        return iterator.get_next()

    # Hooks are selected by name through flags.hooks.
    train_hooks = hooks_helper.get_train_hooks(
        flags.hooks, batch_size=flags.batch_size)

    # Train and evaluate model.
    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for _cycle in range(num_cycles):
        mnist_classifier.train(input_fn=train_input_fn, hooks=train_hooks)
        eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
        print('\nEvaluation results:\n\t%s\n' % eval_results)

    # Export the model
    if flags.export_dir is None:
        return
    image = tf.placeholder(tf.float32, [None, 28, 28])
    serving_input_fn = (
        tf.estimator.export.build_raw_serving_input_receiver_fn(
            {'image': image}))
    mnist_classifier.export_savedmodel(flags.export_dir, serving_input_fn)
def test_raise_in_invalid_names(self):
    """get_train_hooks must raise ValueError for this list of hook names."""
    self.assertRaises(
        ValueError,
        hooks_helper.get_train_hooks,
        ['StepCounterHook', 'StopAtStepHook'],
        batch_size=256)
def test_raise_in_non_list_names(self):
    """get_train_hooks must raise ValueError when given a string, not a list."""
    self.assertRaises(
        ValueError,
        hooks_helper.get_train_hooks,
        'LoggingTensorHook, ProfilerHook',
        batch_size=256)
def resnet_main(flags, model_function, input_function):
    """Shared train/evaluate loop for ResNet models.

    Args:
        flags: Parsed options object; the attributes read are the ones
            referenced in the body.
        model_function: Passed to the Estimator as ``model_fn`` (wrapped with
            ``replicate_model_fn`` first when ``flags.multi_gpu`` is set).
        input_function: Called with six positional arguments to produce the
            training (first argument True) and evaluation (first argument
            False) inputs.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # Multi-GPU needs two wrappers: the model_fn (done here) and the
        # optimizer (done inside the model_fn where it is defined).
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    # allow_soft_placement=True is required for multi-GPU and not harmful
    # for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # Set up a RunConfig to save checkpoint and set session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'multi_gpu': flags.multi_gpu,
        'version': flags.version,
    }
    estimator = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params=model_params)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for _cycle in range(num_cycles):
        # Hooks are rebuilt for every cycle.
        cycle_hooks = hooks_helper.get_train_hooks(
            flags.hooks, batch_size=flags.batch_size)

        print('Starting a training cycle.')
        estimator.train(input_fn=input_fn_train, hooks=cycle_hooks,
                        max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # flags.max_train_steps is generally associated with testing and
        # profiling, which frequently run on synthetic data that iterates
        # forever. Passing steps=flags.max_train_steps lets the evaluation
        # terminate in that case. Note that eval runs for max_train_steps
        # in each loop, regardless of the global_step count.
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)
def resnet_main(flags, model_function, input_function, shape=None):
    """Run the common ResNet loop: train, evaluate, and optionally export.

    Args:
        flags: FLAGS object holding the run parameters. See ResnetArgParser
            for the created flags.
        model_function: Function that instantiates the model and builds the
            ops for train/eval; handed to the Estimator as ``model_fn``.
        input_function: Function that processes the dataset and returns a
            dataset the Estimator can consume. It is wrapped here with the
            relevant flags before being passed to the Estimator.
        shape: List of ints giving the shape of the training images. Only
            used when ``flags.export_dir`` is set.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # Multi-GPU needs two wrappers: the model_fn (done here) and the
        # optimizer (done inside the model_fn where it is defined).
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    # allow_soft_placement=True is required for multi-GPU and not harmful
    # for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # Set up a RunConfig to save checkpoint and set session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params=dict([
            ('resnet_size', flags.resnet_size),
            ('data_format', flags.data_format),
            ('batch_size', flags.batch_size),
            ('multi_gpu', flags.multi_gpu),
            ('version', flags.version),
        ]))

    log_dir = flags.benchmark_log_dir
    benchmark_logger = None
    if log_dir is not None:
        benchmark_logger = logger.BenchmarkLogger(log_dir)
        benchmark_logger.log_run_info("resnet")

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    cycles_left = flags.train_epochs // flags.epochs_between_evals
    while cycles_left > 0:
        cycles_left -= 1

        # Hooks are rebuilt for every cycle.
        train_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')
        classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                         max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # flags.max_train_steps is generally associated with testing and
        # profiling, which frequently run on synthetic data that iterates
        # forever. Passing steps=flags.max_train_steps lets the evaluation
        # terminate in that case. Note that eval runs for max_train_steps
        # in each loop, regardless of the global_step count.
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=flags.max_train_steps)
        print(eval_results)

        if benchmark_logger:
            benchmark_logger.log_estimator_evaluation_result(eval_results)

    if flags.export_dir is not None:
        warn_on_multi_gpu_export(flags.multi_gpu)

        # Exports a saved model for the given classifier.
        receiver_fn = export.build_tensor_serving_input_receiver_fn(
            shape, batch_size=flags.batch_size)
        classifier.export_savedmodel(flags.export_dir, receiver_fn)
def dan_main(flags, model_function, input_function):
    """Build an Estimator from ``model_function`` and run it in ``flags.mode``.

    ``flags.mode`` is compared against ``tf.estimator.ModeKeys``:

    * PREDICT: writes the ``'s2_ret'`` entry of each prediction to
      ``./prep/predict/<image name>.ptv`` as comma-separated text, where the
      image names come from the ``*.png`` and ``*.jpg`` files found in
      ``flags.data_dir``. Then returns.
    * EVAL: runs one evaluation and prints the result.
    * TRAIN: alternates training and evaluation for
      ``flags.train_epochs // flags.epochs_per_eval`` cycles and prints each
      evaluation result.

    Args:
        flags: Parsed options object; the attributes read are the ones
            referenced in the body.
        model_function: Passed to the Estimator as ``model_fn``. When
            ``flags.multi_gpu`` is set it is first wrapped with
            ``tf.contrib.estimator.replicate_model_fn``.
        input_function: In PREDICT mode it is handed to
            ``estimator.predict`` unchanged. Otherwise it is called with six
            positional arguments: True (train) or False (eval), the data
            directory, the batch size, ``flags.epochs_per_eval`` (train) or
            1 (eval), ``flags.num_parallel_calls`` and ``flags.multi_gpu``.
    """
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)
    estimator = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params={
            'dan_stage': flags.dan_stage,
            'num_lmark': flags.num_lmark,
            'data_format': flags.data_format,
            'batch_size': flags.batch_size,
            'multi_gpu': flags.multi_gpu,
        })

    if flags.mode == tf.estimator.ModeKeys.PREDICT:
        # Imported lazily: only the prediction path needs these.
        import glob
        import numpy as np

        def get_filenames(data_dir):
            """Return the *.png files followed by the *.jpg files in data_dir."""
            imagelist = []
            for ext in ('*.png', '*.jpg'):
                imagelist.extend(glob.glob(os.path.join(data_dir, ext)))
            return imagelist

        def make_safely_folder(directory):
            """Create directory if missing; report, but do not raise, on failure."""
            try:
                if not os.path.exists(directory):
                    os.makedirs(directory)
            except OSError:
                print('Error: Creating directory. ' + directory)

        predict_results = estimator.predict(input_function)
        save_path = './prep/predict'
        make_safely_folder(save_path)
        img_path_list = get_filenames(flags.data_dir)

        # NOTE(review): predictions are paired with files purely by position
        # in glob order; confirm input_function yields images in that order.
        # zip() stops at the shorter sequence instead of leaking a
        # StopIteration when there are more predictions than files.
        for x, img_path in zip(predict_results, img_path_list):
            landmark = x['s2_ret']
            filename, _ = os.path.splitext(os.path.basename(img_path))
            np.savetxt(os.path.join(save_path, filename + '.ptv'), landmark,
                       delimiter=',')
        return

    def input_fn_eval():
        # Prefer the dedicated test directory and fall back to the training
        # directory only when none was given. The original condition was
        # inverted and handed None to input_function in the fallback case.
        eval_dir = (flags.data_dir_test
                    if flags.data_dir_test is not None else flags.data_dir)
        return input_function(False, eval_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_per_eval,
                              flags.num_parallel_calls, flags.multi_gpu)

    if flags.mode == tf.estimator.ModeKeys.EVAL:
        eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                          steps=flags.max_train_steps)
        print(eval_results)

    if flags.mode == tf.estimator.ModeKeys.TRAIN:
        for _ in range(flags.train_epochs // flags.epochs_per_eval):
            # NOTE(review): these hooks are built but never handed to
            # estimator.train(); confirm whether `hooks=train_hooks` was
            # intended before wiring them in.
            train_hooks = hooks_helper.get_train_hooks(
                ["LoggingTensorHook"], batch_size=flags.batch_size)

            print('Starting a training cycle.')
            estimator.train(input_fn=input_fn_train,
                            max_steps=flags.max_train_steps)

            print('Starting to evaluate.')
            eval_results = estimator.evaluate(input_fn=input_fn_eval,
                                              steps=flags.max_train_steps)
            print(eval_results)