def main(argv):
  """Configures U-Net parameters, builds input functions and runs the job.

  Parameters start from `unet_config.UNET_CONFIG`, then are overridden in
  order by `FLAGS.config_file`, the file-pattern flags, values derived from
  the item counts and batch sizes, and finally `FLAGS.params_override`. They
  are validated and locked before any input function is created.

  Args:
    argv: Unused command-line arguments.

  Raises:
    ValueError: If neither train nor eval input shapes were produced, e.g.
      when `FLAGS.mode` is not `train`, `eval` or `train_and_eval`.
  """
  del argv  # Unused.

  params = params_dict.ParamsDict(unet_config.UNET_CONFIG,
                                  unet_config.UNET_RESTRICTIONS)
  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=False)

  if FLAGS.training_file_pattern:
    params.override({'training_file_pattern': FLAGS.training_file_pattern},
                    is_strict=True)

  if FLAGS.eval_file_pattern:
    params.override({'eval_file_pattern': FLAGS.eval_file_pattern},
                    is_strict=True)

  # Integer division: a trailing partial batch does not count as a step.
  train_epoch_steps = params.train_item_count // params.train_batch_size
  eval_epoch_steps = params.eval_item_count // params.eval_batch_size

  params.override(
      {
          'model_dir': FLAGS.model_dir,
          'min_eval_interval': FLAGS.min_eval_interval,
          'eval_timeout': FLAGS.eval_timeout,
          'tpu_config': tpu_executor.get_tpu_flags(),
          'lr_decay_steps': train_epoch_steps,
          'train_steps': params.train_epochs * train_epoch_steps,
          'eval_steps': eval_epoch_steps,
      },
      is_strict=False)

  # Applied last so explicit overrides win over the derived values above.
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)

  params.validate()
  params.lock()

  train_input_fn = None
  eval_input_fn = None
  train_input_shapes = None
  eval_input_shapes = None
  if FLAGS.mode in ('train', 'train_and_eval'):
    train_input_fn = input_reader.LiverInputFn(
        params.training_file_pattern, params, mode=tf.estimator.ModeKeys.TRAIN)
    train_input_shapes = train_input_fn.get_input_shapes(params)
  if FLAGS.mode in ('eval', 'train_and_eval'):
    eval_input_fn = input_reader.LiverInputFn(
        params.eval_file_pattern, params, mode=tf.estimator.ModeKeys.EVAL)
    eval_input_shapes = eval_input_fn.get_input_shapes(params)

  # Raise explicitly instead of using `assert`: assertions are removed under
  # `python -O`, which would let the executer start with no inputs at all.
  if train_input_shapes is None and eval_input_shapes is None:
    raise ValueError(
        'No input shapes were produced for mode `%s`; expected `train`, '
        '`eval` or `train_and_eval`.' % FLAGS.mode)

  run_executer(params,
               train_input_shapes=train_input_shapes,
               eval_input_shapes=eval_input_shapes,
               train_input_fn=train_input_fn,
               eval_input_fn=eval_input_fn)
def _serving_model_fn(features, labels, mode, params):
  """Model function that emits named detection outputs for serving.

  Args:
    features: Inputs forwarded to the serving model graph.
    labels: Unused.
    mode: Estimator mode; only `tf.estimator.ModeKeys.PREDICT` is accepted.
    params: Model parameters, wrapped in a `ParamsDict` before use.

  Returns:
    A `TPUEstimatorSpec` when `export_tpu_model` is truthy, otherwise an
    `EstimatorSpec`, carrying the predictions dictionary.

  Raises:
    ValueError: If `mode` is not `tf.estimator.ModeKeys.PREDICT`.
  """
  del labels  # unused.
  if mode != tf.estimator.ModeKeys.PREDICT:
    raise ValueError('To build the serving model_fn, set '
                     'mode = `tf.estimator.ModeKeys.PREDICT`')

  serving_params = params_dict.ParamsDict(params)
  build_graph = serving_model_graph_builder(
      output_image_info,
      output_normalized_coordinates,
      cast_num_detections_to_float)
  model_outputs = build_graph(features, serving_params)

  # (prediction key, exported tensor name) pairs, in creation order.
  named_outputs = (
      ('num_detections', 'NumDetections'),
      ('detection_boxes', 'DetectionBoxes'),
      ('detection_classes', 'DetectionClasses'),
      ('detection_scores', 'DetectionScores'),
  )
  predictions = {
      key: tf.identity(model_outputs[key], tensor_name)
      for key, tensor_name in named_outputs
  }
  if output_image_info:
    predictions['image_info'] = tf.identity(
        model_outputs['image_info'], 'ImageInfo')

  spec_cls = (
      tf.contrib.tpu.TPUEstimatorSpec
      if export_tpu_model else tf.estimator.EstimatorSpec)
  return spec_cls(mode=mode, predictions=predictions)
def main(_):
  """Exports a Mask R-CNN SavedModel and optionally writes warmup requests.

  The config is built from `mask_rcnn_config.MASK_RCNN_CFG`, overridden by
  `FLAGS.config`, and forced to `is_training_bn = False` with both batch sizes
  set to `FLAGS.batch_size` before it is validated and locked.
  """
  config = params_dict.override_params_dict(
      params_dict.ParamsDict(mask_rcnn_config.MASK_RCNN_CFG,
                             mask_rcnn_config.MASK_RCNN_RESTRICTIONS),
      FLAGS.config,
      is_strict=True)

  batch_size = FLAGS.batch_size
  config.is_training_bn = False
  config.train_batch_size = batch_size
  config.eval_batch_size = batch_size

  config.validate()
  config.lock()

  model_params = dict(
      config.as_dict(),
      use_tpu=FLAGS.use_tpu,
      mode=tf.estimator.ModeKeys.PREDICT,
      transpose_input=False)

  print(' - Setting up TPUEstimator...')
  model_fn = serving.serving_model_fn_builder(
      FLAGS.output_source_id,
      FLAGS.output_image_info,
      FLAGS.output_box_features,
      FLAGS.output_normalized_coordinates,
      FLAGS.cast_num_detections_to_float)
  run_config = tpu_config.RunConfig(
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop),
      master='local',
      evaluation_master='local')
  estimator = tf.estimator.tpu.TPUEstimator(
      model_fn=model_fn,
      model_dir=FLAGS.model_dir,
      config=run_config,
      params=model_params,
      use_tpu=FLAGS.use_tpu,
      train_batch_size=batch_size,
      predict_batch_size=batch_size,
      export_to_tpu=FLAGS.use_tpu,
      export_to_cpu=True)

  print(' - Exporting the model...')
  input_type = FLAGS.input_type
  receiver_fn = functools.partial(
      serving.serving_input_fn,
      batch_size=batch_size,
      desired_image_size=config.image_size,
      padding_stride=(2**config.max_level),
      input_type=input_type,
      input_name=FLAGS.input_name)
  export_path = estimator.export_saved_model(
      export_dir_base=FLAGS.export_dir,
      serving_input_receiver_fn=receiver_fn,
      checkpoint_path=FLAGS.checkpoint_path)

  # Warmup requests are only written for the `image_bytes` input type.
  if FLAGS.add_warmup_requests and input_type == 'image_bytes':
    inference_warmup.write_warmup_requests(
        export_path,
        FLAGS.model_name,
        config.image_size,
        batch_sizes=[batch_size],
        image_format='JPEG',
        input_signature=FLAGS.input_name)
  print(' - Done! path: %s' % export_path)
def config_generator(model):
  """Returns the default `ParamsDict` config for the named model.

  Args:
    model: Model name; only `'retinanet'` is supported.

  Returns:
    A `ParamsDict` built from the RetinaNet defaults and restrictions.

  Raises:
    ValueError: If `model` is not a supported model name.
  """
  if model != 'retinanet':
    raise ValueError('Model %s is not supported.' % model)

  return params_dict.ParamsDict(retinanet_config.RETINANET_CFG,
                                retinanet_config.RETINANET_RESTRICTIONS)
def config_generator(model):
  """Returns the default `ParamsDict` config for the named model.

  Args:
    model: Model name; only `'attribute_mask_rcnn'` is supported.

  Returns:
    A `ParamsDict` built from `model_config.CFG` and
    `model_config.RESTRICTIONS`.

  Raises:
    ValueError: If `model` is not a supported model name.
  """
  if model != 'attribute_mask_rcnn':
    raise ValueError('Model %s is not supported.' % model)

  return params_dict.ParamsDict(model_config.CFG, model_config.RESTRICTIONS)
def main(argv):
  """Exports a RetinaNet SavedModel from flag-configured parameters.

  Args:
    argv: Unused command-line arguments.
  """
  del argv  # Unused.

  params = params_dict.override_params_dict(
      params_dict.ParamsDict(retinanet_config.RETINANET_CFG,
                             retinanet_config.RETINANET_RESTRICTIONS),
      FLAGS.params_overrides,
      is_strict=True)
  params.validate()
  params.lock()

  model_params = dict(
      params.as_dict(),
      use_tpu=FLAGS.use_tpu,
      mode=tf.estimator.ModeKeys.PREDICT,
      transpose_input=False)

  print(' - Setting up TPUEstimator...')
  model_fn = serving.serving_model_fn_builder(
      FLAGS.use_tpu,
      FLAGS.output_image_info,
      FLAGS.output_normalized_coordinates,
      FLAGS.cast_num_detections_to_float)
  run_config = tpu_config.RunConfig(
      tpu_config=tpu_config.TPUConfig(iterations_per_loop=1),
      master='local',
      evaluation_master='local')
  estimator = tf.contrib.tpu.TPUEstimator(
      model_fn=model_fn,
      model_dir=None,
      config=run_config,
      params=model_params,
      use_tpu=FLAGS.use_tpu,
      train_batch_size=FLAGS.batch_size,
      predict_batch_size=FLAGS.batch_size,
      export_to_tpu=FLAGS.use_tpu,
      export_to_cpu=True)

  print(' - Exporting the model...')
  input_type = FLAGS.input_type
  # `--input_image_size` is a comma-separated list of integers.
  image_size = list(map(int, FLAGS.input_image_size.split(',')))
  receiver_fn = functools.partial(
      serving.serving_input_fn,
      batch_size=FLAGS.batch_size,
      desired_image_size=image_size,
      stride=(2**params.anchor.max_level),
      input_type=input_type,
      input_name=FLAGS.input_name)
  export_path = estimator.export_saved_model(
      export_dir_base=FLAGS.export_dir,
      serving_input_receiver_fn=receiver_fn,
      checkpoint_path=FLAGS.checkpoint_path)
  print(' - Done! path: %s' % export_path)
def prepare_evaluation(self):
  """Prepares the evaluator and stages its annotation file in `model_dir`.

  The annotation file is written to `<model_dir>/eval_annotation_file.json`:
  copied from `eval.val_json_file` when that is set, otherwise generated by
  `coco_utils.scan_and_generator_annotation_file` from the eval file pattern.
  The evaluator is then created from the eval params with `val_json_file`
  pointing at the staged file.
  """
  val_json_file = os.path.join(self._params.model_dir,
                               'eval_annotation_file.json')
  if self._params.eval.val_json_file:
    # Overwrite any file left by a previous run; without `overwrite=True` the
    # copy fails when the destination already exists.
    tf.gfile.Copy(
        self._params.eval.val_json_file, val_json_file, overwrite=True)
  else:
    coco_utils.scan_and_generator_annotation_file(
        self._params.eval.eval_file_pattern,
        self._params.eval.eval_samples,
        include_mask=False,
        annotation_file=val_json_file)
  eval_params = params_dict.ParamsDict(self._params.eval)
  eval_params.override({'val_json_file': val_json_file})
  self._evaluator = factory.evaluator_generator(eval_params)
def config_generator(model):
  """Returns the default `ParamsDict` config for the named model.

  Args:
    model: One of `'retinanet'`, `'shapemask'` or `'segmentation'`.

  Returns:
    A `ParamsDict` built from the chosen model's defaults and restrictions.

  Raises:
    ValueError: If `model` is not a supported model name.
  """
  if model == 'retinanet':
    return params_dict.ParamsDict(retinanet_config.RETINANET_CFG,
                                  retinanet_config.RETINANET_RESTRICTIONS)
  if model == 'shapemask':
    return params_dict.ParamsDict(shapemask_config.SHAPEMASK_CFG,
                                  shapemask_config.SHAPEMASK_RESTRICTIONS)
  if model == 'segmentation':
    return params_dict.ParamsDict(
        segmentation_config.SEGMENTATION_CFG,
        segmentation_config.SEGMENTATION_RESTRICTIONS)
  raise ValueError('Model %s is not supported.' % model)
def main(argv):
  """Builds Mask R-CNN parameters and input readers, then runs the executer.

  Args:
    argv: Unused command-line arguments.

  Raises:
    RuntimeError: If a file pattern or annotation source required by
      `FLAGS.mode` has not been specified.
  """
  del argv  # Unused.

  # Configure parameters.
  params = params_dict.ParamsDict(mask_rcnn_config.MASK_RCNN_CFG,
                                  mask_rcnn_config.MASK_RCNN_RESTRICTIONS)
  for override_source in (FLAGS.config_file, FLAGS.params_override):
    params = params_dict.override_params_dict(
        params, override_source, is_strict=True)
  params = flags_to_params.override_params_from_input_flags(params, FLAGS)
  params.validate()
  params.lock()

  mode = FLAGS.mode
  runs_training = mode in ('train', 'train_and_eval')
  runs_evaluation = mode in ('eval', 'train_and_eval')

  # Check data path
  if runs_training and not params.training_file_pattern:
    raise RuntimeError(
        'You must specify `training_file_pattern` for training.')
  if runs_evaluation:
    if not params.validation_file_pattern:
      raise RuntimeError('You must specify `validation_file_pattern` '
                         'for evaluation.')
    if not (params.val_json_file or params.include_groundtruth_in_features):
      raise RuntimeError(
          'You must specify `val_json_file` or '
          'include_groundtruth_in_features=True for evaluation.')

  train_input_fn = None
  if runs_training:
    train_input_fn = dataloader.InputReader(
        params.training_file_pattern,
        mode=tf.estimator.ModeKeys.TRAIN,
        use_fake_data=FLAGS.use_fake_data,
        use_instance_mask=params.include_mask)

  # An eval reader is also needed when training is followed by evaluation.
  eval_input_fn = None
  if runs_evaluation or (mode == 'train' and FLAGS.eval_after_training):
    eval_input_fn = dataloader.InputReader(
        params.validation_file_pattern,
        mode=tf.estimator.ModeKeys.PREDICT,
        num_examples=params.eval_samples,
        use_instance_mask=params.include_mask)

  run_executer(params, train_input_fn, eval_input_fn)
def _serving_model_fn(features, labels, mode, params):
  """Model function that returns the serving graph's predictions.

  Args:
    features: Inputs forwarded to the serving model graph.
    labels: Unused.
    mode: Estimator mode; only `tf.estimator.ModeKeys.PREDICT` is accepted.
    params: Model parameters, wrapped in a `ParamsDict` before use.

  Returns:
    A `TPUEstimatorSpec` when `export_tpu_model` is truthy, otherwise an
    `EstimatorSpec`, carrying the serving graph outputs as predictions.

  Raises:
    ValueError: If `mode` is not `tf.estimator.ModeKeys.PREDICT`.
  """
  del labels  # unused.
  if mode != tf.estimator.ModeKeys.PREDICT:
    raise ValueError('To build the serving model_fn, set '
                     'mode = `tf.estimator.ModeKeys.PREDICT`')

  serving_params = params_dict.ParamsDict(params)
  build_graph = serving_model_graph_builder(
      output_image_info,
      output_normalized_coordinates,
      cast_num_detections_to_float)
  predictions = build_graph(features, serving_params)

  spec_cls = (
      tf.estimator.tpu.TPUEstimatorSpec
      if export_tpu_model else tf.estimator.EstimatorSpec)
  return spec_cls(mode=mode, predictions=predictions)
def config_generator(model):
  """Returns the default `ParamsDict` config for the named model.

  Args:
    model: One of `'classification'`, `'retinanet'`, `'mask_rcnn'` or
      `'shapemask'`.

  Returns:
    A `ParamsDict` built from the chosen model's defaults and restrictions.

  Raises:
    ValueError: If `model` is not a supported model name.
  """
  if model == 'classification':
    return params_dict.ParamsDict(
        classification_config.CLASSIFICATION_CFG,
        classification_config.CLASSIFICATION_RESTRICTIONS)
  if model == 'retinanet':
    return params_dict.ParamsDict(retinanet_config.RETINANET_CFG,
                                  retinanet_config.RETINANET_RESTRICTIONS)
  if model == 'mask_rcnn':
    return params_dict.ParamsDict(maskrcnn_config.MASKRCNN_CFG,
                                  maskrcnn_config.MASKRCNN_RESTRICTIONS)
  if model == 'shapemask':
    return params_dict.ParamsDict(shapemask_config.SHAPEMASK_CFG,
                                  shapemask_config.SHAPEMASK_RESTRICTIONS)
  raise ValueError('Model %s is not supported.' % model)
def main(_):
  """Exports a U-Net SavedModel from flag-configured parameters.

  Both batch sizes are set to `FLAGS.batch_size` and `use_bfloat16` is
  switched off before the parameters are handed to the estimator.
  """
  params = params_dict.override_params_dict(
      params_dict.ParamsDict(unet_config.UNET_CONFIG,
                             unet_config.UNET_RESTRICTIONS),
      FLAGS.config_file,
      is_strict=False)

  batch_size = FLAGS.batch_size
  params.train_batch_size = batch_size
  params.eval_batch_size = batch_size
  params.use_bfloat16 = False

  model_params = dict(
      params.as_dict(),
      use_tpu=FLAGS.use_tpu,
      mode=tf.estimator.ModeKeys.PREDICT,
      transpose_input=False)

  print(' - Setting up TPUEstimator...')
  run_config = tf.estimator.tpu.RunConfig(
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop),
      master='local',
      evaluation_master='local')
  estimator = tf.estimator.tpu.TPUEstimator(
      model_fn=serving_model_fn,
      model_dir=FLAGS.model_dir,
      config=run_config,
      params=model_params,
      use_tpu=FLAGS.use_tpu,
      train_batch_size=batch_size,
      predict_batch_size=batch_size,
      export_to_tpu=FLAGS.use_tpu,
      export_to_cpu=True)

  print(' - Exporting the model...')
  input_type = FLAGS.input_type
  receiver_fn = functools.partial(
      serving_input_fn,
      batch_size=batch_size,
      input_type=input_type,
      params=params,
      input_name=FLAGS.input_name)
  export_path = estimator.export_saved_model(
      export_dir_base=FLAGS.export_dir,
      serving_input_receiver_fn=receiver_fn,
      checkpoint_path=FLAGS.checkpoint_path)
  print(' - Done! path: %s' % export_path)
def filter_unused_blocks(params):
  """Drops the params blocks of architecture components that are not selected.

  For each of `parser`, `backbone` and `multilevel_features` present in
  `params.architecture`, every top-level block named in the matching
  candidate list (`_PARSERS`, `_BACKBONES`, `_MULTILEVEL_FEATURES`) is removed
  unless its name equals the selected value.

  Args:
    params: A `ParamsDict` with an `architecture` block.

  Returns:
    A new `ParamsDict` built from `params` with the unselected blocks deleted.
  """
  filtered_params = params_dict.ParamsDict(params)
  architecture_keys = params.architecture.as_dict()
  top_level_keys = params.as_dict()

  candidates_by_key = (
      ('parser', _PARSERS),
      ('backbone', _BACKBONES),
      ('multilevel_features', _MULTILEVEL_FEATURES),
  )
  for arch_key, candidates in candidates_by_key:
    if arch_key not in architecture_keys:
      continue
    selected = getattr(params.architecture, arch_key)
    for name in candidates:
      if name in top_level_keys and name != selected:
        delattr(filtered_params, name)
  return filtered_params
def prepare_evaluation(self):
    """Prepares the evaluator, staging an annotation file when configured.

    When `eval.use_json_file` is set, the annotation file is placed at
    `<model_dir>/eval_annotation_file.json` (copied from `eval.val_json_file`
    if given, otherwise generated from the eval file pattern) and the eval
    params handed to the evaluator point at that staged file.
    """
    eval_config = self._params.eval
    eval_params = params_dict.ParamsDict(eval_config)

    if eval_config.use_json_file:
        staged_json = os.path.join(
            self._params.model_dir, "eval_annotation_file.json"
        )
        if not eval_config.val_json_file:
            coco_utils.scan_and_generator_annotation_file(
                eval_config.eval_file_pattern,
                eval_config.eval_samples,
                include_mask=False,
                annotation_file=staged_json,
                dataset_type=eval_config.eval_dataset_type,
            )
        else:
            tf.io.gfile.copy(
                eval_config.val_json_file, staged_json, overwrite=True
            )
        eval_params.override({"val_json_file": staged_json})

    self._evaluator = factory.evaluator_generator(eval_params)
def prepare_evaluation(self):
  """Prepares the evaluator, staging an annotation file when configured.

  When `eval.use_json_file` is set, the annotation file is placed at
  `<model_dir>/eval_annotation_file.json` (copied from `eval.val_json_file`
  if given, otherwise generated from the eval file pattern) and the eval
  params handed to the evaluator point at that staged file.

  Raises:
    ValueError: If `eval.type` is `box_and_mask` but `eval.use_json_file` or
      `eval.val_json_file` is not set.
  """
  eval_config = self._params.eval
  eval_params = params_dict.ParamsDict(eval_config)

  has_json_source = eval_config.use_json_file and eval_config.val_json_file
  if eval_config.type == 'box_and_mask' and not has_json_source:
    raise ValueError('If `eval.type` == `box_and_mask`, '
                     '`eval.val_json_file` is required.')

  if eval_config.use_json_file:
    staged_json = os.path.join(self._params.model_dir,
                               'eval_annotation_file.json')
    if not eval_config.val_json_file:
      coco_utils.scan_and_generator_annotation_file(
          eval_config.eval_file_pattern,
          eval_config.eval_samples,
          include_mask=False,
          annotation_file=staged_json,
          dataset_type=eval_config.eval_dataset_type)
    else:
      tf.gfile.Copy(eval_config.val_json_file, staged_json, overwrite=True)
    eval_params.override({'val_json_file': staged_json})

  self._evaluator = factory.evaluator_generator(eval_params)
def apply_pre_parser(dataset, mode):
  """Parses pre-parser data and zips the parsed output with the input dataset.

  This can be used to pre-process extra data that is then passed to the main
  parser, which is mainly helpful when multiple images need to be combined.
  The data path and parsing method are set via
  `config.train.pre_parser_dataset.file_pattern` and
  `config.architecture.pre_parser`.

  For example, for Copy-Paste augmentation the pre_parser should be set to
  'extract_objects_parser' to parse the pasting objects; these data are then
  passed to the main parser of 'maskrcnn_parser_with_copy_paste'.

  Args:
    dataset: a tf.data.Dataset dataset.
    mode: Training mode string.

  Returns:
    tf.data.Dataset dataset whose elements pair an input element with a
    pre-parsed element.
  """
  autotune = tf.data.experimental.AUTOTUNE
  pre_parser_source = config_params.train.pre_parser_dataset

  # Same config, but with the pre-parser swapped in as the active parser.
  pre_parser_config = params_dict.ParamsDict(config_params)
  pre_parser_config.architecture.parser = config_params.architecture.pre_parser

  pre_parsed, pre_parser_fn = get_dataset(
      pre_parser_config,
      pre_parser_source.file_pattern,
      pre_parser_source.dataset_type,
      mode)
  pre_parsed = pre_parsed.map(
      pre_parser_fn, num_parallel_calls=autotune, deterministic=False)
  pre_parsed = pre_parsed.prefetch(autotune)
  # Keep only pre-parsed elements that report at least one groundtruth.
  pre_parsed = pre_parsed.filter(
      lambda parsed: tf.greater(parsed['num_groundtrtuhs'], 0))

  dataset = dataset.prefetch(autotune)
  return tf.data.Dataset.zip((dataset, pre_parsed))
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Config template to train ShapeMask.""" from configs import detection_config from hyperparameters import params_dict # pylint: disable=line-too-long SHAPEMASK_RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+/)conv2d(|_([1-9]|10))\/' SHAPEMASK_CFG = params_dict.ParamsDict(detection_config.DETECTION_CFG) SHAPEMASK_CFG.override( { 'type': 'shapemask', 'architecture': { 'parser': 'shapemask_parser', 'backbone': 'resnet', 'multilevel_features': 'fpn', 'outer_box_scale': 1.25, }, 'train': { 'total_steps': 45000, 'learning_rate': { 'learning_rate_steps': [30000, 40000], }, 'frozen_variable_prefix': SHAPEMASK_RESNET_FROZEN_VAR_PREFIX,
def main(unused_argv):
  """Builds the MnasNet TPUEstimator and runs the mode selected by flags.

  Parameters are assembled from `mnasnet_config.MNASNET_CFG`, the config file,
  `--params_override`, input flags and two derived values, then validated and
  locked. Depending on `FLAGS.mode` the function exports only, evaluates each
  new checkpoint, trains, or alternates training and evaluation.

  Args:
    unused_argv: Unused command-line arguments.

  Raises:
    ValueError: If `precision` is `bfloat16` while `use_keras` is set.
  """
  params = params_dict.ParamsDict(mnasnet_config.MNASNET_CFG,
                                  mnasnet_config.MNASNET_RESTRICTIONS)
  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)

  params = flags_to_params.override_params_from_input_flags(params, FLAGS)

  # `steps_per_epoch` uses true division, so it may be fractional; below it is
  # only used to report the epoch count in a log message.
  additional_params = {
      'steps_per_epoch': params.num_train_images / params.train_batch_size,
      'quantized_training': FLAGS.quantized_training,
  }

  # Non-strict override: these keys are added on top of the existing params.
  params = params_dict.override_params_dict(
      params, additional_params, is_strict=False)

  params.validate()
  params.lock()

  if FLAGS.tpu or params.use_tpu:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
  else:
    tpu_cluster_resolver = None

  # With async checkpointing the estimator's own periodic saving is disabled;
  # an AsyncCheckpointSaverHook is attached in the `train` branch instead.
  if params.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(100, params.iterations_per_loop)
  config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=params.iterations_per_loop,
          per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
          .PER_HOST_V2))  # pylint: disable=line-too-long

  # Validates Flags.
  if params.precision == 'bfloat16' and params.use_keras:
    raise ValueError(
        'Keras layers do not have full support to bfloat16 activation training.'
        ' You have set precision as %s and use_keras as %s' %
        (params.precision, params.use_keras))

  # Initializes model parameters.
  mnasnet_est = tf.contrib.tpu.TPUEstimator(
      use_tpu=params.use_tpu,
      model_fn=mnasnet_model_fn,
      config=config,
      train_batch_size=params.train_batch_size,
      eval_batch_size=params.eval_batch_size,
      export_to_tpu=FLAGS.export_to_tpu,
      params=params.as_dict())

  # Export-only mode returns before any input pipeline is built.
  if FLAGS.mode == 'export_only':
    export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
    return

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    # NOTE(review): the Bigtable inputs always pass use_bfloat16=False, unlike
    # the ImageNetInput branch below which follows `params.precision`.
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetBigtableInput(
            is_training=is_training,
            use_bfloat16=False,
            transpose_input=params.transpose_input,
            selection=selection)
        for (is_training, selection) in [(True, select_train),
                                         (False, select_eval)]
    ]
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=params.transpose_input,
            cache=params.use_cache and is_training,
            image_size=params.input_image_size,
            num_parallel_calls=params.num_parallel_calls,
            use_bfloat16=(params.precision == 'bfloat16'))
        for is_training in [True, False]
    ]

  if FLAGS.mode == 'eval':
    eval_steps = params.num_eval_images // params.eval_batch_size
    # Run evaluation when there's a new checkpoint
    for ckpt in evaluation.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = mnasnet_est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d', eval_results,
                        elapsed_time)
        utils.archive_ckpt(eval_results, eval_results['top_1_accuracy'], ckpt)

        # Terminate eval job when final checkpoint is reached
        # The step is parsed from the text after the first '-' in the
        # checkpoint's base name.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= params.train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d', current_step)
          break

      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long after
        # the CPU job tells it to start evaluating. In this case, the checkpoint
        # file could have been deleted already.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint', ckpt)

    if FLAGS.export_dir:
      export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(  # pylint: disable=protected-access
        FLAGS.model_dir)

    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', params.train_steps,
        params.train_steps / params.steps_per_epoch, current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if params.use_async_checkpointing:
        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(100, params.iterations_per_loop)))
      mnasnet_est.train(
          input_fn=imagenet_train.input_fn,
          max_steps=params.train_steps,
          hooks=hooks)

    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < params.train_steps:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              params.train_steps)
        mnasnet_est.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = mnasnet_est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=params.num_eval_images // params.eval_batch_size)
        tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                        eval_results)
        ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
        utils.archive_ckpt(eval_results, eval_results['top_1_accuracy'], ckpt)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      params.train_steps, elapsed_time)
    if FLAGS.export_dir:
      export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Config template to train Mask R-CNN.""" from configs import base_config from hyperparameters import params_dict # pylint: disable=line-too-long MASKRCNN_CFG = params_dict.ParamsDict(base_config.BASE_CFG) MASKRCNN_CFG.override({ 'type': 'mask_rcnn', 'eval': { 'type': 'box_and_mask', }, 'architecture': { 'parser': 'maskrcnn_parser', 'backbone': 'resnet', 'multilevel_features': 'fpn', 'use_bfloat16': True, 'include_mask': False, }, 'maskrcnn_parser': { 'use_bfloat16': True, 'output_size': [1024, 1024],
def main(argv):
  """Builds the RetinaNet TPU executor and runs the mode selected by flags.

  Parameters come from `retinanet_config.RETINANET_CFG`, are overridden by
  `--params_overrides` and by platform/model flags, then validated, locked and
  logged. `FLAGS.mode` selects `train`, `eval` or `train_and_eval`; any other
  value only logs 'Mode not found.'.

  Args:
    argv: Unused command-line arguments.
  """
  del argv  # Unused.

  params = params_dict.ParamsDict(retinanet_config.RETINANET_CFG,
                                  retinanet_config.RETINANET_RESTRICTIONS)
  params = params_dict.override_params_dict(
      params, FLAGS.params_overrides, is_strict=True)
  # Flag-derived values are applied after --params_overrides, non-strictly.
  params.override(
      {
          'platform': {
              'eval_master': FLAGS.eval_master,
              'tpu': FLAGS.tpu,
              'tpu_zone': FLAGS.tpu_zone,
              'gcp_project': FLAGS.gcp_project,
          },
          'use_tpu': FLAGS.use_tpu,
          'model_dir': FLAGS.model_dir,
          'train': {
              'num_shards': FLAGS.num_cores,
          },
      },
      is_strict=False)
  params.validate()
  params.lock()
  pp = pprint.PrettyPrinter()
  params_str = pp.pformat(params.as_dict())
  tf.logging.info('Model Parameters: {}'.format(params_str))

  # Builds detection model on TPUs.
  model_fn = model_builder.ModelFn(params)
  executor = tpu_executor.TpuExecutor(model_fn, params)

  # Prepares input functions for train and eval.
  # Both input functions are created regardless of the selected mode.
  train_input_fn = input_reader.InputFn(
      params.train.train_file_pattern, params, mode=ModeKeys.TRAIN)
  eval_input_fn = input_reader.InputFn(
      params.eval.eval_file_pattern, params, mode=ModeKeys.PREDICT_WITH_GT)

  # Runs the model.
  if FLAGS.mode == 'train':
    save_config(params, params.model_dir)
    executor.train(train_input_fn, params.train.total_steps)
    if FLAGS.eval_after_training:
      executor.evaluate(
          eval_input_fn,
          params.eval.eval_samples // params.predict.predict_batch_size)

  elif FLAGS.mode == 'eval':

    def terminate_eval():
      # Passed as `timeout_fn` below; always returns True.
      tf.logging.info('Terminating eval after %d seconds of no checkpoints' %
                      params.eval.eval_timeout)
      return True

    # Runs evaluation when there's a new checkpoint.
    for ckpt in tf.contrib.training.checkpoints_iterator(
        params.model_dir,
        min_interval_secs=params.eval.min_eval_interval,
        timeout=params.eval.eval_timeout,
        timeout_fn=terminate_eval):
      # Terminates eval job when final checkpoint is reached.
      # The step is parsed from the text after the first '-' in the
      # checkpoint's base name.
      current_step = int(os.path.basename(ckpt).split('-')[1])

      tf.logging.info('Starting to evaluate.')
      try:
        executor.evaluate(
            eval_input_fn,
            params.eval.eval_samples // params.predict.predict_batch_size, ckpt)

        if current_step >= params.train.total_steps:
          tf.logging.info(
              'Evaluation finished after training step %d' % current_step)
          break
      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long after
        # the CPU job tells it to start evaluating. In this case, the checkpoint
        # file could have been deleted already.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint' % ckpt)

  elif FLAGS.mode == 'train_and_eval':
    save_config(params, params.model_dir)
    # NOTE(review): int() truncates, so when total_steps is not a multiple of
    # num_steps_per_eval the last requested step below is smaller than
    # total_steps -- confirm this is intended.
    num_cycles = int(params.train.total_steps / params.eval.num_steps_per_eval)
    for cycle in range(num_cycles):
      tf.logging.info('Start training cycle %d.' % cycle)
      current_cycle_last_train_step = ((cycle + 1) *
                                       params.eval.num_steps_per_eval)
      executor.train(train_input_fn, current_cycle_last_train_step)
      executor.evaluate(
          eval_input_fn,
          params.eval.eval_samples // params.predict.predict_batch_size)
  else:
    tf.logging.info('Mode not found.')
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Config template to train Segmentation.""" from configs import base_config from hyperparameters import params_dict # pylint: disable=line-too-long RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+)\/(conv2d(|_([1-9]|10))|batch_normalization(|_([1-9]|10)))\/' SEGMENTATION_CFG = params_dict.ParamsDict(base_config.BASE_CFG) SEGMENTATION_CFG.override( { 'type': 'segmentation', 'architecture': { 'parser': 'segmentation_parser', 'backbone': 'resnet', 'multilevel_features': 'fpn', 'use_aspp': False, 'use_pyramid_fusion': False, 'num_classes': 21, # Include background class 0. }, 'train': { 'train_batch_size': 64, 'total_steps': 10000, 'learning_rate': {
from configs import base_config from hyperparameters import params_dict # pylint: disable=line-too-long # For ResNet, this freezes the variables of the first conv1 and conv2_x # layers [1], which leads to higher training speed and slightly better testing # accuracy. The intuition is that the low-level architecture (e.g., ResNet-50) # is able to capture low-level features such as edges; therefore, it does not # need to be fine-tuned for the detection task. # Note that we need to trailing `/` to avoid the incorrect match. # [1]: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L198 RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+)\/(conv2d(|_([1-9]|10))|batch_normalization(|_([1-9]|10)))\/' DETECTION_CFG = params_dict.ParamsDict(base_config.BASE_CFG) DETECTION_CFG.override({ 'architecture': { # Note that `num_classes` is the total number of classes including # one background classes whose index is 0. 'num_classes': 91 }, 'eval': { 'type': 'box', # Setting `eval_samples` = None will exhaust all the samples in the eval # dataset once. This only works if `type` != customized. 'eval_samples': None, 'use_json_file': True, 'val_json_file': '', 'per_category_metrics': False, },
def _serving_model_fn(features, labels, mode, params):
  """Builds the serving model_fn.

  Runs the model on `features['images']`, resizes and crops the logits, and
  returns PNG-encoded per-pixel category and score maps together with the
  request key.

  Args:
    features: Dict read for the keys `images`, `image_info`,
      `score_thresholds` and `key`.
    labels: Unused.
    mode: Estimator mode; only `tf.estimator.ModeKeys.PREDICT` is accepted.
    params: Model parameters, wrapped in a `ParamsDict` before use.

  Returns:
    A `TPUEstimatorSpec` when `export_tpu_model` is truthy, otherwise an
    `EstimatorSpec`, whose predictions hold `category_bytes`, `score_bytes`,
    `key` and, when `output_image_info` is truthy, `image_info`.

  Raises:
    ValueError: If `mode` is not `tf.estimator.ModeKeys.PREDICT`.
  """
  del labels  # unused.
  if mode != tf.estimator.ModeKeys.PREDICT:
    raise ValueError('To build the serving model_fn, set '
                     'mode = `tf.estimator.ModeKeys.PREDICT`')

  model_params = params_dict.ParamsDict(params)
  images = features['images']
  # NOTE(review): these static `height`/`width` values are reassigned below
  # before they are ever read; the unpacking only requires a 4-element shape.
  _, height, width, _ = images.get_shape().as_list()

  model_fn = factory.model_generator(model_params)
  outputs = model_fn.build_outputs(
      features['images'], labels=None, mode=mode_keys.PREDICT)

  # Resize the logits to the spatial size of the input images.
  logits = tf.image.resize_bilinear(
      outputs['logits'], tf.shape(images)[1:3], align_corners=False)

  # NOTE(review): indexing [0] and [1] after the squeeze presumably assumes a
  # batch of one, with row 0 of `image_info` holding (height, width) -- confirm.
  original_image_size = tf.squeeze(features['image_info'][:, 0:1, :])
  height = original_image_size[0]
  width = original_image_size[1]
  offset_height = tf.zeros_like(height, dtype=tf.int32)
  offset_width = tf.zeros_like(width, dtype=tf.int32)

  # Clip the predictions to original image size.
  logits = tf.image.crop_to_bounding_box(logits, offset_height, offset_width,
                                         tf.cast(height, dtype=tf.int32),
                                         tf.cast(width, dtype=tf.int32))
  probabilities = tf.nn.softmax(logits)

  score_threshold_placeholder = features['score_thresholds']
  key_placeholder = features['key']

  # Append trailing singleton dimensions so the thresholds can be compared
  # against `probabilities`.
  score_threshold_pred_expanded = score_threshold_placeholder
  for _ in range(0, logits.shape.ndims - 1):
    score_threshold_pred_expanded = tf.expand_dims(
        score_threshold_pred_expanded, -1)

  # Zero out probabilities that do not exceed the threshold, then keep the
  # maximum over axis 3.
  scores = tf.where(probabilities > score_threshold_pred_expanded,
                    probabilities, tf.zeros_like(probabilities))
  scores = tf.reduce_max(scores, 3)
  scores = tf.expand_dims(scores, -1)
  # Scale to [0, 255] and cast to uint8 for PNG encoding.
  scores = tf.cast(tf.minimum(scores * 255.0, 255), tf.uint8)
  categories = tf.to_int32(tf.expand_dims(tf.argmax(probabilities, 3), -1))

  # Generate images for scores and categories.
  score_bytes = tf.map_fn(
      tf.image.encode_png, scores, back_prop=False, dtype=tf.string)
  category_bytes = tf.map_fn(
      tf.image.encode_png,
      tf.cast(categories, tf.uint8),
      back_prop=False,
      dtype=tf.string)

  predictions = {}

  predictions['category_bytes'] = tf.identity(
      category_bytes, name='category_bytes')
  predictions['score_bytes'] = tf.identity(score_bytes, name='score_bytes')
  predictions['key'] = tf.identity(key_placeholder, name='key')
  if output_image_info:
    predictions['image_info'] = tf.identity(
        features['image_info'], name='image_info')

  if export_tpu_model:
    return tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions)
  return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
def main(unused_argv):
  """Entry point: trains, evaluates and optionally exports the ResNet model.

  Parameters are assembled from the ResNet defaults, then the config file,
  the `--params_override` string and finally individual flags. Depending on
  `FLAGS.mode` the function either evaluates every new checkpoint (`eval`),
  trains to `params.train_steps` (`train`), or alternates training and
  evaluation every `FLAGS.steps_per_eval` steps (`train_and_eval`). After
  training, a SavedModel is exported when `FLAGS.export_dir` is set.

  Args:
    unused_argv: positional command-line arguments; ignored.
  """
  params = params_dict.ParamsDict(
      resnet_config.RESNET_CFG, resnet_config.RESNET_RESTRICTIONS)
  for override_source in (FLAGS.config_file, FLAGS.params_override):
    params = params_dict.override_params_dict(
        params, override_source, is_strict=True)
  params = flags_to_params.override_params_from_input_flags(params, FLAGS)
  params.validate()
  params.lock()

  tpu_name = FLAGS.tpu if (FLAGS.tpu or params.use_tpu) else ''
  cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
      tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  # With async checkpointing a dedicated hook saves checkpoints in `train`
  # mode, so the step-based saver of the RunConfig is switched off.
  save_checkpoints_steps = (
      None if params.use_async_checkpointing
      else max(5000, params.iterations_per_loop))

  session_config = tf.ConfigProto(
      graph_options=tf.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_meta_optimizer=True)))
  tpu_config = tf.estimator.tpu.TPUConfig(
      iterations_per_loop=params.iterations_per_loop,
      num_shards=params.num_cores,
      per_host_input_for_training=(
          tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2))
  run_config = tf.estimator.tpu.RunConfig(
      cluster=cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=session_config,
      tpu_config=tpu_config)

  resnet_classifier = tf.estimator.tpu.TPUEstimator(
      use_tpu=params.use_tpu,
      model_fn=resnet_model_fn,
      config=run_config,
      params=params.as_dict(),
      train_batch_size=params.train_batch_size,
      eval_batch_size=params.eval_batch_size,
      export_to_tpu=FLAGS.export_to_tpu)

  assert params.precision in ('bfloat16', 'float32'), (
      'Invalid value for precision parameter; '
      'must be bfloat16 or float32.')
  tf.logging.info('Precision: %s', params.precision)
  use_bfloat16 = params.precision == 'bfloat16'

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()

    def build_input(is_training, selection):
      return imagenet_input.ImageNetBigtableInput(
          is_training=is_training,
          use_bfloat16=use_bfloat16,
          transpose_input=params.transpose_input,
          selection=selection,
          augment_name=FLAGS.augment_name,
          randaug_num_layers=FLAGS.randaug_num_layers,
          randaug_magnitude=FLAGS.randaug_magnitude)

    imagenet_train = build_input(True, select_train)
    imagenet_eval = build_input(False, select_eval)
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)

    def build_input(is_training):
      return imagenet_input.ImageNetInput(
          is_training=is_training,
          data_dir=FLAGS.data_dir,
          transpose_input=params.transpose_input,
          cache=params.use_cache and is_training,
          image_size=params.image_size,
          num_parallel_calls=params.num_parallel_calls,
          include_background_label=(params.num_label_classes == 1001),
          use_bfloat16=use_bfloat16,
          augment_name=FLAGS.augment_name,
          randaug_num_layers=FLAGS.randaug_num_layers,
          randaug_magnitude=FLAGS.randaug_magnitude)

    imagenet_train = build_input(True)
    imagenet_eval = build_input(False)

  steps_per_epoch = params.num_train_images // params.train_batch_size
  eval_steps = params.num_eval_images // params.eval_batch_size

  if FLAGS.mode == 'eval':
    # Run evaluation when there's a new checkpoint
    checkpoints = tf.train.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout)
    for ckpt in checkpoints:
      tf.logging.info('Starting to evaluate.')
      try:
        eval_start = time.time()  # This time will include compilation time
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                        eval_results, int(time.time() - eval_start))

        # Terminate eval job when final checkpoint is reached
        ckpt_step = int(os.path.basename(ckpt).split('-')[1])
        if ckpt_step >= params.train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d', ckpt_step)
          break
      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long
        # after the CPU job tells it to start evaluating. In this case, the
        # checkpoint file could have been deleted already.
        tf.logging.info(
            'Checkpoint %s no longer exists, skipping checkpoint', ckpt)
    return

  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
  try:
    current_step = tf.train.load_variable(FLAGS.model_dir,
                                          tf.GraphKeys.GLOBAL_STEP)
  except (TypeError, ValueError, tf.errors.NotFoundError):
    # No readable checkpoint yet: start from scratch.
    current_step = 0

  tf.logging.info(
      'Training for %d steps (%.2f epochs in total). Current step %d.',
      params.train_steps,
      params.train_steps / steps_per_epoch,
      current_step)

  start_timestamp = time.time()  # This time will include compilation time

  if FLAGS.mode == 'train':
    hooks = []
    if params.use_async_checkpointing:
      try:
        from tensorflow.contrib.tpu.python.tpu import async_checkpoint  # pylint: disable=g-import-not-at-top
      except ImportError:
        logging.exception(
            'Async checkpointing is not supported in TensorFlow 2.x')
        raise
      hooks.append(
          async_checkpoint.AsyncCheckpointSaverHook(
              checkpoint_dir=FLAGS.model_dir,
              save_steps=max(5000, params.iterations_per_loop)))
    if FLAGS.profile_every_n_steps > 0:
      hooks.append(
          tpu_profiler_hook.TPUProfilerHook(
              save_steps=FLAGS.profile_every_n_steps,
              output_dir=FLAGS.model_dir,
              tpu=FLAGS.tpu))
    resnet_classifier.train(
        input_fn=imagenet_train.input_fn,
        max_steps=params.train_steps,
        hooks=hooks)
  else:
    assert FLAGS.mode == 'train_and_eval'
    while current_step < params.train_steps:
      # Train for up to steps_per_eval number of steps.
      # At the end of training, a checkpoint will be written to --model_dir.
      next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                            params.train_steps)
      resnet_classifier.train(
          input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
      current_step = next_checkpoint

      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      next_checkpoint, int(time.time() - start_timestamp))

      # Evaluate the model on the most recent model in --model_dir.
      # Since evaluation happens in batches of --eval_batch_size, some images
      # may be excluded modulo the batch size. As long as the batch size is
      # consistent, the evaluated images are also consistent.
      tf.logging.info('Starting to evaluate.')
      eval_results = resnet_classifier.evaluate(
          input_fn=imagenet_eval.input_fn, steps=eval_steps)
      tf.logging.info('Eval results at step %d: %s',
                      next_checkpoint, eval_results)

    tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                    params.train_steps, int(time.time() - start_timestamp))

  if FLAGS.export_dir is not None:
    # The guide to serve a exported TensorFlow model is at:
    #    https://www.tensorflow.org/serving/serving_basic
    tf.logging.info('Starting to export model.')
    export_path = resnet_classifier.export_saved_model(
        export_dir_base=FLAGS.export_dir,
        serving_input_receiver_fn=imagenet_input.image_serving_input_fn)
    if FLAGS.add_warmup_requests:
      inference_warmup.write_warmup_requests(
          export_path,
          FLAGS.model_name,
          params.image_size,
          batch_sizes=FLAGS.inference_batch_sizes,
          image_format='JPEG')
def main(unused_argv):
  """Entry point: trains SqueezeNet on TPU, evaluating after every cycle.

  Parameters come from the SqueezeNet defaults, overridden in turn by the
  config file, the `--params_override` string and individual flags. The
  total number of training steps is derived from epochs, examples per epoch
  and batch size, and is split evenly across `params.eval.num_evals`
  train/evaluate cycles.

  Args:
    unused_argv: positional command-line arguments; ignored.
  """
  params = params_dict.ParamsDict(squeezenet_config.SQUEEZENET_CFG,
                                  squeezenet_config.SQUEEZENET_RESTRICTIONS)
  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)
  params = flags_to_params.override_params_from_input_flags(params, FLAGS)

  total_steps = (
      (params.train.num_epochs * params.train.num_examples_per_epoch) //
      params.train.train_batch_size)
  params.override(
      {
          "train": {
              "total_steps": total_steps
          },
          "eval": {
              "num_steps_per_eval": (total_steps // params.eval.num_evals)
          },
      },
      is_strict=False)

  params.validate()
  params.lock()

  tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
      FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  # Always bind the name: previously it was only assigned on the non-async
  # path, so enabling async checkpointing crashed with UnboundLocalError when
  # the RunConfig below was built. `None` leaves step-based saving disabled.
  if params.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(5000, params.train.iterations_per_loop)

  run_config = contrib_tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=params.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False),
      tpu_config=contrib_tpu.TPUConfig(
          iterations_per_loop=params.train.iterations_per_loop,
          num_shards=params.train.num_cores_per_replica,
      ),
  )

  estimator = contrib_tpu.TPUEstimator(
      model_fn=squeezenet_model.model_fn,
      use_tpu=params.use_tpu,
      config=run_config,
      train_batch_size=params.train.train_batch_size,
      eval_batch_size=params.eval.eval_batch_size,
      params=params.as_dict(),
  )

  eval_steps = params.eval.num_eval_examples // params.eval.eval_batch_size

  for eval_cycle in range(params.eval.num_evals):
    current_cycle_last_train_step = ((eval_cycle + 1) *
                                     params.eval.num_steps_per_eval)
    # The target is an absolute global step, so it must be passed as
    # `max_steps`. `steps` is incremental: it made cycle k train another
    # k * num_steps_per_eval steps, overshooting `total_steps`.
    estimator.train(
        input_fn=data_pipeline.InputReader(FLAGS.data_dir, is_training=True),
        max_steps=current_cycle_last_train_step)

    tf.logging.info("Running evaluation")
    tf.logging.info(
        "%s",
        estimator.evaluate(
            input_fn=data_pipeline.InputReader(
                FLAGS.data_dir, is_training=False),
            steps=eval_steps))
def main(argv):
  """Entry point: builds the detection model and runs the requested mode.

  The configuration starts from the generator for `FLAGS.model`, is
  overridden by the optional config file and `--params_override`, and then by
  runtime flags. A copy taken before the platform-specific overrides is what
  gets saved to the model directory. `FLAGS.mode` selects between `train`,
  `eval` (evaluate each new checkpoint until the final step or a timeout)
  and `train_and_eval` (alternate in cycles of
  `params.eval.num_steps_per_eval` steps); any other value is only logged.

  Args:
    argv: positional command-line arguments; ignored.
  """
  del argv  # Unused.

  params = factory.config_generator(FLAGS.model)
  if FLAGS.config_file:
    params = params_dict.override_params_dict(
        params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)

  runtime_overrides = {
      'use_tpu': FLAGS.use_tpu,
      'model_dir': FLAGS.model_dir,
  }
  params.override(runtime_overrides, is_strict=True)

  if not FLAGS.use_tpu:
    non_tpu_overrides = {
        'architecture': {
            'use_bfloat16': False,
        },
        'batch_norm_activation': {
            'use_sync_bn': False,
        },
    }
    params.override(non_tpu_overrides, is_strict=True)

  # Only run spatial partitioning in training mode.
  if FLAGS.mode != 'train':
    params.train.input_partition_dims = None
    params.train.num_cores_per_replica = None

  # Snapshot taken before the platform details below are merged in; this is
  # the object later handed to `config_utils.save_config`.
  params_to_save = params_dict.ParamsDict(params)

  platform_overrides = {
      'platform': {
          'eval_master': FLAGS.eval_master,
          'tpu': FLAGS.tpu,
          'tpu_zone': FLAGS.tpu_zone,
          'gcp_project': FLAGS.gcp_project,
      },
      'tpu_job_name': FLAGS.tpu_job_name,
      'train': {
          'num_shards': FLAGS.num_cores,
      },
  }
  params.override(platform_overrides, is_strict=False)

  params.validate()
  params.lock()
  logging.info('Model Parameters: %s', pprint.pformat(params.as_dict()))

  # Builds detection model on TPUs.
  executor = tpu_executor.TpuExecutor(model_builder.ModelFn(params), params)

  # Prepares input functions for train and eval.
  train_input_fn = input_reader.InputFn(
      params.train.train_file_pattern,
      params,
      mode=ModeKeys.TRAIN,
      dataset_type=params.train.train_dataset_type)

  eval_mode = (ModeKeys.EVAL if params.eval.type == 'customized'
               else ModeKeys.PREDICT_WITH_GT)
  eval_input_fn = input_reader.InputFn(
      params.eval.eval_file_pattern,
      params,
      mode=eval_mode,
      dataset_type=params.eval.eval_dataset_type)

  eval_times = (params.eval.eval_samples // params.eval.eval_batch_size
                if params.eval.eval_samples else None)

  # Runs the model.
  run_mode = FLAGS.mode
  if run_mode == 'train':
    config_utils.save_config(params_to_save, params.model_dir)
    executor.train(train_input_fn, params.train.total_steps)
    if FLAGS.eval_after_training:
      executor.evaluate(eval_input_fn, eval_times)

  elif run_mode == 'eval':

    def terminate_eval():
      # Returning True tells the checkpoint iterator to stop on timeout.
      logging.info('Terminating eval after %d seconds of no checkpoints',
                   params.eval.eval_timeout)
      return True

    # Runs evaluation when there's a new checkpoint.
    checkpoints = tf.train.checkpoints_iterator(
        params.model_dir,
        min_interval_secs=params.eval.min_eval_interval,
        timeout=params.eval.eval_timeout,
        timeout_fn=terminate_eval)
    for ckpt in checkpoints:
      # Terminates eval job when final checkpoint is reached.
      ckpt_name = six.ensure_str(os.path.basename(ckpt))
      current_step = int(ckpt_name.split('-')[1])
      logging.info('Starting to evaluate.')
      try:
        executor.evaluate(eval_input_fn, eval_times, ckpt)
        if current_step >= params.train.total_steps:
          logging.info('Evaluation finished after training step %d',
                       current_step)
          break
      except tf.errors.NotFoundError as e:
        logging.info(
            'Erorr occurred during evaluation: NotFoundError: %s', e)

  elif run_mode == 'train_and_eval':
    config_utils.save_config(params_to_save, params.model_dir)
    steps_per_cycle = params.eval.num_steps_per_eval
    num_cycles = int(params.train.total_steps / steps_per_cycle)
    for cycle in range(num_cycles):
      logging.info('Start training cycle %d.', cycle)
      # Absolute step at which this cycle's training ends.
      current_cycle_last_train_step = (
          (cycle + 1) * params.eval.num_steps_per_eval)
      executor.train(train_input_fn, current_cycle_last_train_step)
      executor.evaluate(eval_input_fn, eval_times)

  else:
    logging.info('Mode not found.')
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Config template to train ShapeMask.""" from configs import base_config from hyperparameters import params_dict # pylint: disable=line-too-long SHAPEMASK_RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+/)conv2d(|_([1-9]|10))\/' SHAPEMASK_CFG = params_dict.ParamsDict(base_config.BASE_CFG) SHAPEMASK_CFG.override( { 'type': 'shapemask', 'train': { 'total_steps': 45000, 'learning_rate': { 'learning_rate_steps': [30000, 40000], }, 'frozen_variable_prefix': SHAPEMASK_RESNET_FROZEN_VAR_PREFIX, 'regularization_variable_regex': None, }, 'eval': { 'type': 'shapemask_box_and_mask', 'mask_eval_class': 'all', # 'all', 'voc', or 'nonvoc'. },
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Config template to train Retinanet.""" from configs import base_config from hyperparameters import params_dict # pylint: disable=line-too-long RETINANET_CFG = params_dict.ParamsDict(base_config.BASE_CFG) RETINANET_CFG.override( { 'type': 'retinanet', 'architecture': { 'parser': 'retinanet_parser', 'backbone': 'resnet', 'multilevel_features': 'fpn', 'use_bfloat16': True, }, 'retinanet_parser': { 'use_bfloat16': True, 'output_size': [640, 640], 'match_threshold': 0.5, 'unmatched_threshold': 0.5, 'aug_rand_hflip': True,
def main(unused_argv):
  """Entry point: trains/evaluates MobileNet on TPU and exports the model.

  Parameters are assembled from flags, the MobileNet defaults, the config
  file and `--params_override`. `FLAGS.mode` selects `eval` (evaluate each
  new checkpoint until a timeout), `train_and_eval` (alternate training and
  evaluation every `params.train_steps_per_eval` steps), or plain training
  for any other value. Afterwards a SavedModel is written when
  `FLAGS.export_dir` is set, and a SavedModel plus a TFLite flatbuffer when
  `FLAGS.tflite_export_dir` is set.

  Args:
    unused_argv: positional command-line arguments; ignored.
  """
  del unused_argv  # Unused

  params = params_dict.ParamsDict({}, mobilenet_config.MOBILENET_RESTRICTIONS)
  params = flags_to_params.override_params_from_input_flags(params, FLAGS)
  params = params_dict.override_params_dict(
      params, mobilenet_config.MOBILENET_CFG, is_strict=False)
  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)

  # Default layout: no transposition, batch on axis 0. When transposition is
  # enabled the batch axis moves depending on the per-shard batch size.
  input_perm = [0, 1, 2, 3]
  output_perm = [0, 1, 2, 3]
  batch_axis = 0
  batch_size_per_shard = params.train_batch_size // params.num_cores
  if params.transpose_enabled:
    if batch_size_per_shard >= 64:
      input_perm = [3, 0, 1, 2]
      output_perm = [1, 2, 3, 0]
      batch_axis = 3
    else:
      input_perm = [2, 0, 1, 3]
      output_perm = [1, 2, 0, 3]
      batch_axis = 2

  additional_params = {
      'input_perm': input_perm,
      'output_perm': output_perm,
  }
  params = params_dict.override_params_dict(
      params, additional_params, is_strict=False)

  params.validate()
  params.lock()

  tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
      FLAGS.tpu if (FLAGS.tpu or params.use_tpu) else '',
      zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project)

  if params.eval_total_size > 0:
    eval_size = params.eval_total_size
  else:
    eval_size = params.num_eval_images
  eval_steps = eval_size // params.eval_batch_size

  # In eval mode one TPU loop covers a whole evaluation pass.
  iterations = (eval_steps if FLAGS.mode == 'eval'
                else params.iterations_per_loop)
  eval_batch_size = (None if FLAGS.mode == 'train'
                     else params.eval_batch_size)
  per_host_input_for_training = (params.num_cores <= 8
                                 if FLAGS.mode == 'train' else True)

  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_secs=FLAGS.save_checkpoints_secs,
      save_summary_steps=FLAGS.save_summary_steps,
      session_config=tf.ConfigProto(
          allow_soft_placement=True,
          log_device_placement=FLAGS.log_device_placement),
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=iterations,
          per_host_input_for_training=per_host_input_for_training))

  inception_classifier = tf.contrib.tpu.TPUEstimator(
      model_fn=model_fn,
      use_tpu=params.use_tpu,
      config=run_config,
      params=params.as_dict(),
      train_batch_size=params.train_batch_size,
      eval_batch_size=eval_batch_size,
      batch_axis=(batch_axis, 0))

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  imagenet_train = supervised_images.InputPipeline(
      is_training=True, data_dir=FLAGS.data_dir)
  imagenet_eval = supervised_images.InputPipeline(
      is_training=False, data_dir=FLAGS.data_dir)

  if params.moving_average:
    eval_hooks = [LoadEMAHook(FLAGS.model_dir)]
  else:
    eval_hooks = []

  if FLAGS.mode == 'eval':

    def terminate_eval():
      # Returning True tells the checkpoint iterator to stop on timeout.
      tf.logging.info('%d seconds without new checkpoints have elapsed '
                      '... terminating eval' % FLAGS.eval_timeout)
      return True

    def get_next_checkpoint():
      return evaluation.checkpoints_iterator(
          FLAGS.model_dir,
          min_interval_secs=params.min_eval_interval,
          timeout=FLAGS.eval_timeout,
          timeout_fn=terminate_eval)

    for checkpoint in get_next_checkpoint():
      tf.logging.info('Starting to evaluate.')
      try:
        eval_results = inception_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            hooks=eval_hooks,
            checkpoint_path=checkpoint)
        tf.logging.info('Evaluation results: %s' % eval_results)
      except tf.errors.NotFoundError:
        # skip checkpoint if it gets deleted prior to evaluation
        # The checkpoint path is now supplied; previously the literal "%s"
        # was logged because no argument filled the placeholder.
        tf.logging.info(
            'Checkpoint %s no longer exists ... skipping', checkpoint)

  elif FLAGS.mode == 'train_and_eval':
    for cycle in range(params.train_steps // params.train_steps_per_eval):
      tf.logging.info('Starting training cycle %d.' % cycle)
      inception_classifier.train(
          input_fn=imagenet_train.input_fn,
          steps=params.train_steps_per_eval)

      tf.logging.info('Starting evaluation cycle %d .' % cycle)
      eval_results = inception_classifier.evaluate(
          input_fn=imagenet_eval.input_fn, steps=eval_steps, hooks=eval_hooks)
      tf.logging.info('Evaluation results: %s' % eval_results)

  else:
    tf.logging.info('Starting training ...')
    inception_classifier.train(
        input_fn=imagenet_train.input_fn, steps=params.train_steps)

  if FLAGS.export_dir:
    tf.logging.info('Starting to export model with image input.')
    inception_classifier.export_saved_model(
        export_dir_base=FLAGS.export_dir,
        serving_input_receiver_fn=image_serving_input_fn)

  if FLAGS.tflite_export_dir:
    tf.logging.info('Starting to export default TensorFlow model.')
    savedmodel_dir = inception_classifier.export_saved_model(
        export_dir_base=FLAGS.tflite_export_dir,
        serving_input_receiver_fn=functools.partial(tensor_serving_input_fn, params))  # pylint: disable=line-too-long
    # The export path can come back as bytes; joining bytes with the str file
    # name below raises TypeError on Python 3, so normalise to str first.
    if isinstance(savedmodel_dir, bytes):
      savedmodel_dir = savedmodel_dir.decode('utf-8')

    tf.logging.info('Starting to export TFLite.')
    converter = tf.lite.TFLiteConverter.from_saved_model(
        savedmodel_dir, output_arrays=['softmax_tensor'])
    tflite_file_name = 'mobilenet.tflite'
    if params.post_quantize:
      converter.post_training_quantize = True
      tflite_file_name = 'quantized_' + tflite_file_name
    tflite_file = os.path.join(savedmodel_dir, tflite_file_name)
    tflite_model = converter.convert()
    # Context manager guarantees the file is flushed and closed, including
    # when the write itself fails.
    with tf.gfile.GFile(tflite_file, 'wb') as tflite_out:
      tflite_out.write(tflite_model)
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Config template to train Retinanet.""" from configs import detection_config from hyperparameters import params_dict # pylint: disable=line-too-long RETINANET_CFG = params_dict.ParamsDict(detection_config.DETECTION_CFG) RETINANET_CFG.override( { 'type': 'retinanet', 'architecture': { 'parser': 'retinanet_parser', 'backbone': 'resnet', 'multilevel_features': 'fpn', 'output_flat_fpn_features': False, }, 'retinanet_parser': { 'output_size': [640, 640], 'match_threshold': 0.5, 'unmatched_threshold': 0.5, 'aug_rand_hflip': True, 'aug_scale_min': 1.0,