def create_session_config(log_device_placement=False,
                          enable_graph_rewriter=False,
                          gpu_mem_fraction=0.95,
                          use_tpu=False,
                          xla_jit_level=tf.OptimizerOptions.OFF,
                          inter_op_parallelism_threads=0,
                          intra_op_parallelism_threads=0):
  """The TensorFlow Session config to use."""
  if use_tpu:
    graph_options = tf.GraphOptions()
  else:
    if enable_graph_rewriter:
      rewrite_options = rewriter_config_pb2.RewriterConfig()
      rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.ON
      graph_options = tf.GraphOptions(rewrite_options=rewrite_options)
    else:
      graph_options = tf.GraphOptions(
          optimizer_options=tf.OptimizerOptions(
              opt_level=tf.OptimizerOptions.L1,
              do_function_inlining=False,
              global_jit_level=xla_jit_level))

  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=gpu_mem_fraction)

  config = tf.ConfigProto(
      allow_soft_placement=True,
      graph_options=graph_options,
      gpu_options=gpu_options,
      log_device_placement=log_device_placement,
      inter_op_parallelism_threads=inter_op_parallelism_threads,
      intra_op_parallelism_threads=intra_op_parallelism_threads,
      isolate_session_state=True)
  return config

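# Hedged usage sketch (not from the original source): one way to wire
# create_session_config() into a session. The flag values here are
# illustrative only.
def example_session_usage():
  config = create_session_config(
      enable_graph_rewriter=True,  # turn on the Grappler layout optimizer
      gpu_mem_fraction=0.9)
  with tf.Session(config=config) as sess:
    # Any graph built above would be initialized and run here.
    sess.run(tf.global_variables_initializer())
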
def main():
  tf.disable_eager_execution()
  with tf.device('/gpu:0'):
    t1 = tf.random.uniform(shape=[32, 56, 56, 64], dtype=tf.half)
    t2 = tf.random.uniform(shape=[3, 3, 64, 64], dtype=tf.half)
    t = tf.nn.conv2d(
        input=t1,
        filters=t2,
        strides=[2, 2],
        padding='SAME',
        data_format='NHWC',
        name='Conv2D')

  run_options = tf.RunOptions()
  run_options.trace_level = run_options.FULL_TRACE
  run_metadata = tf.RunMetadata()

  options = tf.GraphOptions(build_cost_model=1)
  cfg = tf.ConfigProto(graph_options=options)
  with tf.Session(config=cfg) as sess:
    sess.run(tf.global_variables_initializer())
    _ = sess.run([t], options=run_options, run_metadata=run_metadata)
    for node in run_metadata.cost_graph.node:
      if node.name == 'Conv2D':
        print(node.name, ':', node.compute_cost * 1000, 'ns.')

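# Hedged follow-up sketch: because trace_level is FULL_TRACE, the same
# RunMetadata also carries measured per-op timings in step_stats, so the cost
# model above is not the only way to inspect the Conv2D cost. Field names
# follow the standard StepStats proto; the helper itself is illustrative.
def print_step_stats(run_metadata, op_name='Conv2D'):
  for dev_stats in run_metadata.step_stats.dev_stats:
    for node_stats in dev_stats.node_stats:
      if node_stats.node_name == op_name:
        # all_end_rel_micros is the measured wall time of the op, in us.
        print(dev_stats.device, node_stats.node_name,
              node_stats.all_end_rel_micros, 'us')
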
def build_and_export_tpu(model_path, export_model_path, master):
  export_graph = tf.Graph()
  tpu_config = tf.ConfigProto(
      operation_timeout_in_ms=600 * 1000,
      allow_soft_placement=True,
      graph_options=tf.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_meta_optimizer=True)),
      isolate_session_state=True)
  export_sess = tf.Session(master, graph=export_graph, config=tpu_config)

  if FLAGS.enable_bf16:
    if tf.io.gfile.exists(export_model_path + '_pre_bf16'):
      tf.io.gfile.rmtree(export_model_path + '_pre_bf16')
    builder = tf.compat.v1.saved_model.Builder(export_model_path + '_pre_bf16')
  else:
    if tf.io.gfile.exists(export_model_path):
      tf.io.gfile.rmtree(export_model_path)
    builder = tf.compat.v1.saved_model.Builder(export_model_path)

  with export_graph.as_default():
    features, _ = get_inference_input()
    policy_output, value_output = tpu_call(tf.reshape(features, [-1]))
    tf.train.init_from_checkpoint(model_path, {'/': '/'})
    export_sess.run(tf.initializers.global_variables())
    signature_def_map = {
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.saved_model.predict_signature_def(
                inputs={'features': features},
                outputs={
                    'policy_output': policy_output,
                    'value_output': value_output,
                })
    }
    builder.add_meta_graph_and_variables(
        export_sess,
        tags=[
            tf.saved_model.tag_constants.SERVING,
            tf.saved_model.tag_constants.TPU
        ],
        signature_def_map=signature_def_map)
    tf.logging.info('graph saved.')
    builder.save()

  if FLAGS.enable_bf16:
    tf.logging.info('Convert to BF16')
    options = converter_options_pb2.ConverterOptions()
    options.disable_convert = True
    converter_cli.ConvertSavedModel(
        export_model_path + '_pre_bf16',
        export_model_path,
        overwrite=True,
        options=options)
    tf.logging.info('BF16 Conversion Complete')

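# Hedged usage sketch (illustrative, not from the original source): reading
# back the SavedModel written by build_and_export_tpu. The builder saved the
# meta graph under both the SERVING and TPU tags, so the same tag set must be
# passed to the loader; `master` is assumed to point at a TPU worker, as
# above.
def load_exported_model(export_model_path, master):
  sess = tf.Session(master, graph=tf.Graph())
  meta_graph = tf.saved_model.loader.load(
      sess,
      [tf.saved_model.tag_constants.SERVING, tf.saved_model.tag_constants.TPU],
      export_model_path)
  sig = meta_graph.signature_def[
      tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
  return sess, sig.inputs['features'].name, sig.outputs['policy_output'].name
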
def get_session(params, isolate_session_state=True):
  """Builds and returns a `tf.Session`."""
  config = tf.ConfigProto(
      isolate_session_state=isolate_session_state,
      allow_soft_placement=True,
      graph_options=tf.GraphOptions(
          optimizer_options=tf.OptimizerOptions(
              opt_level=tf.OptimizerOptions.L0,
              do_common_subexpression_elimination=False,
              do_function_inlining=False,
              do_constant_folding=False)))
  return tf.Session(target=params.master, config=config)

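# Hedged usage sketch: get_session() only reads `params.master`, so any object
# exposing that attribute will do; SimpleNamespace here is purely
# illustrative.
def example_get_session():
  from types import SimpleNamespace
  params = SimpleNamespace(master='')  # '' targets the in-process runtime
  return get_session(params, isolate_session_state=False)
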
def __init__(self, hparams, train_iterations, eval_steps, per_host_v1=False):
  tf.logging.info("TrainLowLevelRunner: constructor")

  self.feature_structure = {}
  self.eval_feature_structure = {}
  self.loss = None
  self.infeed_queue = []
  self.eval_infeed_queue = []
  self.enqueue_ops = []
  self.eval_enqueue_ops = []
  self.dataset_initializer = []
  self.eval_dataset_initializer = []
  self.is_local = ((hparams.master == "") and (hparams.tpu_name is None))
  self.per_host_v1 = per_host_v1
  self.iterations = train_iterations
  self.eval_steps = eval_steps
  self.outfeed_tensors = []
  self.outfeed_names = []
  self.dequeue_ops = []
  self.predictions = {}
  self.sess = None
  self.graph = tf.Graph()
  self.hparams = hparams
  self.num_hosts = hparams.num_shards // hparams.num_shards_per_host

  with self.graph.as_default():
    self.tpu_init = [tpu.initialize_system()]
    self.tpu_shutdown = tpu.shutdown_system()

  self.resolver = get_resolver(hparams)
  session_config = tf.ConfigProto(
      allow_soft_placement=True,
      isolate_session_state=True,
      operation_timeout_in_ms=600 * 60 * 1000,
      graph_options=tf.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_meta_optimizer=True)))
  if self.hparams.tpu_name is None:
    master = self.hparams.master
  else:
    cluster_spec = self.resolver.cluster_spec()
    if cluster_spec:
      session_config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
    master = self.resolver.get_master()
  self.sess = tf.Session(master, graph=self.graph, config=session_config)
  self.sess.run(self.tpu_init)

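# Hedged sketch of the get_resolver() helper the constructor calls; its real
# implementation is not part of this excerpt. It assumes hparams also carries
# tpu_zone and gcp_project fields, which is an assumption of this sketch.
def get_resolver(hparams):
  if hparams.tpu_name is None:
    return None
  return tf.distribute.cluster_resolver.TPUClusterResolver(
      hparams.tpu_name, zone=hparams.tpu_zone, project=hparams.gcp_project)
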
def main(unused_argv):
  input_image_size = FLAGS.input_image_size
  if not input_image_size:
    input_image_size = model_builder_factory.get_model_input_size(
        FLAGS.model_name)

  if FLAGS.holdout_shards:
    holdout_images = int(
        FLAGS.num_train_images * FLAGS.holdout_shards / 1024.0)
    FLAGS.num_train_images -= holdout_images
    FLAGS.num_eval_images = holdout_images

  # For the ImageNet dataset, include the background label if the number of
  # output classes is 1001.
  include_background_label = (FLAGS.num_label_classes == 1001)

  if FLAGS.tpu or FLAGS.use_tpu:
    tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
  else:
    tpu_cluster_resolver = None

  if FLAGS.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(100, FLAGS.iterations_per_loop)
  config = tf.estimator.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2))  # pylint: disable=line-too-long

  # Initializes model parameters.
  params = dict(
      steps_per_epoch=FLAGS.num_train_images / FLAGS.train_batch_size,
      use_bfloat16=FLAGS.use_bfloat16)
  est = tf.estimator.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      export_to_tpu=FLAGS.export_to_tpu,
      params=params)

  if (FLAGS.model_name.startswith('efficientnet-lite') or
      FLAGS.model_name.startswith('efficientnet-edgetpu')):
    # lite and edgetpu models use bilinear resizing for easier
    # post-quantization.
    resize_method = tf.image.ResizeMethod.BILINEAR
  else:
    resize_method = None

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  def build_imagenet_input(is_training):
    """Generate ImageNetInput for training and eval."""
    if FLAGS.bigtable_instance:
      logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
      select_train, select_eval = _select_tables_from_flags()
      return imagenet_input.ImageNetBigtableInput(
          is_training=is_training,
          use_bfloat16=FLAGS.use_bfloat16,
          transpose_input=FLAGS.transpose_input,
          selection=select_train if is_training else select_eval,
          num_label_classes=FLAGS.num_label_classes,
          include_background_label=include_background_label,
          augment_name=FLAGS.augment_name,
          mixup_alpha=FLAGS.mixup_alpha,
          randaug_num_layers=FLAGS.randaug_num_layers,
          randaug_magnitude=FLAGS.randaug_magnitude,
          resize_method=resize_method)
    else:
      if FLAGS.data_dir == FAKE_DATA_DIR:
        logging.info('Using fake dataset.')
      else:
        logging.info('Using dataset: %s', FLAGS.data_dir)
      return imagenet_input.ImageNetInput(
          is_training=is_training,
          data_dir=FLAGS.data_dir,
          transpose_input=FLAGS.transpose_input,
          cache=FLAGS.use_cache and is_training,
          image_size=input_image_size,
          num_parallel_calls=FLAGS.num_parallel_calls,
          use_bfloat16=FLAGS.use_bfloat16,
          num_label_classes=FLAGS.num_label_classes,
          include_background_label=include_background_label,
          augment_name=FLAGS.augment_name,
          mixup_alpha=FLAGS.mixup_alpha,
          randaug_num_layers=FLAGS.randaug_num_layers,
          randaug_magnitude=FLAGS.randaug_magnitude,
          resize_method=resize_method,
          holdout_shards=FLAGS.holdout_shards)

  imagenet_train = build_imagenet_input(is_training=True)
  imagenet_eval = build_imagenet_input(is_training=False)

  if FLAGS.mode == 'eval':
    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
    # Run evaluation when there's a new checkpoint.
    for ckpt in tf.train.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt,
            name=FLAGS.eval_name)
        elapsed_time = int(time.time() - start_timestamp)
        logging.info('Eval results: %s. Elapsed seconds: %d',
                     eval_results, elapsed_time)
        if FLAGS.archive_ckpt:
          utils.archive_ckpt(eval_results, eval_results['top_1_accuracy'],
                             ckpt)

        # Terminate eval job when final checkpoint is reached.
        try:
          current_step = int(os.path.basename(ckpt).split('-')[1])
        except IndexError:
          logging.info('%s has no global step info: stop!', ckpt)
          break

        if current_step >= FLAGS.train_steps:
          logging.info('Evaluation finished after training step %d',
                       current_step)
          break
      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long
        # after the CPU job tells it to start evaluating. In this case, the
        # checkpoint file could have been deleted already.
        logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                     ckpt)
  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(
        FLAGS.model_dir)  # pylint: disable=protected-access,line-too-long

    logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', FLAGS.train_steps,
        FLAGS.train_steps / params['steps_per_epoch'], current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if FLAGS.use_async_checkpointing:
        try:
          from tensorflow.contrib.tpu.python.tpu import async_checkpoint  # pylint: disable=g-import-not-at-top
        except ImportError as e:
          logging.exception(
              'Async checkpointing is not supported in TensorFlow 2.x')
          raise e
        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(100, FLAGS.iterations_per_loop)))
      est.train(
          input_fn=imagenet_train.input_fn,
          max_steps=FLAGS.train_steps,
          hooks=hooks)
    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < FLAGS.train_steps:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              FLAGS.train_steps)
        est.train(input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        logging.info('Finished training up to step %d. Elapsed seconds %d.',
                     next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        logging.info('Starting to evaluate.')
        eval_results = est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=FLAGS.num_eval_images // FLAGS.eval_batch_size,
            name=FLAGS.eval_name)
        logging.info('Eval results at step %d: %s',
                     next_checkpoint, eval_results)
        ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
        if FLAGS.archive_ckpt:
          utils.archive_ckpt(eval_results, eval_results['top_1_accuracy'],
                             ckpt)

      elapsed_time = int(time.time() - start_timestamp)
      logging.info('Finished training up to step %d. Elapsed seconds %d.',
                   FLAGS.train_steps, elapsed_time)

    if FLAGS.export_dir:
      export(est, FLAGS.export_dir, input_image_size)

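# Illustrative helper (not in the original): the step parsing used in the eval
# loop above relies on TensorFlow checkpoint paths of the form
# '<model_dir>/model.ckpt-<global_step>'.
def ckpt_global_step(ckpt_path):
  # e.g. '/tmp/model_dir/model.ckpt-12345' -> 12345
  return int(os.path.basename(ckpt_path).split('-')[1])
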
def main(unused_argv):
  params = params_dict.ParamsDict(
      resnet_config.RESNET_CFG, resnet_config.RESNET_RESTRICTIONS)
  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)

  params = flags_to_params.override_params_from_input_flags(params, FLAGS)

  params.validate()
  params.lock()

  tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
      FLAGS.tpu if (FLAGS.tpu or params.use_tpu) else '',
      zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project)

  if params.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(5000, params.iterations_per_loop)
  config = tf.estimator.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=params.iterations_per_loop,
          num_shards=params.num_cores,
          per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2))  # pylint: disable=line-too-long

  resnet_classifier = tf.estimator.tpu.TPUEstimator(
      use_tpu=params.use_tpu,
      model_fn=resnet_model_fn,
      config=config,
      params=params.as_dict(),
      train_batch_size=params.train_batch_size,
      eval_batch_size=params.eval_batch_size,
      export_to_tpu=FLAGS.export_to_tpu)

  assert (params.precision == 'bfloat16' or
          params.precision == 'float32'), (
              'Invalid value for precision parameter; '
              'must be bfloat16 or float32.')
  tf.logging.info('Precision: %s', params.precision)
  use_bfloat16 = params.precision == 'bfloat16'

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetBigtableInput(  # pylint: disable=g-complex-comprehension
            is_training=is_training,
            use_bfloat16=use_bfloat16,
            transpose_input=params.transpose_input,
            selection=selection,
            augment_name=FLAGS.augment_name,
            randaug_num_layers=FLAGS.randaug_num_layers,
            randaug_magnitude=FLAGS.randaug_magnitude)
        for (is_training, selection) in [(True, select_train),
                                         (False, select_eval)]
    ]
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)

    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(  # pylint: disable=g-complex-comprehension
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=params.transpose_input,
            cache=params.use_cache and is_training,
            image_size=params.image_size,
            num_parallel_calls=params.num_parallel_calls,
            include_background_label=(params.num_label_classes == 1001),
            use_bfloat16=use_bfloat16,
            augment_name=FLAGS.augment_name,
            randaug_num_layers=FLAGS.randaug_num_layers,
            randaug_magnitude=FLAGS.randaug_magnitude)
        for is_training in [True, False]
    ]

  steps_per_epoch = params.num_train_images // params.train_batch_size
  eval_steps = params.num_eval_images // params.eval_batch_size

  if FLAGS.mode == 'eval':
    # Run evaluation when there's a new checkpoint.
    for ckpt in tf.train.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                        eval_results, elapsed_time)

        # Terminate eval job when final checkpoint is reached.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= params.train_steps:
          tf.logging.info('Evaluation finished after training step %d',
                          current_step)
          break
      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long
        # after the CPU job tells it to start evaluating. In this case, the
        # checkpoint file could have been deleted already.
        tf.logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                        ckpt)
  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    try:
      current_step = tf.train.load_variable(FLAGS.model_dir,
                                            tf.GraphKeys.GLOBAL_STEP)
    except (TypeError, ValueError, tf.errors.NotFoundError):
      current_step = 0

    steps_per_epoch = params.num_train_images // params.train_batch_size
    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', params.train_steps,
        params.train_steps / steps_per_epoch, current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if params.use_async_checkpointing:
        try:
          from tensorflow.contrib.tpu.python.tpu import async_checkpoint  # pylint: disable=g-import-not-at-top
        except ImportError as e:
          logging.exception(
              'Async checkpointing is not supported in TensorFlow 2.x')
          raise e
        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(5000, params.iterations_per_loop)))
      if FLAGS.profile_every_n_steps > 0:
        hooks.append(
            tpu_profiler_hook.TPUProfilerHook(
                save_steps=FLAGS.profile_every_n_steps,
                output_dir=FLAGS.model_dir,
                tpu=FLAGS.tpu))
      resnet_classifier.train(
          input_fn=imagenet_train.input_fn,
          max_steps=params.train_steps,
          hooks=hooks)
    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < params.train_steps:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              params.train_steps)
        resnet_classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=params.num_eval_images // params.eval_batch_size)
        tf.logging.info('Eval results at step %d: %s',
                        next_checkpoint, eval_results)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      params.train_steps, elapsed_time)

    if FLAGS.export_dir is not None:
      # The guide to serving an exported TensorFlow model is at:
      #   https://www.tensorflow.org/serving/serving_basic
      tf.logging.info('Starting to export model.')
      export_path = resnet_classifier.export_saved_model(
          export_dir_base=FLAGS.export_dir,
          serving_input_receiver_fn=imagenet_input.image_serving_input_fn)
      if FLAGS.add_warmup_requests:
        inference_warmup.write_warmup_requests(
            export_path,
            FLAGS.model_name,
            params.image_size,
            batch_sizes=FLAGS.inference_batch_sizes,
            image_format='JPEG')

def main(unused_argv):
  params = params_dict.ParamsDict(
      mnasnet_config.MNASNET_CFG, mnasnet_config.MNASNET_RESTRICTIONS)
  params = params_dict.override_params_dict(
      params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)

  params = flags_to_params.override_params_from_input_flags(params, FLAGS)

  additional_params = {
      'steps_per_epoch': params.num_train_images / params.train_batch_size,
      'quantized_training': FLAGS.quantized_training,
      'add_summaries': FLAGS.add_summaries,
  }
  params = params_dict.override_params_dict(
      params, additional_params, is_strict=False)

  params.validate()
  params.lock()

  if FLAGS.tpu or params.use_tpu:
    tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
  else:
    tpu_cluster_resolver = None

  if params.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(100, params.iterations_per_loop)

  # Enables automatic outside compilation. Required in order to automatically
  # detect summary ops to run on CPU instead of TPU.
  tf.config.set_soft_device_placement(True)

  config = tf.estimator.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=params.iterations_per_loop,
          per_host_input_for_training=tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2))  # pylint: disable=line-too-long

  # Validates flags.
  if params.precision == 'bfloat16' and params.use_keras:
    raise ValueError(
        'Keras layers do not fully support bfloat16 activation training.'
        ' You have set precision as %s and use_keras as %s' %
        (params.precision, params.use_keras))

  # Initializes model parameters.
  mnasnet_est = tf.estimator.tpu.TPUEstimator(
      use_tpu=params.use_tpu,
      model_fn=build_model_fn,
      config=config,
      train_batch_size=params.train_batch_size,
      eval_batch_size=params.eval_batch_size,
      export_to_tpu=FLAGS.export_to_tpu,
      params=params.as_dict())

  if FLAGS.mode == 'export_only':
    export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
    return

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetBigtableInput(
            is_training=is_training,
            use_bfloat16=False,
            transpose_input=params.transpose_input,
            selection=selection)
        for (is_training, selection) in [(True, select_train),
                                         (False, select_eval)]
    ]
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)

    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=params.transpose_input,
            cache=params.use_cache and is_training,
            image_size=params.input_image_size,
            num_parallel_calls=params.num_parallel_calls,
            use_bfloat16=(params.precision == 'bfloat16'))
        for is_training in [True, False]
    ]

  if FLAGS.mode == 'eval':
    eval_steps = params.num_eval_images // params.eval_batch_size
    # Run evaluation when there's a new checkpoint.
    for ckpt in tf.train.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = mnasnet_est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                        eval_results, elapsed_time)
        mnas_utils.archive_ckpt(eval_results, eval_results['top_1_accuracy'],
                                ckpt)

        # Terminate eval job when final checkpoint is reached.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= params.train_steps:
          tf.logging.info('Evaluation finished after training step %d',
                          current_step)
          break
      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long
        # after the CPU job tells it to start evaluating. In this case, the
        # checkpoint file could have been deleted already.
        tf.logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                        ckpt)

    if FLAGS.export_dir:
      export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)
  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    try:
      current_step = tf.train.load_variable(FLAGS.model_dir,
                                            tf.GraphKeys.GLOBAL_STEP)
    except (TypeError, ValueError, tf.errors.NotFoundError):
      current_step = 0

    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', params.train_steps,
        params.train_steps / params.steps_per_epoch, current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if params.use_async_checkpointing:
        try:
          from tensorflow.contrib.tpu.python.tpu import async_checkpoint  # pylint: disable=g-import-not-at-top
        except ImportError as e:
          logging.exception(
              'Async checkpointing is not supported in TensorFlow 2.x')
          raise e
        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(100, params.iterations_per_loop)))
      mnasnet_est.train(
          input_fn=imagenet_train.input_fn,
          max_steps=params.train_steps,
          hooks=hooks)
    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < params.train_steps:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              params.train_steps)
        mnasnet_est.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = mnasnet_est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=params.num_eval_images // params.eval_batch_size)
        tf.logging.info('Eval results at step %d: %s',
                        next_checkpoint, eval_results)
        ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
        mnas_utils.archive_ckpt(eval_results, eval_results['top_1_accuracy'],
                                ckpt)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      params.train_steps, elapsed_time)

    if FLAGS.export_dir:
      export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)

def main(unused_argv):
  input_image_size = FLAGS.input_image_size
  if not input_image_size:
    if FLAGS.model_name.startswith('efficientnet'):
      _, _, input_image_size, _ = efficientnet_builder.efficientnet_params(
          FLAGS.model_name)
    else:
      raise ValueError('input_image_size must be set except for EfficientNet')

  config = tf.estimator.RunConfig(
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))))

  # Initializes model parameters.
  params = dict(
      steps_per_epoch=FLAGS.num_train_images / FLAGS.train_batch_size)
  est = tf.estimator.Estimator(
      model_fn=model_fn, config=config, params=params)

  def build_input(is_training):
    """Input for training and eval."""
    tf.logging.info('Using dataset: %s', FLAGS.data_dir)
    return egg_candler_input.EggCandlerInput(
        is_training=is_training,
        data_dir=FLAGS.data_dir,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        image_size=input_image_size)

  image_train = build_input(is_training=True)
  image_eval = build_input(is_training=False)

  if FLAGS.mode == 'eval':
    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
    # Run evaluation when there's a new checkpoint.
    for ckpt in tf.train.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = est.evaluate(
            input_fn=image_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                        eval_results, elapsed_time)
        utils.archive_ckpt(eval_results, eval_results['val_accuracy'], ckpt)

        # Terminate eval job when final checkpoint is reached.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= FLAGS.train_steps:
          tf.logging.info('Evaluation finished after training step %d',
                          current_step)
          break
      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long
        # after the CPU job tells it to start evaluating. In this case, the
        # checkpoint file could have been deleted already.
        tf.logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                        ckpt)
  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(
        FLAGS.model_dir)  # pylint: disable=protected-access,line-too-long

    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', FLAGS.train_steps,
        FLAGS.train_steps / params['steps_per_epoch'], current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      est.train(
          input_fn=image_train.input_fn,
          max_steps=FLAGS.train_steps,
          hooks=[])
    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < FLAGS.train_steps:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              FLAGS.train_steps)
        est.train(
            input_fn=image_train.input_fn,
            max_steps=next_checkpoint,
            hooks=[])
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = est.evaluate(
            input_fn=image_eval.input_fn,
            steps=FLAGS.num_eval_images // FLAGS.eval_batch_size)
        tf.logging.info('Eval results at step %d: %s',
                        next_checkpoint, eval_results)
        ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
        utils.archive_ckpt(eval_results, eval_results['val_accuracy'], ckpt)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      FLAGS.train_steps, elapsed_time)

    if FLAGS.export_dir:
      export(est, FLAGS.export_dir, input_image_size)

def main(unused_argv):
  params = hyperparameters.get_hyperparameters(
      FLAGS.default_hparams_file, FLAGS.hparams_file, FLAGS, FLAGS.hparams)
  tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
      FLAGS.tpu if (FLAGS.tpu or params['use_tpu']) else '',
      zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project)

  if params['use_async_checkpointing']:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(2500, params['iterations_per_loop'])
  config = contrib_tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=get_model_dir(params),
      save_checkpoints_steps=save_checkpoints_steps,
      keep_checkpoint_max=None,  # Keep all checkpoints.
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=contrib_tpu.TPUConfig(
          iterations_per_loop=params['iterations_per_loop'],
          num_shards=params['num_cores'],
          # copybara:strip_begin
          tpu_job_name=FLAGS.tpu_job_name,
          # copybara:strip_end
          per_host_input_for_training=contrib_tpu.InputPipelineConfig.PER_HOST_V2))  # pylint: disable=line-too-long

  resnet_classifier = contrib_tpu.TPUEstimator(
      use_tpu=params['use_tpu'],
      model_fn=resnet_model_fn,
      config=config,
      params=params,
      train_batch_size=params['train_batch_size'],
      eval_batch_size=params['eval_batch_size'],
      export_to_tpu=FLAGS.export_to_tpu)
  # copybara:strip_begin
  if FLAGS.xla_compile:
    resnet_classifier = contrib_tpu.TPUEstimator(
        use_tpu=params['use_tpu'],
        model_fn=xla.estimator_model_fn(resnet_model_fn),
        config=config,
        params=params,
        train_batch_size=params['train_batch_size'],
        eval_batch_size=params['eval_batch_size'],
        export_to_tpu=FLAGS.export_to_tpu)
  # copybara:strip_end
  assert (params['precision'] == 'bfloat16' or
          params['precision'] == 'float32'), (
              'Invalid value for precision parameter; '
              'must be bfloat16 or float32.')
  tf.logging.info('Precision: %s', params['precision'])
  use_bfloat16 = params['precision'] == 'bfloat16'

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    imagenet_train = imagenet_input.ImageNetBigtableInput(
        is_training=True,
        use_bfloat16=use_bfloat16,
        transpose_input=params['transpose_input'],
        selection=select_train)
    imagenet_eval = imagenet_input.ImageNetBigtableInput(
        is_training=False,
        use_bfloat16=use_bfloat16,
        transpose_input=params['transpose_input'],
        selection=select_eval)
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)

    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=params['transpose_input'],
            cache=params['use_cache'] and is_training,
            image_size=params['image_size'],
            num_parallel_calls=params['num_parallel_calls'],
            use_bfloat16=use_bfloat16) for is_training in [True, False]
    ]

  steps_per_epoch = params['num_train_images'] // params['train_batch_size']
  eval_steps = params['num_eval_images'] // params['eval_batch_size']

  if FLAGS.mode == 'eval':
    # Run evaluation when there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(
        get_model_dir(params), timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                        eval_results, elapsed_time)

        # Terminate eval job when final checkpoint is reached.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= params['train_steps']:
          tf.logging.info('Evaluation finished after training step %d',
                          current_step)
          break
      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long
        # after the CPU job tells it to start evaluating. In this case, the
        # checkpoint file could have been deleted already.
        tf.logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                        ckpt)
  elif FLAGS.mode == 'eval_igt':
    # IGT evaluation mode. Evaluate metrics for the desired parameters (true
    # or shifted) on the desired dataset (train or eval). Note that train is
    # still with data augmentation.

    # Get checkpoint file names.
    index_files = tf.gfile.Glob(
        os.path.join(get_model_dir(params), 'model.ckpt-*.index'))
    checkpoints = [fn[:-len('.index')] for fn in index_files]
    # Need to sort them to get proper tensorboard plotting (increasing event
    # timestamps correspond to increasing steps).
    checkpoint_steps = []
    for ckpt in checkpoints:
      tf.logging.info(ckpt)
      step_match = re.match(r'.*model.ckpt-([0-9]*)', ckpt)
      checkpoint_steps.append(int(step_match.group(1)))
    checkpoints = [
        ckpt for _, ckpt in sorted(zip(checkpoint_steps, checkpoints))
    ]
    tf.logging.info('There are {} checkpoints'.format(len(checkpoints)))
    tf.logging.info(', '.join(checkpoints))

    # Keep track of the last processed checkpoint (fault tolerance).
    analysis_state_path = os.path.join(
        get_model_dir(params),
        'analysis_state_' + FLAGS.igt_eval_set + '_' + FLAGS.igt_eval_mode)
    next_analysis_index = 0
    if tf.gfile.Exists(analysis_state_path):
      with tf.gfile.Open(analysis_state_path) as fd:
        next_analysis_index = int(fd.read())

    # Process each checkpoint.
    while next_analysis_index < len(checkpoints):
      tf.logging.info('Next analysis index: {}'.format(next_analysis_index))
      ckpt_path = checkpoints[next_analysis_index]
      tf.logging.info('Starting to evaluate: {}.'.format(ckpt_path))
      start_timestamp = time.time()  # This time will include compilation time

      if FLAGS.igt_eval_set == 'train':
        the_input_fn = imagenet_train.input_fn
        the_steps = steps_per_epoch
      elif FLAGS.igt_eval_set == 'eval':
        the_input_fn = imagenet_eval.input_fn
        the_steps = eval_steps
      else:
        raise ValueError('Unsupported igt_eval_set')

      eval_results = resnet_classifier.evaluate(
          input_fn=the_input_fn,
          steps=the_steps,
          checkpoint_path=ckpt_path,
          name=FLAGS.igt_eval_set + '_' + FLAGS.igt_eval_mode)
      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                      eval_results, elapsed_time)

      next_analysis_index += 1
      file_io.atomic_write_string_to_file(analysis_state_path,
                                          str(next_analysis_index))
  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(
        get_model_dir(params))  # pylint:disable=protected-access,g-line-too-long
    steps_per_epoch = params['num_train_images'] // params['train_batch_size']
    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', params['train_steps'],
        params['train_steps'] / steps_per_epoch, current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if params['use_async_checkpointing']:
        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=get_model_dir(params),
                save_steps=max(2500, params['iterations_per_loop'])))
      resnet_classifier.train(
          input_fn=imagenet_train.input_fn,
          max_steps=params['train_steps'],
          hooks=hooks)
    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < params['train_steps']:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              params['train_steps'])
        resnet_classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=params['num_eval_images'] // params['eval_batch_size'])
        tf.logging.info('Eval results at step %d: %s',
                        next_checkpoint, eval_results)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      params['train_steps'], elapsed_time)

    if FLAGS.export_dir is not None:
      # The guide to serving an exported TensorFlow model is at:
      #   https://www.tensorflow.org/serving/serving_basic
      tf.logging.info('Starting to export model.')
      unused_export_path = resnet_classifier.export_saved_model(
          export_dir_base=FLAGS.export_dir,
          serving_input_receiver_fn=imagenet_input.image_serving_input_fn)

def test_sample_auxiliary_op(self):
  p_fn, q_fn = sampling.mean_field_fn()
  p = p_fn(tf.float32, (), 'test_prior', True, tf.get_variable).distribution
  q = q_fn(tf.float32, (), 'test_posterior', True,
           tf.get_variable).distribution

  # Test benign auxiliary variable.
  sample_op, _ = sampling.sample_auxiliary_op(p, q, 1e-10)

  session_config = tf.ConfigProto(
      graph_options=tf.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              arithmetic_optimization=rewriter_config_pb2.RewriterConfig
              .AGGRESSIVE)))
  sess = tf.Session(config=session_config)
  sess.run(tf.initialize_all_variables())

  p.loc.load(1., session=sess)
  p.untransformed_scale.load(self._softplus_inverse_np(1.), session=sess)
  q.loc.load(1.1, session=sess)
  q.untransformed_scale.load(self._softplus_inverse_np(0.5), session=sess)

  print(sess.run(q.scale))
  sess.run(sample_op)

  tolerance = 0.0001
  self.assertLess(np.abs(sess.run(p.scale) - 1.), tolerance)
  self.assertLess(np.abs(sess.run(p.loc) - 1.), tolerance)
  self.assertLess(np.abs(sess.run(q.scale) - 0.5), tolerance)
  self.assertLess(np.abs(sess.run(q.loc) - 1.1), tolerance)

  # Test fully determining auxiliary variable.
  sample_op, _ = sampling.sample_auxiliary_op(p, q, 1. - 1e-10)
  sess.run(tf.initialize_all_variables())
  p.loc.load(1., session=sess)
  p.untransformed_scale.load(self._softplus_inverse_np(1.), session=sess)
  q.loc.load(1.1, session=sess)
  q.untransformed_scale.load(self._softplus_inverse_np(.5), session=sess)

  sess.run(sample_op)
  self.assertLess(np.abs(sess.run(q.loc) - sess.run(p.loc)), tolerance)
  self.assertLess(sess.run(p.scale), tolerance)
  self.assertLess(sess.run(q.scale), tolerance)

  # Test delta posterior.
  sample_op, _ = sampling.sample_auxiliary_op(p, q, 0.5)
  sess.run(tf.initialize_all_variables())
  p.loc.load(1., session=sess)
  p.untransformed_scale.load(self._softplus_inverse_np(1.), session=sess)
  q.loc.load(1.1, session=sess)
  q.untransformed_scale.load(self._softplus_inverse_np(1e-10), session=sess)

  sess.run(sample_op)
  self.assertLess(np.abs(sess.run(q.loc) - 1.1), tolerance)
  self.assertLess(sess.run(q.scale), tolerance)

  # Test prior is posterior.
  sample_op, _ = sampling.sample_auxiliary_op(p, q, 0.5)
  sess.run(tf.initialize_all_variables())
  p.loc.load(1., session=sess)
  p.untransformed_scale.load(self._softplus_inverse_np(1.), session=sess)
  q.loc.load(1., session=sess)
  q.untransformed_scale.load(self._softplus_inverse_np(1.), session=sess)

  sess.run(sample_op)
  self.assertLess(np.abs(sess.run(q.loc - p.loc)), tolerance)
  self.assertLess(np.abs(sess.run(q.scale - p.scale)), tolerance)

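# Hedged sketch of the _softplus_inverse_np helper the test depends on (its
# definition is not part of this excerpt, so this is an assumed
# implementation): the inverse of softplus(x) = log(1 + exp(x)), computed in
# NumPy. log(expm1(y)) is the numerically stable form of log(exp(y) - 1).
# As written it would live as a method on the same test class.
def _softplus_inverse_np(self, y):
  return np.log(np.expm1(y))
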
def __init__(self,
             iterations_per_loop,
             train_steps,
             eval_steps,
             num_replicas,
             eval_dataset_repeats=True,
             do_initialize=True):
  self.feature_structure = {}
  self.infeed_op = {}
  self.num_replicas = num_replicas
  self.eval_dataset_repeats = eval_dataset_repeats
  # Set the number of input graphs to the number of hosts, up to a maximum
  # of 32.
  self.num_input_graphs = min(32,
                              self.num_replicas // FLAGS.replicas_per_host)
  # The following data has separate copies for training and eval, and is
  # therefore represented as a map from is_train (boolean) to the actual
  # data.
  self.dataset_initializer = {True: [], False: []}
  self.input_graph = {True: [], False: []}
  self.input_sess = {True: [], False: []}
  self.enqueue_ops = {True: [], False: []}
  for _ in range(self.num_input_graphs):
    self.input_graph[True].append(tf.Graph())
    self.input_graph[False].append(tf.Graph())
    self.dataset_initializer[True].append([])
    self.dataset_initializer[False].append([])
    self.enqueue_ops[True].append([])
    self.enqueue_ops[False].append([])
    self.input_sess[True].append([])
    self.input_sess[False].append([])
  # dequeue_ops is only used for eval.
  self.dequeue_ops = []
  self.iterations_per_loop = iterations_per_loop
  self.sess = None
  self.output_sess = None
  self.train_eval_thread = None
  self.graph = tf.Graph()
  if iterations_per_loop != 0 and train_steps % iterations_per_loop != 0:
    # Round train_steps up to a multiple of iterations_per_loop.
    train_steps = iterations_per_loop * int(
        math.ceil(train_steps / iterations_per_loop))
  self.train_steps = train_steps
  if iterations_per_loop == 0:
    self.max_train_iterations = 1
  else:
    self.max_train_iterations = train_steps // iterations_per_loop
  self.eval_steps = int(eval_steps)
  self.train_batch_size = 0
  self.eval_batch_size = 0
  self.eval_has_labels = 0
  self.model_fn = None
  self.num_outfeeds = self.eval_steps
  self.config = tf.ConfigProto(
      operation_timeout_in_ms=600 * 60 * 1000,
      allow_soft_placement=True,
      graph_options=tf.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_meta_optimizer=True)),
      isolate_session_state=True)
  if FLAGS.enable_mlir_bridge:
    self.config.experimental.enable_mlir_bridge = True

  tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
      FLAGS.master,
      zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project,
      job_name="tpu_worker")
  self.master = tpu_cluster_resolver.get_master()
  self.job_name = tpu_cluster_resolver.get_job_name() or "tpu_worker"
  self.embedding_config = None
  self.device_topology = None
  if do_initialize:
    self.device_topology = tf.Session(
        self.master, config=self.config).run(tpu.initialize_system())

def main():
  # Current camera frame and shared detection state.
  global frame, annotatedFrame, frameQueue, currentFps, selectedIdx, \
      selectedClassName, objectDistance, boxes, scores, stats
  global currentMode, M_AUTOMANEUVER, M_AUTONAV, M_MANUAL
  # print(cv2.getBuildInformation())

  print("Loading model")
  detection_graph = tf.Graph()
  with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(CHKPT_PATH, 'rb') as fid:
      serialized_graph = fid.read()
      od_graph_def.ParseFromString(serialized_graph)
      tf.import_graph_def(od_graph_def, name='')
  label_map = label_map_util.load_labelmap(LABELS_PATH)
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes=2, use_display_name=True)
  category_index = label_map_util.create_category_index(categories)

  print("Starting main python module")
  if not DEBUG_DISABLE_FLIGHT:
    flightData = Drone(updateFlightInfo)
    process = Thread(target=flight.flightMain, args=(flightData,))
    process.start()

  ip = '0.0.0.0'
  server = ThreadedHTTPServer((ip, 9090), CamHandler)
  target = Thread(target=server.serve_forever, args=())

  i = 0
  # To flip the image, modify the flip_method parameter (0 and 2 are the most
  # common).
  # print(gstreamer_pipeline(flip_method=0))
  cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=2), cv2.CAP_GSTREAMER)
  fpsSmoothing = 70
  lastUpdate = time.time()
  try:
    if cap.isOpened():
      print("CSI Camera opened")
      graph_options = tf.GraphOptions(
          optimizer_options=tf.OptimizerOptions(
              opt_level=tf.OptimizerOptions.L1))
      OptConfig = tf.ConfigProto(graph_options=graph_options)
      with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=OptConfig) as sess:
          # Definite input and output Tensors for detection_graph.
          image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
          # Each box represents a part of the image where a particular object
          # was detected.
          detection_boxes = detection_graph.get_tensor_by_name(
              'detection_boxes:0')
          # Each score represents the level of confidence for each of the
          # objects. The score is shown on the result image, together with
          # the class label.
          detection_scores = detection_graph.get_tensor_by_name(
              'detection_scores:0')
          detection_classes = detection_graph.get_tensor_by_name(
              'detection_classes:0')
          num_detections = detection_graph.get_tensor_by_name(
              'num_detections:0')
          i = 0
          print("TensorFlow session loaded.")
          while mainThreadRunning:
            ret_val, img = cap.read()
            frame = img
            # Convert OpenCV's BGR to RGB, as the model was trained on RGB
            # images.
            color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Resize the image to the model size of 360x270.
            color_frame = cv2.resize(color_frame, (360, 270),
                                     interpolation=cv2.INTER_CUBIC)
            image_np_expanded = np.expand_dims(color_frame, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes,
                 num_detections],
                feed_dict={image_tensor: image_np_expanded})

            # Draw boxes using the TF library; should be off during
            # competition.
            if useBoxVisualization:
              vis_util.visualize_boxes_and_labels_on_image_array(
                  frame,
                  np.squeeze(boxes),
                  np.squeeze(classes).astype(np.int32),
                  np.squeeze(scores),
                  category_index,
                  use_normalized_coordinates=True,
                  line_thickness=4,
                  min_score_thresh=MIN_CONFIDENCE)

            # Now that we have the detected bounding boxes, determine the
            # current obstacle. First, gather stats about the bounding boxes.
            # Squeezing lets us access box[i] directly instead of box[0][i].
            boxes = np.squeeze(boxes)
            classes = np.squeeze(classes)
            scores = np.squeeze(scores)
            stats = []
            j = 0
            # This is 15 ft; any object farther than that is a
            # misidentification.
            lowestDistance = 15
            if DEBUG_DUMP_DETECTIONS:
              print("Boxes // Classes // Scores")
              print(boxes)
              print(classes)
              print(scores)

            # Reset selections.
            selectedIdx = None
            if len(boxes) > 0:
              for j in range(0, len(boxes)):
                if scores[j] >= MIN_CONFIDENCE:
                  stats.insert(j, getBoxStats(boxes[j], classes[j]))
                  # print("box[%d] distance is %f" % (j, stats[j]['distance']))
                  if stats[j]['distance'] < lowestDistance:
                    selectedIdx = j
                    selectedClassName = classToString(classes[j])
                    objectDistance = stats[j]['distance']
                    lowestDistance = objectDistance
                    # print("Selected box[%d]: distance %f class %s conf %f" %
                    #       (j, objectDistance, selectedClassName, scores[j]))
                else:
                  # Skip calculations on this box if it does not meet the
                  # confidence threshold.
                  stats.insert(j, 0)

            if not DEBUG_DISABLE_FLIGHT:
              if selectedIdx is not None:
                flightData.upData(stats[selectedIdx], selectedClassName)
              else:
                flightData.upData(None, "None")

            # Add the HUD to the current image.
            annotatedFrame = applyHud()
            # currentFrameTime = time.time()
            # if frameQueue.full():
            #   with frameQueue.mutex:
            #     frameQueue.queue.clear()
            frameQueue.put(annotatedFrame.copy())
            if i == 0:
              target.start()
              print("Starting MJPEG stream")
            i += 1

            # FPS smoothing algorithm.
            frameTime = time.time() - lastUpdate
            frameFps = 1 / frameTime
            currentFps += (frameFps - currentFps) / fpsSmoothing
            lastUpdate = time.time()
      cap.release()
    else:
      print("FATAL: Unable to open camera")
  except KeyboardInterrupt:
    sys.exit()

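# Hedged sketch of the gstreamer_pipeline() helper used above; its definition
# is not part of this excerpt. This follows the commonly used Jetson CSI
# camera pipeline (nvarguscamerasrc); the default resolutions and framerate
# here are assumptions.
def gstreamer_pipeline(capture_width=1280, capture_height=720,
                       display_width=1280, display_height=720,
                       framerate=60, flip_method=0):
  return (
      'nvarguscamerasrc ! '
      'video/x-raw(memory:NVMM), width=%d, height=%d, '
      'format=NV12, framerate=%d/1 ! '
      'nvvidconv flip-method=%d ! '
      'video/x-raw, width=%d, height=%d, format=BGRx ! '
      'videoconvert ! video/x-raw, format=BGR ! appsink' %
      (capture_width, capture_height, framerate, flip_method,
       display_width, display_height))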