def evaluate_model(config):
    """Run the generator from a checkpoint and log its canvases as images.

    Builds the generator graph with ``create_generator``, attaches one image
    summary per generated canvas, and performs a single evaluation run that
    restores from ``FLAGS.checkpoint_path``.

    Args:
        config: Configuration object. This function reads ``config.dataset``
            and forwards the whole object to ``create_generator``.
    """
    ###########################################################
    # Generate the model
    ###########################################################
    outputs = create_generator(
        config, input_utils.get_data_shape(config.dataset))

    ###########################################################
    # Setup the evaluation metrics and summaries
    ###########################################################
    summaries = []
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))

    # Generate the canvases that lead to the final output image
    with tf.name_scope('canvases'):
        for step, canvas in enumerate(outputs):
            canvas = input_utils.reshape_images(canvas, config.dataset)
            tiled_images = image_utils.tile_images(canvas)
            summaries.append(
                tf.summary.image('step{0}'.format(step), tiled_images))

    summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin evaluation
    ###########################################################
    checkpoint_path = FLAGS.checkpoint_path
    if tf.gfile.IsDirectory(checkpoint_path):
        # NOTE(review): tf.train.latest_checkpoint returns None when the
        # directory holds no checkpoint; that case is not handled here.
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

    eval_ops = tf.group(*outputs)
    hooks = [
        # Keyword arguments keep summary_op from being bound to a different
        # positional parameter of the hook (e.g. a summary writer slot).
        training.SummaryAtEndHook(
            log_dir=FLAGS.log_dir, summary_op=summary_op),
        training.StopAfterNEvalsHook(FLAGS.count),
    ]

    training.evaluate_once(checkpoint_path, hooks=hooks, eval_ops=eval_ops)
def graph_rewrite_fn():
    """Function to quantize weights and activation of the default graph.

    Reads ``graph_rewriter_config``, ``quant_overrides_config``,
    ``is_training`` and ``is_export`` from the surrounding scope (not visible
    in this block). Custom fixed-range quant ops are inserted first, then the
    contrib quantize rewriter is applied to the default graph.

    Raises:
        ValueError: If the configured weight or activation bit width is not 8,
            if an override names an op that is not in the graph, or if an
            override requests a learned (non-fixed) range.
    """
    quantization = graph_rewriter_config.quantization
    if quantization.weight_bits != 8 or quantization.activation_bits != 8:
        raise ValueError('Only 8bit quantization is supported')

    graph = tf.get_default_graph()

    # Insert custom quant ops.
    if quant_overrides_config is not None:
        input_to_ops_map = input_to_ops.InputToOps(graph)
        for q in quant_overrides_config.quant_configs:
            # get_operation_by_name raises KeyError for an unknown op name
            # instead of returning None, so translate it into the ValueError
            # this function intends to report.
            try:
                producer = graph.get_operation_by_name(q.op_name)
            except KeyError:
                producer = None
            if producer is None:
                raise ValueError('Op name does not exist in graph.')

            context = _get_context_from_op(producer)
            consumers = input_to_ops_map.ConsumerOperations(producer)
            if q.fixed_range:
                _insert_fixed_quant_op(
                    context,
                    q.quant_op_name,
                    producer,
                    consumers,
                    init_min=q.min,
                    init_max=q.max,
                    # The per-op delay only applies while training.
                    quant_delay=q.delay if is_training else 0)
            else:
                raise ValueError('Learned ranges are not yet supported.')

    # Quantize the graph by inserting quantize ops for weights and activations
    if is_training:
        contrib_quantize.experimental_create_training_graph(
            input_graph=graph,
            quant_delay=graph_rewriter_config.quantization.delay,
            freeze_bn_delay=graph_rewriter_config.quantization.delay)
    else:
        # No quantization delay is used when building the export graph.
        contrib_quantize.experimental_create_eval_graph(
            input_graph=graph,
            quant_delay=(graph_rewriter_config.quantization.delay
                         if not is_export else 0))

    contrib_layers.summarize_collection('quant_vars')
def evaluate_model(config):
    """Evaluate a checkpointed model and report streaming metrics.

    Builds the input pipeline (the dataset is requested with ``holdout=True``),
    creates the model, wraps every registered loss and metric in a streaming
    mean, and runs evaluation either once (``FLAGS.once``) or repeatedly over
    a checkpoint directory.

    Args:
        config: Configuration object. Fields read here: ``datadir``,
            ``dataset``, ``datasubset``, ``fold_count``, ``fold``,
            ``batch_size``, ``num_epochs``, ``num_readers`` and
            ``num_preprocessing_threads``.

    Raises:
        AssertionError: If looping evaluation is requested and
            ``FLAGS.checkpoint_path`` is not a directory.
    """
    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(
            config.datadir, config.dataset, config.datasubset,
            num_folds=config.fold_count, fold=config.fold, holdout=True)

        # NOTE(review): init_op and init_feed_dict are not used anywhere in
        # this function; confirm the pipeline needs no explicit init here.
        init_op, init_feed_dict, image = input_utils.get_data(
            config.dataset, dataset, config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        images = tf.train.batch(
            [image], config.batch_size,
            num_threads=config.num_preprocessing_threads,
            capacity=5 * config.batch_size)

    ###########################################################
    # Generate the model
    ###########################################################
    outputs = create_model(config, images, dataset)

    ###########################################################
    # Setup the evaluation metrics and summaries
    ###########################################################
    summaries = []
    metrics_map = {}
    for loss in tf.losses.get_losses():
        metrics_map[loss.op.name] = metrics.streaming_mean(loss)

    for metric in tf.get_collection(graph_utils.GraphKeys.METRICS):
        metrics_map[metric.op.name] = metrics.streaming_mean(metric)

    total_loss = tf.losses.get_total_loss()
    metrics_map[total_loss.op.name] = metrics.streaming_mean(total_loss)
    names_to_values, names_to_updates = metrics.aggregate_metric_map(
        metrics_map)

    # Create summaries of the metrics and print them to the screen.
    # items() works on both Python 2 and Python 3 (iteritems() is 2-only).
    for name, value in names_to_values.items():
        summary = tf.summary.scalar(name, value, collections=[])
        summaries.append(tf.Print(summary, [value], name))

    summaries.extend(layers.summarize_collection(tf.GraphKeys.MODEL_VARIABLES))
    summaries.extend(layers.summarize_collection(
        graph_utils.GraphKeys.METRICS))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS))

    images = input_utils.reshape_images(images, config.dataset)
    tiled_images = image_utils.tile_images(images)
    summaries.append(tf.summary.image('input_batch', tiled_images))

    # Generate the canvases that lead to the final output image
    with tf.name_scope('canvases'):
        for step, canvas in enumerate(outputs):
            canvas = input_utils.reshape_images(canvas, config.dataset)
            tiled_images = image_utils.tile_images(canvas)
            summaries.append(
                tf.summary.image('step{0}'.format(step), tiled_images))

    summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin evaluation
    ###########################################################
    checkpoint_path = FLAGS.checkpoint_path
    eval_ops = tf.group(*names_to_updates.values())
    hooks = [
        training.SummaryAtEndHook(
            log_dir=FLAGS.log_dir, summary_op=summary_op),
        # Enough evaluation steps to cover dataset.num_samples at
        # config.batch_size samples per step (rounded up).
        training.StopAfterNEvalsHook(
            math.ceil(dataset.num_samples / float(config.batch_size)))
    ]

    eval_kwargs = {}
    eval_fn = training.evaluate_repeatedly
    if FLAGS.once:
        if tf.gfile.IsDirectory(checkpoint_path):
            # NOTE(review): latest_checkpoint returns None when the directory
            # holds no checkpoint; that case is not handled here.
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
        eval_fn = training.evaluate_once
    else:
        assert tf.gfile.IsDirectory(checkpoint_path), (
            'checkpoint path must be a directory when using loop evaluation')

    eval_fn(checkpoint_path, hooks=hooks, eval_ops=eval_ops, **eval_kwargs)
def train_model(config):
    """Build the multi-device training graph and run the training loop.

    Steps: collect the GPU training devices, build the input pipeline on the
    CPU, create one model tower per device, set up a decayed learning rate
    with an Adam optimizer, gather summaries, and hand everything to
    ``learning.train``.

    Args:
        config: Configuration object. Fields read here include ``datadir``,
            ``dataset``, ``fold_count``, ``fold``, ``batch_size``,
            ``num_epochs``, ``num_readers``, ``num_preprocessing_threads``,
            the ``learning_rate*`` settings, ``clip``,
            ``checkpoint_frequency``, ``summary_frequency`` and
            ``trace_frequency``.

    Raises:
        AssertionError: If no training device is found or no tower is created.
    """
    training_devices = []
    for device in graph_utils.collect_devices({'GPU': FLAGS.num_gpus}):
        training_devices.append(graph_utils.device_fn(device))
    assert training_devices, 'Found no training devices!'
    device_count = len(training_devices)

    # ---------------------------------------------------------
    # Input pipeline (pinned to the CPU)
    # ---------------------------------------------------------
    with tf.device('/cpu:0'), tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(
            config.datadir,
            config.dataset,
            'train',
            num_folds=config.fold_count,
            fold=config.fold,
            holdout=False)

        init_op, init_feed_dict, image = input_utils.get_data(
            config.dataset,
            dataset,
            config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        inputs_queue = input_utils.batch_images(
            image,
            config.batch_size,
            num_threads=config.num_preprocessing_threads,
            num_devices=device_count)

    # ---------------------------------------------------------
    # Model towers, one per training device
    # ---------------------------------------------------------
    towers = graph_utils.create_towers(
        create_training_model, training_devices, config, inputs_queue,
        dataset)
    assert towers, 'No training towers were created!'

    # ---------------------------------------------------------
    # Training objectives
    # ---------------------------------------------------------
    with tf.name_scope('training'):
        with tf.device('/cpu:0'):
            # The decay step is divided by the number of towers.
            decay_step = config.learning_rate_decay_step / len(towers)
            decayed_rate = exponential_decay(
                config.batch_size,
                decay_step,
                config.learning_rate,
                config.learning_rate_decay,
                dataset)
            # Never let the learning rate drop below the configured minimum.
            learning_rate = tf.maximum(
                decayed_rate, config.learning_rate_min, name='learning_rate')

            tf.add_to_collection(
                graph_utils.GraphKeys.TRAINING_PARAMETERS, learning_rate)

            optimizer = tf.train.AdamOptimizer(learning_rate)

        # Per-tower gradients and losses
        tower_klds, tower_losses, grads_and_vars = (
            graph_utils.optimize_towers(
                optimizer, towers, clip_norm=config.clip))

        if tower_klds:
            total_kld = tf.add_n(tower_klds, name='total_kld')
        else:
            total_kld = None
        total_loss = tf.add_n(tower_losses, name='total_loss')

        # Update ops are taken from the first tower only (batch norm
        # statistics, for example), then the gradient application is added.
        global_step = framework.get_or_create_global_step()
        first_tower_updates = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS, towers[0].scope)
        apply_gradients_op = optimizer.apply_gradients(
            grads_and_vars, global_step=global_step)
        update_op = tf.group(*(first_tower_updates + [apply_gradients_op]))

        # Fetching train_op forces the update to run and yields the loss.
        with tf.control_dependencies([update_op]):
            train_op = tf.identity(total_loss, name='train_op')

    # ---------------------------------------------------------
    # Summaries
    # ---------------------------------------------------------
    with tf.device('/cpu:0'):
        summaries = list(learning.add_gradients_summaries(grads_and_vars))
        summarized_collections = (
            tf.GraphKeys.MODEL_VARIABLES,
            graph_utils.GraphKeys.METRICS,
            graph_utils.GraphKeys.RNN_OUTPUTS,
            graph_utils.GraphKeys.TRAINING_PARAMETERS,
        )
        for collection_key in summarized_collections:
            summaries.extend(layers.summarize_collection(collection_key))

        with tf.name_scope('losses'):
            if total_kld is not None:
                summaries.append(tf.summary.scalar('total_kld', total_kld))
            summaries.append(tf.summary.scalar('total_loss', total_loss))

            summaries.extend(
                [tf.summary.scalar(tower_loss.op.name, tower_loss)
                 for tower_loss in tower_losses])
            summaries.extend(
                [tf.summary.scalar(model_loss.op.name, model_loss)
                 for model_loss in tf.losses.get_losses()])

        summary_op = tf.summary.merge(summaries, name='summaries')

    # ---------------------------------------------------------
    # Training loop
    # ---------------------------------------------------------
    global_init_op = tf.global_variables_initializer()
    if init_op is None:
        init_op = global_init_op
    else:
        init_op = tf.group(global_init_op, init_op)

    session_config = tf.ConfigProto(
        allow_soft_placement=False,
        log_device_placement=FLAGS.log_device_placement)

    # The step budget is reduced by a prefetch allowance of two batches per
    # training device.
    prefetch_queue_buffer = 2 * device_count
    batches_per_epoch = int(dataset.num_samples / config.batch_size)
    steps_per_epoch = int(batches_per_epoch / device_count)
    number_of_steps = (
        steps_per_epoch * config.num_epochs - prefetch_queue_buffer)

    tf.logging.info('Running %s steps', number_of_steps)

    # A non-positive trace frequency disables tracing.
    if config.trace_frequency > 0:
        trace_every_n_steps = config.trace_frequency
    else:
        trace_every_n_steps = None

    learning.train(
        train_op,
        FLAGS.log_dir,
        session_config=session_config,
        global_step=global_step,
        number_of_steps=number_of_steps,
        init_op=init_op,
        init_feed_dict=init_feed_dict,
        save_interval_secs=config.checkpoint_frequency,
        summary_op=summary_op,
        save_summaries_secs=config.summary_frequency,
        trace_every_n_steps=trace_every_n_steps)
def evaluate_model(config):
    """Evaluate a checkpointed classifier and report streaming metrics.

    Builds the input pipeline, creates the model, tracks every registered
    metric as a streaming mean together with accuracy and recall@5, and runs
    evaluation either once (``FLAGS.once``) or repeatedly over a checkpoint
    directory.

    Args:
        config: Configuration object. Fields read here: ``datadir``,
            ``dataset``, ``datasubset``, ``batch_size``, ``num_epochs``,
            ``num_readers`` and ``num_preprocessing_threads``.

    Raises:
        AssertionError: If looping evaluation is requested and
            ``FLAGS.checkpoint_path`` is not a directory.
    """
    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(
            config.datadir, config.dataset, config.datasubset)

        init_op, init_feed_dict, image, label = input_utils.get_data(
            config.dataset, dataset, config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        images, labels = tf.train.batch(
            [image, label], config.batch_size,
            num_threads=config.num_preprocessing_threads,
            capacity=5 * config.batch_size)

    ###########################################################
    # Generate the model
    ###########################################################
    outputs = create_model(config, images, dataset)
    tfprof.model_analyzer.print_model_analysis(tf.get_default_graph())

    ###########################################################
    # Setup the evaluation metrics and summaries
    ###########################################################
    summaries = []
    metrics_map = {}
    for metric in tf.get_collection(graph_utils.GraphKeys.METRICS):
        metrics_map[metric.op.name] = metrics.streaming_mean(metric)

    predictions = tf.argmax(outputs, 1)
    metrics_map['accuracy'] = metrics.streaming_accuracy(predictions, labels)
    metrics_map['recall_5'] = metrics.streaming_sparse_recall_at_k(
        outputs, tf.expand_dims(labels, 1), 5)

    names_to_values, names_to_updates = metrics.aggregate_metric_map(
        metrics_map)

    # Create summaries of the metrics and print them to the screen.
    # items() works on both Python 2 and Python 3 (iteritems() is 2-only).
    for name, value in names_to_values.items():
        summary = tf.summary.scalar(name, value, collections=[])
        summaries.append(tf.Print(summary, [value], name))

    summaries.extend(layers.summarize_collection(
        graph_utils.GraphKeys.METRICS))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.QUANTIZED_VARIABLES))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS))

    tiled_images = image_utils.tile_images(images)
    summaries.append(tf.summary.image('input_batch', tiled_images))

    summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin evaluation
    ###########################################################
    checkpoint_path = FLAGS.checkpoint_path
    eval_ops = tf.group(*names_to_updates.values())
    scaffold = tf.train.Scaffold(init_op, init_feed_dict)
    hooks = [
        # Keyword arguments keep summary_op from being bound to a different
        # positional parameter of the hook (e.g. a summary writer slot).
        training.SummaryAtEndHook(
            log_dir=FLAGS.log_dir, summary_op=summary_op),
        # Enough evaluation steps to cover dataset.num_samples at
        # config.batch_size samples per step (rounded up).
        training.StopAfterNEvalsHook(
            math.ceil(dataset.num_samples / float(config.batch_size)))
    ]

    eval_kwargs = {}
    eval_fn = training.evaluate_repeatedly
    if FLAGS.once:
        if tf.gfile.IsDirectory(checkpoint_path):
            # NOTE(review): latest_checkpoint returns None when the directory
            # holds no checkpoint; that case is not handled here.
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
        eval_fn = training.evaluate_once
    else:
        assert tf.gfile.IsDirectory(checkpoint_path), (
            'checkpoint path must be a directory when using loop evaluation')

        # On Tensorflow master fd87896 fixes this, but for now just set a very
        # large number. sys.maxsize exists on both Python 2 and Python 3
        # (sys.maxint is Python 2 only).
        eval_kwargs['max_number_of_evaluations'] = sys.maxsize

    eval_fn(checkpoint_path, scaffold=scaffold, hooks=hooks,
            eval_ops=eval_ops, **eval_kwargs)