def write_fake_checkpoint(model_name,
                          session,
                          checkpoint_dir,
                          moving_average_decay=_MOVING_AVERAGE_DECAY,
                          name='model',
                          height=dv_constants.PILEUP_DEFAULT_HEIGHT,
                          width=dv_constants.PILEUP_DEFAULT_WIDTH,
                          num_channels=dv_constants.PILEUP_NUM_CHANNELS):
  """Writes a fake TensorFlow checkpoint to checkpoint_dir."""
  path = os.path.join(checkpoint_dir, name)
  with session as sess:
    model = modeling.get_model(model_name)
    # Needed to protect ourselves for models without an input image shape.
    h, w = getattr(model, 'input_image_shape', (height, width))
    images = tf.compat.v1.placeholder(
        tf.float32, shape=(4, h, w, num_channels))
    model.create(images, num_classes=3, is_training=True)
    # This is gross, but necessary as model_eval assumes the model was trained
    # with model_train which uses exp moving averages. Unfortunately we cannot
    # just call into model_train as it uses FLAGS which conflict with the
    # flags in use by model_eval. So we inline the creation of the EMA here.
    variable_averages = tf.train.ExponentialMovingAverage(
        moving_average_decay, tf.compat.v1.train.get_or_create_global_step())
    tf.compat.v1.add_to_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS,
        variable_averages.apply(slim.get_model_variables()))
    sess.run(tf.compat.v1.global_variables_initializer())
    save = tf.compat.v1.train.Saver(slim.get_variables())
    save.save(sess, path)
  return path
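# A hypothetical usage sketch, not part of the source: it assumes
# write_fake_checkpoint is called from a tf.test.TestCase subclass and that
# 'inception_v3' is a registered model name (as other snippets here suggest).
def test_write_fake_checkpoint_is_restorable(self):
  checkpoint_dir = tf.test.get_temp_dir()
  path = write_fake_checkpoint('inception_v3', self.test_session(),
                               checkpoint_dir)
  # The returned path is the checkpoint prefix the tf.train.Saver wrote to.
  self.assertTrue(tf.compat.v1.train.checkpoint_exists(path))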
def _write_fake_checkpoint(self, model_name, checkpoint_dir=None,
                           name='model'):
  if checkpoint_dir is None:
    checkpoint_dir = self.checkpoint_dir
  path = os.path.join(checkpoint_dir, name)
  with self.test_session() as sess:
    model = modeling.get_model(model_name)
    # Needed to protect ourselves for models without an input image shape.
    h, w = getattr(model, 'input_image_shape', (100, 221))
    images = tf.placeholder(
        tf.float32, shape=(4, h, w, pileup_image.DEFAULT_NUM_CHANNEL))
    model.create(images, num_classes=3, is_training=True)
    # This is gross, but necessary as model_eval assumes the model was trained
    # with model_train which uses exp moving averages. Unfortunately we cannot
    # just call into model_train as it uses FLAGS which conflict with the
    # flags in use by model_eval. So we inline the creation of the EMA here.
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, tf.train.get_or_create_global_step())
    tf.add_to_collection(
        tf.GraphKeys.UPDATE_OPS,
        variable_averages.apply(tf.contrib.framework.get_model_variables()))
    sess.run(tf.global_variables_initializer())
    save = tf.train.Saver(tf.contrib.framework.get_variables())
    save.save(sess, path)
  return path
def test_call_variants_with_empty_input(self):
  source_path = test_utils.test_tmpfile('empty.tfrecord')
  io_utils.write_tfrecords([], source_path)
  # Make sure that prepare_inputs doesn't crash on empty input.
  call_variants.prepare_inputs(
      source_path, modeling.get_model('random_guess'), batch_size=1)
def main(argv=()):
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    if FLAGS.use_tpu:
      master = tf_utils.resolve_master(FLAGS.master, FLAGS.tpu_name,
                                       FLAGS.tpu_zone, FLAGS.gcp_project)
    else:
      master = ''

    model = modeling.get_model(FLAGS.model_name)
    call_variants(
        examples_filename=FLAGS.examples,
        checkpoint_path=FLAGS.checkpoint,
        model=model,
        execution_hardware=FLAGS.execution_hardware,
        output_file=FLAGS.outfile,
        max_batches=FLAGS.max_batches,
        batch_size=FLAGS.batch_size,
        master=master,
        use_tpu=FLAGS.use_tpu,
    )
def main(_):
  tensor_shape = [FLAGS.height, FLAGS.width, FLAGS.channels]
  logging.info('Processing ckpt=%s, tensor_shape=%s.', FLAGS.checkpoint,
               tensor_shape)
  freeze_graph(
      modeling.get_model('inception_v3'), FLAGS.checkpoint, tensor_shape,
      FLAGS.output)
  logging.info('Output written to %s.', FLAGS.output)
def run(target, unused_is_chief, device_fn, use_tpu):
  """Run training.

  Args:
    target: The target of the TensorFlow standard server to use. Can be the
      empty string to run locally using an inprocess server.
    device_fn: Device function used to assign ops to devices.
    use_tpu: turn on tpu code path.
  """
  if not FLAGS.dataset_config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
    return

  g = tf.Graph()
  with g.as_default():
    with tf.device(device_fn):
      # If ps_tasks is zero, the local device is used. When using multiple
      # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
      # across the different devices.
      tf_dataset = data_providers.get_input_fn_from_dataset(
          dataset_config_filename=FLAGS.dataset_config_pbtxt,
          mode=tf.estimator.ModeKeys.TRAIN,
          max_examples=FLAGS.max_examples,
          use_tpu=use_tpu)
      model = modeling.get_model(FLAGS.model_name)
      logging.info('Running training on %s with model %s and tpu %s',
                   tf_dataset, FLAGS.model_name, use_tpu)

      batches_per_epoch = tf_dataset.num_examples // FLAGS.batch_size
      logging.info('Batches per epoch %s', batches_per_epoch)
      params = dict(batches_per_epoch=batches_per_epoch,)
      estimator = model.make_estimator(
          batch_size=FLAGS.batch_size,
          model_dir=FLAGS.train_dir,
          params=params,
          use_tpu=use_tpu,
          master=target,
          start_from_checkpoint=FLAGS.start_from_checkpoint,
      )

      training_hooks = None
      if FLAGS.use_early_stopping:
        # redacted
        raise ValueError('Currently not implemented.')

      estimator.train(
          input_fn=tf_dataset,
          max_steps=FLAGS.number_of_steps,
          hooks=training_hooks)
def assertCallVariantsEmitsNRecordsForRandomGuess(self, filename,
                                                  num_examples):
  outfile = test_utils.test_tmpfile('call_variants.tfrecord')
  model = modeling.get_model('random_guess')
  call_variants.call_variants(
      examples_filename=filename,
      checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
      model=model,
      output_file=outfile,
      batch_size=4,
      max_batches=None)
  call_variants_outputs = list(
      io_utils.read_tfrecords(outfile, deepvariant_pb2.CallVariantsOutput))
  # Check that we have the right number of output protos.
  self.assertEqual(len(call_variants_outputs), num_examples)
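# A hypothetical caller, not from the source: the assert helper above is
# presumably driven by a concrete test along these lines, reusing the golden
# calling examples referenced elsewhere in these snippets.
def test_call_variants_on_golden_examples(self):
  examples = list(
      io_utils.read_tfrecords(testdata.GOLDEN_CALLING_EXAMPLES))
  self.assertCallVariantsEmitsNRecordsForRandomGuess(
      testdata.GOLDEN_CALLING_EXAMPLES, len(examples))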
def test_call_variants_with_empty_input(self):
  source_path = test_utils.test_tmpfile('empty.tfrecord')
  tfrecord.write_tfrecords([], source_path)

  # Make sure that prepare_inputs doesn't crash on empty input.
  ds = call_variants.prepare_inputs(source_path)
  m = modeling.get_model('random_guess')

  # The API specifies that OutOfRangeError is thrown in this case.
  batches = list(_get_infer_batches(ds, model=m, batch_size=1))
  with self.test_session() as sess:
    sess.run(tf.compat.v1.local_variables_initializer())
    sess.run(tf.compat.v1.global_variables_initializer())
    try:
      _ = sess.run(batches)
    except tf.errors.OutOfRangeError:
      pass
def assertCallVariantsEmitsNRecordsForInceptionV3(self, filename,
                                                  num_examples):
  outfile = test_utils.test_tmpfile('inception_v3.call_variants.tfrecord')
  model = modeling.get_model('inception_v3')
  checkpoint_path = _LEAVE_MODEL_UNINITIALIZED
  call_variants.call_variants(
      examples_filename=filename,
      checkpoint_path=checkpoint_path,
      model=model,
      output_file=outfile,
      batch_size=4,
      max_batches=None)
  call_variants_outputs = list(
      io_utils.read_tfrecords(outfile, deepvariant_pb2.CallVariantsOutput))
  # Check that we have the right number of output protos.
  self.assertEqual(len(call_variants_outputs), num_examples)
def assertCallVariantsEmitsNRecordsForRandomGuess(self, filename,
                                                  num_examples):
  checkpoint_path = _LEAVE_MODEL_UNINITIALIZED
  outfile = test_utils.test_tmpfile('call_variants.tfrecord')
  model = modeling.get_model('random_guess')
  call_variants.call_variants(
      examples_filename=filename,
      checkpoint_path=checkpoint_path,
      model=model,
      output_file=outfile,
      batch_size=4,
      max_batches=None,
      master='',
      use_tpu=FLAGS.use_tpu)
  call_variants_outputs = list(
      tfrecord.read_tfrecords(outfile, deepvariant_pb2.CallVariantsOutput))
  # Check that we have the right number of output protos.
  self.assertEqual(len(call_variants_outputs), num_examples)
def test_end2end(self, model_name, mock_get_dataset):
  """End-to-end test of model_eval."""
  checkpoint_dir = tf.test.get_temp_dir()

  # Create a model with 3 classes, and save it to our checkpoint dir.
  with self.test_session() as sess:
    model = modeling.get_model(model_name)
    # Needed to protect ourselves for models without an input image shape.
    h, w = getattr(model, 'input_image_shape', (100, 221))
    images = tf.placeholder(
        tf.float32, shape=(4, h, w, pileup_image.DEFAULT_NUM_CHANNEL))
    model.create(images, num_classes=3, is_training=True)
    # This is gross, but necessary as model_eval assumes the model was trained
    # with model_train which uses exp moving averages. Unfortunately we cannot
    # just call into model_train as it uses FLAGS which conflict with the
    # flags in use by model_eval. So we inline the creation of the EMA here.
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, slim.get_or_create_global_step())
    tf.add_to_collection(
        tf.GraphKeys.UPDATE_OPS,
        variable_averages.apply(slim.get_model_variables()))
    sess.run(tf.global_variables_initializer())
    save = tf.train.Saver(slim.get_variables())
    save.save(sess, os.path.join(checkpoint_dir, 'model'))

  # Start up eval, loading that checkpoint.
  FLAGS.batch_size = 2
  FLAGS.checkpoint_dir = checkpoint_dir
  FLAGS.eval_dir = tf.test.get_temp_dir()
  FLAGS.batches_per_eval_step = 1
  FLAGS.max_evaluations = 1
  FLAGS.eval_interval_secs = 0
  FLAGS.model_name = model_name
  FLAGS.dataset_config_pbtxt = '/path/to/mock.pbtxt'

  # Always try to read in compressed inputs to stress that case. Uncompressed
  # inputs are certain to work. This test is expensive to run, so we want to
  # minimize the number of times we need to run this.
  mock_get_dataset.return_value = data_providers_test.make_golden_dataset(
      compressed_inputs=True)
  model_eval.main(0)
  mock_get_dataset.assert_called_once_with(FLAGS.dataset_config_pbtxt)
def main(argv=()):
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    model = modeling.get_model(FLAGS.model_name)
    call_variants(
        examples_filename=FLAGS.examples,
        checkpoint_path=FLAGS.checkpoint,
        model=model,
        execution_hardware=FLAGS.execution_hardware,
        output_file=FLAGS.outfile,
        max_batches=FLAGS.max_batches,
        batch_size=FLAGS.batch_size)
def main(argv=()):
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()
    logging_level.set_from_flag()

    # Give htslib authentication access to GCS.
    htslib_gcp_oauth.init()

    model = modeling.get_model(FLAGS.model_name)
    call_variants(
        examples_filename=FLAGS.examples,
        checkpoint_path=FLAGS.checkpoint,
        model=model,
        execution_hardware=FLAGS.execution_hardware,
        output_file=FLAGS.outfile,
        max_batches=FLAGS.max_batches,
        batch_size=FLAGS.batch_size)
def setUpClass(cls):
  cls.model = modeling.get_model('mobilenet_v1')
def setUpClass(cls):
  cls.model = modeling.get_model('resnet_v2_50')
def eval_loop(master,
              dataset_config_pbtxt,
              checkpoint_dir,
              model_name,
              batch_size,
              max_examples,
              eval_name,
              max_evaluations,
              use_tpu=False):
  """Evaluate incoming checkpoints, until the specified end."""
  logging.info('Running fixed eval for: %s', dataset_config_pbtxt)

  tf_dataset = data_providers.get_input_fn_from_dataset(
      dataset_config_filename=dataset_config_pbtxt,
      mode=tf.estimator.ModeKeys.EVAL,
      use_tpu=use_tpu,
  )

  best_ckpt = None
  ckpt_metric = FLAGS.best_checkpoint_metric
  ckpt_metric_increasing = ckpt_metric in increasing_metrics

  model = modeling.get_model(model_name)
  logging.info('Running evaluations on %s with model %s', tf_dataset, model)

  # Compute when to stop reading, in terms of batches.
  num_examples = tf_dataset.num_examples
  if max_examples is not None:
    num_examples = min(max_examples, num_examples)
  num_batches = num_examples // batch_size
  num_samples = batch_size * num_batches
  logging.info(
      'Dataset has %s samples, doing eval over %s; '
      'max_examples is %s, num examples to be used %s; num_batches is %s',
      tf_dataset.num_examples, num_samples, max_examples, num_examples,
      num_batches)

  # This loads EMA variables.
  eval_hooks = [h(checkpoint_dir) for h in model.session_eval_hooks()]
  classifier = model.make_estimator(
      batch_size=batch_size,
      model_dir=checkpoint_dir,
      use_tpu=use_tpu,
      master=master)

  def terminate_eval():
    logging.info('Terminating eval after %d seconds of no checkpoints',
                 FLAGS.eval_timeout)
    return True

  # Run evaluation when there's a new checkpoint
  num_evaluations = 0
  for ckpt in checkpoints_iterator(
      checkpoint_dir=checkpoint_dir,
      min_interval_secs=FLAGS.min_eval_interval_s,
      timeout=FLAGS.eval_timeout,
      timeout_fn=terminate_eval):
    logging.info('Starting to evaluate.')
    # For each step, calls input_fn, which returns one batch of data.
    # Evaluates until either steps batches are processed, or input_fn raises
    # an end-of-input exception (OutOfRangeError or StopIteration).
    eval_results = classifier.evaluate(
        input_fn=tf_dataset,
        steps=num_batches,
        hooks=eval_hooks,
        checkpoint_path=ckpt,
        name=eval_name)
    logging.info('Eval results: %s', eval_results)

    # Track best checkpoint seen so far, measured by ckpt_metric.
    if not best_ckpt:
      # If the training jobs died, pick up where we left off.
      try:
        best_metrics = read_metrics(ckpt, eval_name,
                                    'best_checkpoint.metrics')
        logging.info('Found existing best_checkpoint: %s', best_metrics)
        best_ckpt = (best_metrics, ckpt)
      except NotFoundError:
        logging.info('best_checkpoint file does not exist.')
        best_ckpt = (eval_results, ckpt)
        _write_best_checkpoint(ckpt, eval_results, eval_name)
    if ((ckpt_metric_increasing and
         eval_results[ckpt_metric] > best_ckpt[0][ckpt_metric]) or
        (not ckpt_metric_increasing and
         eval_results[ckpt_metric] < best_ckpt[0][ckpt_metric])):
      best_ckpt = (eval_results, ckpt)
      _write_best_checkpoint(ckpt, eval_results, eval_name)
    _write_checkpoint_metrics(ckpt, eval_results, eval_name)

    # An alternative strategy might check step-number-of-ckpt >= train_steps.
    num_evaluations += 1
    if max_evaluations is not None and num_evaluations >= max_evaluations:
      logging.info('Evaluation finished after %d evaluations',
                   num_evaluations)
      break

  return
def run(target, is_chief, device_fn):
  """Run training.

  Args:
    target: The target of the TensorFlow standard server to use. Can be the
      empty string to run locally using an inprocess server.
    is_chief: Boolean indicating whether this process is the chief.
    device_fn: Device function used to assign ops to devices.
  """
  if not FLAGS.dataset_config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
    return

  g = tf.Graph()
  with g.as_default():
    model = modeling.get_model(FLAGS.model_name)
    dataset = data_providers.get_dataset(FLAGS.dataset_config_pbtxt)
    print('Running training on {} with model {}\n'.format(dataset, model))

    with tf.device(device_fn):
      # If ps_tasks is zero, the local device is used. When using multiple
      # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
      # across the different devices.
      images, labels, _ = data_providers.make_batches(
          dataset.get_slim_dataset(), model, FLAGS.batch_size, mode='TRAIN')
      endpoints = model.create(images, dataset.num_classes, is_training=True)
      labels = slim.one_hot_encoding(labels, dataset.num_classes)
      total_loss = loss(
          endpoints['Logits'], labels, label_smoothing=FLAGS.label_smoothing)

      # Setup the moving averages:
      moving_average_variables = slim.get_model_variables()
      moving_average_variables.extend(slim.losses.get_losses())
      moving_average_variables.append(total_loss)

      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, slim.get_or_create_global_step())
      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                           variable_averages.apply(moving_average_variables))

      # Configure the learning rate using an exponential decay.
      decay_steps = int(((1.0 * dataset.num_examples) / FLAGS.batch_size) *
                        FLAGS.num_epochs_per_decay)

      learning_rate = tf.train.exponential_decay(
          FLAGS.learning_rate,
          slim.get_or_create_global_step(),
          decay_steps,
          FLAGS.learning_rate_decay_factor,
          staircase=True)

      opt = tf.train.RMSPropOptimizer(learning_rate, FLAGS.rmsprop_decay,
                                      FLAGS.rmsprop_momentum,
                                      FLAGS.rmsprop_epsilon)

      # Create training op
      train_tensor = slim.learning.create_train_op(
          total_loss,
          optimizer=opt,
          update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

      # Summaries:
      slim.summaries.add_histogram_summaries(slim.get_model_variables())
      slim.summaries.add_scalar_summaries(slim.losses.get_losses(), 'losses')
      slim.summaries.add_scalar_summary(total_loss, 'Total_Loss', 'losses')
      slim.summaries.add_scalar_summary(learning_rate, 'Learning_Rate',
                                        'training')
      slim.summaries.add_histogram_summaries(endpoints.values())
      slim.summaries.add_zero_fraction_summaries(endpoints.values())
      # redacted

      # Set start-up delay
      startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

      init_fn = model_init_function(model, dataset.num_classes,
                                    FLAGS.start_from_checkpoint)

      saver = tf.train.Saver(
          max_to_keep=FLAGS.max_checkpoints_to_keep,
          keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)

      # Train model
      slim.learning.train(
          train_tensor,
          number_of_steps=FLAGS.number_of_steps,
          logdir=FLAGS.train_dir,
          master=target,
          init_fn=init_fn,
          is_chief=is_chief,
          saver=saver,
          startup_delay_steps=startup_delay_steps,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs)
def eval_loop(master,
              dataset_config_pbtxt,
              checkpoint_dir,
              model_name,
              batch_size,
              max_examples,
              eval_name,
              max_evaluations,
              use_tpu=False):
  """Evaluate incoming checkpoints, until the specified end."""
  logging.info('Running fixed eval for: %s', dataset_config_pbtxt)

  tf_dataset = data_providers.get_input_fn_from_dataset(
      dataset_config_filename=dataset_config_pbtxt,
      mode=tf.estimator.ModeKeys.EVAL,
      use_tpu=use_tpu,
  )
  model = modeling.get_model(model_name)
  logging.info('Running evaluations on %s with model %s', tf_dataset, model)

  # Compute when to stop reading, in terms of batches.
  num_batches = min(max_examples, tf_dataset.num_examples) // batch_size
  num_samples = batch_size * num_batches
  logging.info(
      'Dataset has %d samples, doing eval over %d; '
      'max_examples is %d, num_batches is %d', tf_dataset.num_examples,
      num_samples, max_examples, num_batches)
  batches_per_epoch = tf_dataset.num_examples / batch_size

  # This loads EMA variables.
  eval_hooks = [h(checkpoint_dir) for h in model.session_eval_hooks()]
  classifier = model.make_estimator(
      batch_size=batch_size,
      model_dir=checkpoint_dir,
      params={'batches_per_epoch': batches_per_epoch},
      use_tpu=use_tpu,
      master=master,
  )

  def terminate_eval():
    logging.info('Terminating eval after %d seconds of no checkpoints',
                 FLAGS.eval_timeout)
    return True

  # Run evaluation when there's a new checkpoint
  num_evaluations = 0
  for ckpt in checkpoints_iterator(
      checkpoint_dir=checkpoint_dir,
      min_interval_secs=FLAGS.min_eval_interval_s,
      timeout=FLAGS.eval_timeout,
      timeout_fn=terminate_eval):
    logging.info('Starting to evaluate.')
    # For each step, calls input_fn, which returns one batch of data.
    # Evaluates until either steps batches are processed, or input_fn raises
    # an end-of-input exception (OutOfRangeError or StopIteration).
    eval_results = classifier.evaluate(
        input_fn=tf_dataset,
        steps=num_batches,
        hooks=eval_hooks,
        checkpoint_path=ckpt,
        name=eval_name)
    logging.info('Eval results: %s', eval_results)
    _write_checkpoint_metrics(ckpt, eval_results, eval_name)

    # An alternative strategy might check step-number-of-ckpt >= train_steps.
    num_evaluations += 1
    if max_evaluations is not None and num_evaluations >= max_evaluations:
      logging.info('Evaluation finished after %d evaluations',
                   num_evaluations)
      break

  return
def setUpClass(cls):
  super(InceptionV3ModelTest, cls).setUpClass()
  cls.model = modeling.get_model('inception_v3')
def eval_loop(master, dataset_config_pbtxt, checkpoint_dir, model_name,
              batch_size, moving_average_decay, max_examples, eval_dir,
              max_evaluations):
  logging.info('Running fixed eval for: %s', dataset_config_pbtxt)

  num_evaluations = 0
  for checkpoint_path in checkpoints_iterator(checkpoint_dir):
    logging.info('Using checkpoint %s %d', checkpoint_path, num_evaluations)

    g = tf.Graph()
    with g.as_default():
      tf_global_step = tf.train.get_or_create_global_step()

      # redacted
      model = modeling.get_model(model_name)
      dataset = data_providers.get_dataset(dataset_config_pbtxt)
      logging.info('Running evaluations on %s with model %s', dataset, model)

      images, labels, encoded_variant = data_providers.make_batches(
          dataset.get_slim_dataset(), model, batch_size, mode='EVAL')
      endpoints = model.create(images, dataset.num_classes, is_training=False)
      predictions = tf.argmax(endpoints['Predictions'], 1)

      # For eval, explicitly add moving_mean and moving_variance variables to
      # the MOVING_AVERAGE_VARIABLES collection.
      variable_averages = tf.train.ExponentialMovingAverage(
          moving_average_decay, tf_global_step)

      for var in tf.get_collection('moving_vars'):
        tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
      for var in slim.get_model_variables():
        tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)

      variables_to_restore = variable_averages.variables_to_restore()
      variables_to_restore[tf_global_step.op.name] = tf_global_step

      names_to_values, names_to_updates = make_metrics(predictions, labels,
                                                       encoded_variant)

      for name, value in names_to_values.iteritems():
        slim.summaries.add_scalar_summary(value, name, print_summary=True)

      num_batches = int(
          math.floor(
              min(max_examples, dataset.num_examples) / float(batch_size)))
      num_samples = batch_size * num_batches
      logging.info('Dataset has %d samples, doing eval over %d',
                   dataset.num_examples, num_samples)

      names_to_values = slim.evaluation.evaluate_once(
          master=master,
          checkpoint_path=checkpoint_path,
          logdir=eval_dir,
          variables_to_restore=variables_to_restore,
          num_evals=num_batches,
          initial_op=tf.group(tf.global_variables_initializer(),
                              tf.local_variables_initializer()),
          eval_op=names_to_updates.values(),
          final_op=names_to_values,
      )

      # --- LOW LEVEL [WIP], hangs, initialization seems busted ---
      # This is (marginally) nicer as it can eliminate the slim dep.
      # saver = tf.train.Saver(variables_to_restore)
      # scaffold = tf.train.Scaffold(saver=saver)
      # names_to_values = tf.contrib.training.evaluate_once(
      #     checkpoint_path=checkpoint_path,
      #     master=FLAGS.master,
      #     scaffold=scaffold,
      #     eval_ops=names_to_updates.values(),
      #     final_ops=names_to_values,
      # )

      _write_checkpoint_metrics(checkpoint_path, names_to_values,
                                eval_dir=eval_dir)

      num_evaluations += 1
      if max_evaluations is not None and num_evaluations >= max_evaluations:
        return
def setUpClass(cls):
  cls.model = modeling.get_model('inception_v2')
def test_get_model_existing_models(self, model_name, expected_class):
  self.assertIsInstance(modeling.get_model(model_name), expected_class)
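# A sketch of how the test above is presumably parameterized; the decorator
# and the concrete (name, class) pairs are assumptions, since modeling's
# exported class names are not shown in these snippets.
# @parameterized.parameters(
#     ('random_guess', modeling.DeepVariantModel),
#     ('inception_v3', modeling.DeepVariantModel),
# )
# def test_get_model_existing_models(self, model_name, expected_class):
#   ...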
def setUpClass(cls):
  cls.examples = list(
      io_utils.read_tfrecords(testdata.GOLDEN_CALLING_EXAMPLES))
  cls.variants = [tf_utils.example_variant(ex) for ex in cls.examples]
  cls.model = modeling.get_model('random_guess')
def setUpClass(cls):
  super(InceptionV3EmbeddingModelTest, cls).setUpClass()
  cls.model = modeling.get_model('inception_v3_embedding')
def main(_):
  proto_utils.uses_fast_cpp_protos_or_die()

  if not FLAGS.dataset_config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
  logging_level.set_from_flag()

  g = tf.Graph()
  with g.as_default():
    tf_global_step = slim.get_or_create_global_step()

    model = modeling.get_model(FLAGS.model_name)
    dataset = data_providers.get_dataset(FLAGS.dataset_config_pbtxt)
    print('Running evaluations on {} with model {}\n'.format(dataset, model))

    batch = data_providers.make_training_batches(
        dataset.get_slim_dataset(), model, FLAGS.batch_size)
    images, labels, encoded_truth_variants = batch
    endpoints = model.create(images, dataset.num_classes, is_training=False)
    predictions = tf.argmax(endpoints['Predictions'], 1)

    # For eval, explicitly add moving_mean and moving_variance variables to
    # the MOVING_AVERAGE_VARIABLES collection.
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, tf_global_step)

    for var in tf.get_collection('moving_vars'):
      tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
    for var in slim.get_model_variables():
      tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)

    variables_to_restore = variable_averages.variables_to_restore()
    variables_to_restore[tf_global_step.op.name] = tf_global_step

    # Define the metrics:
    metrics = {
        'Accuracy': tf.contrib.metrics.streaming_accuracy,
        'Mean_absolute_error':
            tf.contrib.metrics.streaming_mean_absolute_error,
        'FPs': tf.contrib.metrics.streaming_false_positives,
        'FNs': tf.contrib.metrics.streaming_false_negatives,
    }

    def _make_selector(func):
      return select_variants_weights(func, encoded_truth_variants)

    selectors = {
        'All': None,
        'SNPs': _make_selector(variantutils.is_snp),
        'Indels': _make_selector(variantutils.is_indel),
        'Insertions': _make_selector(variantutils.has_insertion),
        'Deletions': _make_selector(variantutils.has_deletion),
        'BiAllelic': _make_selector(variantutils.is_biallelic),
        'MultiAllelic': _make_selector(variantutils.is_multiallelic),
        # These haven't proven particularly useful, but are commented out here
        # in case someone wants to do some more explorations.
        # 'HomRef': tf.equal(labels, 0),
        # 'Het': tf.equal(labels, 1),
        # 'HomAlt': tf.equal(labels, 2),
        # 'NonRef': tf.greater(labels, 0),
    }
    metrics = calling_metrics(metrics, selectors, predictions, labels)
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
        metrics)

    for name, value in names_to_values.iteritems():
      slim.summaries.add_scalar_summary(value, name, print_summary=True)

    slim.evaluation.evaluation_loop(
        FLAGS.master,
        FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_dir,
        num_evals=FLAGS.batches_per_eval_step,
        eval_op=names_to_updates.values(),
        variables_to_restore=variables_to_restore,
        max_number_of_evaluations=FLAGS.max_evaluations,
        eval_interval_secs=FLAGS.eval_interval_secs)
def setUpClass(cls):
  super(InceptionV3AttentionModelTest, cls).setUpClass()
  cls.model = modeling.get_model(
      'attention_inception_v3',
      attention_module='se_block',
      attention_position='all')
def run(target, unused_is_chief, device_fn, use_tpu):
  """Run training.

  Args:
    target: The target of the TensorFlow standard server to use. Can be the
      empty string to run locally using an inprocess server.
    device_fn: Device function used to assign ops to devices.
    use_tpu: turn on tpu code path.
  """
  if not FLAGS.dataset_config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
    return

  g = tf.Graph()
  with g.as_default():
    with tf.device(device_fn):
      # If ps_tasks is zero, the local device is used. When using multiple
      # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
      # across the different devices.
      tf_dataset = data_providers.get_input_fn_from_dataset(
          dataset_config_filename=FLAGS.dataset_config_pbtxt,
          mode=tf.estimator.ModeKeys.TRAIN,
          max_examples=FLAGS.max_examples,
          use_tpu=use_tpu)
      model = modeling.get_model(FLAGS.model_name)
      logging.info('Running training on %s with model %s and tpu %s',
                   tf_dataset, FLAGS.model_name, use_tpu)

      batches_per_epoch = tf_dataset.num_examples // FLAGS.batch_size
      logging.info('Batches per epoch %s', batches_per_epoch)
      params = dict(batches_per_epoch=batches_per_epoch,)
      estimator = model.make_estimator(
          batch_size=FLAGS.batch_size,
          model_dir=FLAGS.train_dir,
          params=params,
          use_tpu=use_tpu,
          master=target,
          start_from_checkpoint=FLAGS.start_from_checkpoint,
      )

      training_hooks = None
      if FLAGS.use_early_stopping:
        # Early stopping hook depends on existence of events directory.
        eval_dir = os.path.join(FLAGS.train_dir,
                                FLAGS.early_stopping_directory)
        tf.gfile.MakeDirs(eval_dir)

        plateau_decrease = True
        if FLAGS.early_stopping_metric_direction == 'increase':
          plateau_decrease = False

        early_stopping_hook = metrics_hook.EarlyStoppingHook(
            events_dir=eval_dir,
            tag=FLAGS.early_stopping_tag,
            num_plateau_steps=FLAGS.early_stopping_num_plateau_steps,
            plateau_delta=FLAGS.early_stopping_plateau_delta,
            plateau_decrease=plateau_decrease,
            every_n_steps=FLAGS.early_stopping_every_n_steps)

        training_hooks = [early_stopping_hook]

      estimator.train(
          input_fn=tf_dataset,
          max_steps=FLAGS.number_of_steps,
          hooks=training_hooks)
def test_get_model_unknown_model_signals_error(self):
  with six.assertRaisesRegex(self, ValueError, 'Unknown model'):
    modeling.get_model('unknown_model_1234')
def run(target, is_chief, device_fn):
  """Run training.

  Args:
    target: The target of the TensorFlow standard server to use. Can be the
      empty string to run locally using an inprocess server.
    is_chief: Boolean indicating whether this process is the chief.
    device_fn: Device function used to assign ops to devices.
  """
  if not FLAGS.dataset_config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
    return

  g = tf.Graph()
  with g.as_default():
    model = modeling.get_model(FLAGS.model_name)
    dataset = data_providers.get_dataset(FLAGS.dataset_config_pbtxt)
    print('Running training on {} with model {}\n'.format(dataset, model))

    with tf.device(device_fn):
      # If ps_tasks is zero, the local device is used. When using multiple
      # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
      # across the different devices.
      images, labels, _ = data_providers.make_training_batches(
          dataset.get_slim_dataset(), model, FLAGS.batch_size)
      endpoints = model.create(images, dataset.num_classes, is_training=True)
      labels = slim.one_hot_encoding(labels, dataset.num_classes)
      total_loss = model.loss(endpoints, labels)

      # Setup the moving averages:
      moving_average_variables = slim.get_model_variables()
      moving_average_variables.extend(slim.losses.get_losses())
      moving_average_variables.append(total_loss)

      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, slim.get_or_create_global_step())
      tf.add_to_collection(
          tf.GraphKeys.UPDATE_OPS,
          variable_averages.apply(moving_average_variables))

      # Configure the learning rate using an exponential decay.
      decay_steps = int(
          ((1.0 * dataset.num_examples) / FLAGS.batch_size) *
          FLAGS.num_epochs_per_decay)

      learning_rate = tf.train.exponential_decay(
          FLAGS.learning_rate,
          slim.get_or_create_global_step(),
          decay_steps,
          FLAGS.learning_rate_decay_factor,
          staircase=True)

      opt = tf.train.RMSPropOptimizer(learning_rate, FLAGS.rmsprop_decay,
                                      FLAGS.rmsprop_momentum,
                                      FLAGS.rmsprop_epsilon)

      # Create training op
      train_tensor = slim.learning.create_train_op(
          total_loss,
          optimizer=opt,
          update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

      # Summaries:
      slim.summaries.add_histogram_summaries(slim.get_model_variables())
      slim.summaries.add_scalar_summaries(slim.losses.get_losses(), 'losses')
      slim.summaries.add_scalar_summary(total_loss, 'Total_Loss', 'losses')
      slim.summaries.add_scalar_summary(learning_rate, 'Learning_Rate',
                                        'training')
      slim.summaries.add_histogram_summaries(endpoints.values())
      slim.summaries.add_zero_fraction_summaries(endpoints.values())
      # redacted

      # Set start-up delay
      startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

      init_fn = model_init_function(model, dataset.num_classes,
                                    FLAGS.start_from_checkpoint)

      saver = tf.train.Saver(
          max_to_keep=FLAGS.max_checkpoints_to_keep,
          keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)

      # Train model
      slim.learning.train(
          train_tensor,
          number_of_steps=FLAGS.number_of_steps,
          logdir=FLAGS.train_dir,
          master=target,
          init_fn=init_fn,
          is_chief=is_chief,
          saver=saver,
          startup_delay_steps=startup_delay_steps,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs)
    help='The decay to use for the moving average')
parser.add_argument(
    '--channels', default=6, type=int,
    help='Number of channels in input tensor')
parser.add_argument(
    '--width', default=221, type=int, help='Width of the input tensor')
parser.add_argument(
    '--height', default=100, type=int, help='Height of the input tensor')
args = parser.parse_args()

model = get_model('inception_v3')
out_node = 'InceptionV3/Predictions/Reshape_1'
in_node = 'input'

inp = tf.compat.v1.placeholder(
    shape=[1, args.height, args.width, args.channels],
    dtype=tf.float32,
    name=in_node)
b = model.create(inp, num_classes=3, is_training=False)

ema = tf.train.ExponentialMovingAverage(args.moving_average_decay)
variables_to_restore = ema.variables_to_restore()

load_ema = slim.assign_from_checkpoint_fn(args.checkpoint,
                                          variables_to_restore,
def test_get_model_unknown_model_signals_error(self):
  with self.assertRaisesRegexp(ValueError, 'Unknown model'):
    modeling.get_model('unknown_model_1234')