Example #1
 def validate_train_hook_name(self,
                              test_hook_name,
                              expected_hook_name,
                              **kwargs):
     returned_hook = hooks_helper.get_train_hooks([test_hook_name], **kwargs)
     self.assertEqual(len(returned_hook), 1)
     self.assertIsInstance(returned_hook[0], tf.train.SessionRunHook)
     self.assertEqual(returned_hook[0].__class__.__name__.lower(),
                      expected_hook_name)
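A concrete test built on this helper passes a registered hook name together with its lower-cased class name. A minimal sketch, assuming 'LoggingTensorHook' is one of the names hooks_helper registers:

 def test_get_train_hooks_logging_tensor_hook(self):
     # 'LoggingTensorHook' is assumed to be registered in hooks_helper; the
     # helper above compares the returned hook's class name in lower case.
     self.validate_train_hook_name('LoggingTensorHook', 'loggingtensorhook')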
Example #2
def main(argv):
    parser = ArgParser()
    flags = parser.parse_args(args=argv[1:])
    model_function = model_fn
    data_format = flags.data_format
    if data_format is None:
        data_format = ('channels_first'
                       if tf.test.is_built_with_cuda() else 'channels_last')
    classifier = tf.estimator.Estimator(model_fn=model_function,
                                        model_dir=flags.model_dir,
                                        params={
                                            'data_format': data_format,
                                            'image_size': flags.image_size
                                        })

    def train_input_fn():
        # ds = load_data(os.path.join(os.path.join(flags.data_root, 'data'), 'train'), flags.image_size)

        ds = prepare_dataset_pair(flags.data_root, 'train', 10)
        ds = ds.cache().shuffle(buffer_size=50000).batch(flags.batch_size)
        ds = ds.repeat(flags.epochs_between_evals)
        return ds

    def eval_input_fn():
        testset = prepare_dataset_pair(flags.data_root, 'test', 10)
        return testset.batch(
            flags.batch_size).make_one_shot_iterator().get_next()

    train_hooks = hooks_helper.get_train_hooks(flags.hooks,
                                               batch_size=flags.batch_size)

    # Train and evaluate model.
    for _ in range(flags.train_epochs // flags.epochs_between_evals):
        classifier.train(input_fn=train_input_fn, hooks=train_hooks)
        eval_results = classifier.evaluate(input_fn=eval_input_fn)
        print('\nEvaluation results:\n\t%s\n' % eval_results)

    # Export the model
    image = tf.placeholder(tf.float32,
                           [None, flags.image_size, flags.image_size])
    input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        {'image': image})
    classifier.export_savedmodel(flags.export_dir, input_fn)
    shutil.rmtree(flags.model_dir)
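This main expects the raw argv list; in a TF 1.x script the entry point is usually wired up as in the sketch below (sys and tf are assumed to be imported at module level):

if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    main(sys.argv)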
Example #3
def main(argv):
    parser = ResnetArgParser()

    parser.add_argument(
        '--model_class',
        '-mc',
        default='cifar10',
        help=
        "[default: %(default)s] The model you are performing experiment on.",
        metavar='<MC>')

    parser.add_argument(
        '--output_path',
        '-op',
        default='/tmp/output',
        help=
        "[default: %(default)s] The location of the estimator model after phase2.",
        metavar='<OP>')

    parser.add_argument(
        '--phase_one',
        '-pz',
        default='/tmp/models/cifar10/phase1',
        help=
        "[default: %(default)s] The directory where we stored the results from phase1",
        metavar='<PZ>')

    # Set defaults that are reasonable for this model.
    parser.set_defaults(data_dir='/tmp/cifar10_data',
                        resnet_size=32,
                        batch_size=128,
                        version=2,
                        output_path='/tmp/models/cifar10/phase2',
                        method='cp',
                        scope='cp',
                        rate=0.15,
                        rate_decay='flat')

    flags = parser.parse_args(args=argv[1:])
    '''Define the parameters we need for each experiment'''
    if flags.model_class == 'cifar10':
        model_class, input_fn, model_fn = Cifar10Model, cifar_input_fn, cifar10_model_fn
        model_conversion_fn = cifar10_model_conversion_fn
    else:
        model_class, input_fn, model_fn = ImagenetModel, imagenet_input_fn, imagenet_model_fn
        model_conversion_fn = imagenet_model_conversion_fn

    data_dir = flags.data_dir

    resnet_size, batch_size, version = flags.resnet_size, flags.batch_size, flags.version
    method, scope = flags.method, flags.scope
    compression_rate, epoch_num = flags.rate, flags.train_epochs

    phase1_store, output_path = flags.phase_one, flags.output_path

    checkpoint_dir = '%s/%s/rate%s' % (phase1_store, method, compression_rate)

    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)

    session_config = tf.ConfigProto(device_count={'GPU': 1},
                                    inter_op_parallelism_threads=5,
                                    intra_op_parallelism_threads=10,
                                    allow_soft_placement=True)
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)

    model_output_dir = "%s/%s/rate%s/" % (output_path, method,
                                          compression_rate)

    # Start phase2 from a clean output directory.
    if os.path.exists(model_output_dir):
        shutil.rmtree(model_output_dir)
    os.makedirs(model_output_dir)

    classifier = tf.estimator.Estimator(model_fn=model_conversion_fn,
                                        model_dir=model_output_dir,
                                        config=run_config,
                                        params={
                                            'resnet_size': resnet_size,
                                            'data_format': None,
                                            'batch_size': batch_size,
                                            'multi_gpu': flags.multi_gpu,
                                            'version': version,
                                            'checkpoint': checkpoint_file,
                                            'method': method,
                                            'scope': scope,
                                            'rate': compression_rate,
                                            'rate_decay': flags.rate_decay,
                                        })

    train_hooks = hooks_helper.get_train_hooks(flags.hooks,
                                               batch_size=batch_size)

    def input_fn_train():
        return input_fn(True, data_dir, batch_size, 1, 10, False)

    classifier.train(input_fn=input_fn_train, hooks=train_hooks, max_steps=1)

    print("phase2 model saved to %s" % model_output_dir)

    def input_fn_eval():
        # Evaluate with the input_fn selected above for the chosen model_class.
        return input_fn(False, data_dir, batch_size, 1, 10, False)

    eval_results = classifier.evaluate(input_fn=input_fn_eval, steps=None)
    print(eval_results)
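With the parser defaults set above (method='cp', rate=0.15), the directory layout this conversion step assumes works out to the paths below; this is just the string formatting above made concrete, not new configuration:

phase1_checkpoint_dir = '%s/%s/rate%s' % ('/tmp/models/cifar10/phase1', 'cp', 0.15)
# -> '/tmp/models/cifar10/phase1/cp/rate0.15', read via tf.train.latest_checkpoint
phase2_model_dir = "%s/%s/rate%s/" % ('/tmp/models/cifar10/phase2', 'cp', 0.15)
# -> '/tmp/models/cifar10/phase2/cp/rate0.15/', created fresh and passed to the Estimator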
Example #4
def main(argv):
    parser = ResnetArgParser()

    parser.add_argument(
        '--output_path',
        '-op',
        default='/tmp/cifar10_model_tensor_based',
        help=
        "[default: %(default)s] The location of the tensorized model of phase0.",
        metavar='<OP>')

    parser.add_argument(
        '--inter_store',
        '-is',
        default='/tmp/intermediate_storage/',
        help="[default: %(default)s] The tmp location of intermediate results",
        metavar='<IS>')

    # Set defaults that are reasonable for this model.
    parser.set_defaults(filename='normal_weights.ckpt',
                        method='normal',
                        scope='normal',
                        rate_decay='flat')

    flags = parser.parse_args(args=argv[1:])
    '''
    Save the weights from the original resnet model to our model with modified scopes.
    The variable names are changed; we assume the two models have the same structure.
    '''

    checkpoint = tf.train.latest_checkpoint(flags.model_dir) + ".meta"
    saver = tf.train.import_meta_graph(checkpoint)
    var_p_values = []

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        saver.restore(sess, tf.train.latest_checkpoint(flags.model_dir))

        var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        var = [v for v in var_list if 'Momentum' not in v.name]

        for i in range(1, len(var)):
            var_p_values.append(sess.run(var[i]))

    tf.reset_default_graph()

    model = ImagenetModel(flags.resnet_size,
                          flags.data_format,
                          version=flags.version)
    dataset = input_fn(is_training=False,
                       data_dir=flags.data_dir,
                       batch_size=flags.batch_size)
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    outputs = model(next_element[0], False)

    checkpoint_file = flags.inter_store + "/" + flags.filename  # intermediate storage
    var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(len(var_list)):
            sess.run(var_list[i].assign(var_p_values[i]))

        new_saver = tf.train.Saver(var_list)
        new_saver.save(sess, checkpoint_file)
    '''
    Load the weights above (with modified names) into our resnet model 
    and save it via estimator
    '''
    session_config = tf.ConfigProto(inter_op_parallelism_threads=5,
                                    intra_op_parallelism_threads=10,
                                    allow_soft_placement=True)
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)
    output_model_path = flags.output_path
    # Start from a clean output directory for the converted model.
    if os.path.exists(output_model_path):
        shutil.rmtree(output_model_path)
    os.makedirs(output_model_path)

    classifier = tf.estimator.Estimator(model_fn=imagenet_model_conversion_fn,
                                        model_dir=output_model_path,
                                        config=run_config,
                                        params={
                                            'resnet_size': flags.resnet_size,
                                            'data_format': flags.data_format,
                                            'batch_size': flags.batch_size,
                                            'multi_gpu': True,
                                            'version': flags.version,
                                            'checkpoint': checkpoint_file,
                                            'method': flags.method,
                                            'scope': flags.scope,
                                            'rate': flags.rate,
                                            'rate_decay': flags.rate_decay,
                                        })

    train_hooks = hooks_helper.get_train_hooks(flags.hooks,
                                               batch_size=flags.batch_size)

    def input_fn_train():
        return input_fn(True, flags.data_dir, flags.batch_size, 1, 10, False)

    classifier.train(input_fn=input_fn_train, hooks=train_hooks, max_steps=1)
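The weight copy in this example is purely positional: values read from the original checkpoint are assigned to the new graph's variables in creation order (minus the filtered Momentum slots and the variable skipped at index 0). A cheap safeguard is a shape check before the assign loop; a minimal sketch, not part of the original script:

for old_value, new_var in zip(var_p_values, var_list):
    # old_value is a numpy array pulled from the source graph; new_var is a
    # tf.Variable in the rebuilt graph. Mismatched shapes mean the two graphs
    # do not create their variables in the same order.
    assert old_value.shape == tuple(new_var.shape.as_list()), (
        'shape mismatch for %s' % new_var.name)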
Example #5
 def test_raise_in_invalid_names(self):
     invalid_names = ['StepCounterHook', 'StopAtStepHook']
     with self.assertRaises(ValueError):
         hooks_helper.get_train_hooks(invalid_names, batch_size=256)
Example #6
 def test_raise_in_non_list_names(self):
     with self.assertRaises(ValueError):
         hooks_helper.get_train_hooks(
             'LoggingTensorHook, ProfilerHook', batch_size=256)
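Examples #5 and #6 both rely on get_train_hooks rejecting bad input with ValueError. A registry-style helper with that behavior might look roughly like the sketch below; the HOOKS dict, its entries, and the error messages are assumptions for illustration, not the library's actual code:

import tensorflow as tf

# Assumed registry mapping lower-cased hook names to factory callables.
HOOKS = {
    'loggingtensorhook': lambda **kwargs: tf.train.LoggingTensorHook(
        tensors={'global_step': 'global_step:0'}, every_n_iter=100),
    'profilerhook': lambda **kwargs: tf.train.ProfilerHook(
        save_steps=1000, output_dir='/tmp/profiler'),
}

def get_train_hooks(name_list, **kwargs):
    if not isinstance(name_list, (list, tuple)):
        # A bare string such as 'LoggingTensorHook, ProfilerHook' is rejected
        # here, which is what Example #6 asserts.
        raise ValueError('get_train_hooks expects a list of hook names.')
    hooks = []
    for name in name_list:
        factory = HOOKS.get(name.strip().lower())
        if factory is None:
            # Unrecognized names such as 'StepCounterHook' (Example #5) land here.
            raise ValueError('Unrecognized training hook requested: %s' % name)
        hooks.append(factory(**kwargs))
    return hooks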
Example #7
def resnet_main(flags, model_function, input_function):
  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
  
  if not os.path.exists(flags.model_dir):
    os.makedirs(flags.model_dir)
  
  logging.basicConfig(level=logging.INFO,
                      datefmt='%m-%d %H:%M',
                      filename='%s/%s_%s.log' %(flags.model_dir, flags.method, flags.rate),
                      filemode='a+')

  logging.info("Starting end to end training...")

  if flags.multi_gpu:
    validate_batch_size_for_multi_gpu(flags.batch_size)

    # There are two steps required if using multi-GPU: (1) wrap the model_fn,
    # and (2) wrap the optimizer. The first happens here, and (2) happens
    # in the model_fn itself when the optimizer is defined.
    model_function = tf.contrib.estimator.replicate_model_fn(
        model_function,
        loss_reduction=tf.losses.Reduction.MEAN)

  # Create session config based on values of inter_op_parallelism_threads and
  # intra_op_parallelism_threads. Note that we default to having
  # allow_soft_placement = True, which is required for multi-GPU and not
  # harmful for other modes.
  
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
      allow_soft_placement=True)

  # Set up a RunConfig to save checkpoint and set session config.
  run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9,
                                                session_config=session_config)
  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=flags.model_dir, config=run_config,
      params={
          'resnet_size': flags.resnet_size,
          'data_format': flags.data_format,
          'batch_size': flags.batch_size,
          'multi_gpu': flags.multi_gpu,
          'version': flags.version,
          'method': flags.method,
          'scope': flags.scope,
          'rate': flags.rate,
          'rate_decay': flags.rate_decay
      })

  for epoch in range(flags.train_epochs // flags.epochs_between_evals):
    train_hooks = hooks_helper.get_train_hooks(flags.hooks,
                                               batch_size=flags.batch_size)

    current_epoch = (epoch+1)*flags.epochs_between_evals
    print('Starting a training cycle up to epoch %d' %current_epoch)
    logging.info('Starting a training cycle up to epoch %d' %current_epoch)

    def input_fn_train():
      return input_function(True, flags.data_dir, flags.batch_size,
                            flags.epochs_between_evals,
                            flags.num_parallel_calls, flags.multi_gpu)

    classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                     max_steps=flags.max_train_steps)

    print('Starting to evaluate.')
    logging.info('Starting to evaluate.')

    # Evaluate the model and print results
    def input_fn_eval():
      return input_function(False, flags.data_dir, flags.batch_size,
                            1, flags.num_parallel_calls, flags.multi_gpu)

    # flags.max_train_steps is generally associated with testing and profiling.
    # As a result it is frequently called with synthetic data, which will
    # iterate forever. Passing steps=flags.max_train_steps allows the eval
    # (which is generally unimportant in those circumstances) to terminate.
    # Note that eval will run for max_train_steps each loop, regardless of the
    # global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags.max_train_steps)

    print("Testing accuracy on epoch %d: %s" %((epoch+1)*flags.epochs_between_evals, eval_results))
    logging.info("Testing accuracy on epoch %d: %s" %((epoch+1)*flags.epochs_between_evals, eval_results))