def tower_fn(build_arch,
             x,
             y,
             scope,
             num_classes,
             is_train=True,
             reuse_variables=None):
  """Model tower to be run on each GPU.
  
  Args:
    build_arch: function that builds the network graph (capsule network or
      CNN baseline) and returns a dict of outputs including 'scores'
    x: split of batch_x allocated to particular GPU
    y: split of batch_y allocated to particular GPU
    scope: name scope of the current tower
    num_classes: number of output classes
    is_train: whether the tower is built for training or evaluation
    reuse_variables: False for the first GPU, and True for subsequent GPUs

  Returns:
    loss: mean loss across samples for one tower (scalar)
    scores:
      If the architecture is a capsule network, then the scores are the
      output activations of the class caps.
      If the architecture is the CNN baseline, then the scores are the
      logits of the final layer.
      shape (samples_per_tower, n_classes), e.g. (64/4=16, 5) for a batch
      of 64 split across 4 GPUs with 5 classes
    x: the input split with the adversarial patch applied
    patch: the adversarial patch
    targets: adversarial target labels, same shape as y
  """
  
  with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
    # Apply the adversarial patch to this tower's input split.
    x, patch = patch_inputs(x, is_train=is_train, reuse=reuse_variables)
    # The classifier itself is built in inference mode (is_train=False).
    output = build_arch(x, False, num_classes=num_classes)
  # Assign every sample in the split the same adversarial target class.
  targets = tf.fill(dims=y.get_shape().as_list(),
                    value=FLAGS.target_class,
                    name="adversarial_targets")
  if FLAGS.carliniwagner:
    # Carlini-Wagner adversarial objective
    loss = mod.carlini_wagner_loss(output, targets, num_classes)
  else:
    # default hinge (margin) loss from Hinton et al.
    loss = mod.total_loss(output, targets)
  return loss, output['scores'], x, patch, targets
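Neither loss implementation is shown above. As a rough sketch, assuming `scores` holds per-class logits or activations of shape (batch, num_classes) and `targets` holds integer target labels, the two objectives might look like the following; `kappa`, `m_plus`, `m_minus`, `lam` and both function names are illustrative, not the repo's actual `mod` implementations:

import tensorflow as tf

def carlini_wagner_loss_sketch(scores, targets, num_classes, kappa=0.0):
  """Targeted Carlini-Wagner margin: push the target logit above all others."""
  t = tf.one_hot(targets, num_classes)                      # (batch, classes)
  target_logit = tf.reduce_sum(t * scores, axis=1)          # Z(x)_t
  # Mask out the target class so the max runs over i != t only.
  other_max = tf.reduce_max((1.0 - t) * scores - t * 1e9, axis=1)
  # Minimising this drives the target logit above every other logit.
  return tf.reduce_mean(tf.maximum(other_max - target_logit, -kappa))

def margin_loss_sketch(scores, targets, num_classes,
                       m_plus=0.9, m_minus=0.1, lam=0.5):
  """Margin (hinge) loss from Sabour, Frosst & Hinton (2017)."""
  t = tf.one_hot(targets, num_classes)
  present = t * tf.square(tf.maximum(0.0, m_plus - scores))
  absent = lam * (1.0 - t) * tf.square(tf.maximum(0.0, scores - m_minus))
  return tf.reduce_mean(tf.reduce_sum(present + absent, axis=1))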
Example #2
def tower_fn(build_arch,
             x,
             y,
             scope,
             num_classes,
             is_train=True,
             reuse_variables=None):
    """Model tower to be run on each GPU.
  
  Author:
    Ashley Gritzman 27/11/2018
    
  Args: 
    build_arch:
    x: split of batch_x allocated to particular GPU
    y: split of batch_y allocated to particular GPU
    scope:
    num_classes:
    is_train:
    reuse_variables: False for the first GPU, and True for subsequent GPUs

  Returns:
    loss: mean loss across samples for one tower (scalar)
    scores: 
      If the architecture is a capsule network, then the scores are the output 
      activations of the class caps.
      If the architecture is the CNN baseline, then the scores are the logits of 
      the final layer.
      (samples_per_tower, n_classes)
      (64/4=16, 5)
  """

    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        # Build the network for this tower; variables are shared across
        # towers via reuse_variables.
        output = build_arch(x, is_train, num_classes=num_classes)
        scores = output['scores']

    # Mean loss over the samples in this tower's split of the batch.
    loss = mod.total_loss(scores, y)

    return loss, scores
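For context, the reuse_variables convention in the docstring matches the usual TF1 multi-tower pattern. A minimal sketch of a driver loop that would call tower_fn once per GPU; num_gpus, batch_x, batch_y and build_arch are assumed to be defined upstream:

import tensorflow as tf

num_gpus = 4
x_splits = tf.split(batch_x, num_gpus)  # batch_x, batch_y: full input batch
y_splits = tf.split(batch_y, num_gpus)

tower_losses, tower_scores = [], []
for i in range(num_gpus):
    with tf.device('/gpu:%d' % i), tf.name_scope('tower_%d' % i) as scope:
        # First tower creates the variables; subsequent towers reuse them.
        loss, scores = tower_fn(build_arch, x_splits[i], y_splits[i], scope,
                                num_classes=5, is_train=True,
                                reuse_variables=(i > 0))
        tower_losses.append(loss)
        tower_scores.append(scores)

# Average per-tower losses before computing gradients.
total_loss = tf.reduce_mean(tf.stack(tower_losses))
all_scores = tf.concat(tower_scores, axis=0)  # (batch, num_classes)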