def __init__(self, learning_rate=0.1, epsilon=None, use_locking=False):
    super(RayGrad, self).__init__(learning_rate,
                                  epsilon=epsilon,
                                  use_locking=use_locking)
    self.learning_rate = learning_rate
    self.epsilon = epsilon
    self.memory_size = S("optimizer.memory_size")
    self.loss_collect_last = S("optimizer.collect_last")


def _RedoRestFilters(graph):
    """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.

  Raises:
    ValueError: When batch norm folding fails.
  """
    matches = _FindRestFilters(graph)
    print(
        "Replacing", len(matches),
        "Conv|Mul|DepthwiseConv2dNative-Filters (without a suceeding BatchNorm)"
    )
    for match in matches:
        scope, sep, _ = match['layer_op'].name.rpartition('/')
        # Make sure new ops are added to `graph` and put on the same device as
        # the matched layer op. The '/' (i.e. `sep`) ensures that we reuse the
        # existing scope named `scope`. Otherwise, TF creates a unique scope
        # whose name starts with `scope`.
        with graph.as_default(), graph.name_scope(scope + sep):
            with graph.name_scope(scope + sep + '_psb' + sep):

                weight = match['weight_tensor']

                # >>>>> CUSTOM >>>>>>>>>>>>>>
                # use hidden variable instead
                sampled_weight = variableFromSettings([], hiddenVar=weight)[0]
                # <<<<<<<<<<<<<<<<<<<<<<<<<<<

                new_layer_tensor = _CloneWithNewOperands(
                    match['layer_op'], match['input_tensor'], sampled_weight,
                    False)
                if S("util.variable.fixed_point.use"):
                    new_layer_tensor = fixed_point(
                        new_layer_tensor,
                        S("util.variable.fixed_point.bits"),
                        max=S("util.variable.fixed_point.max"),
                        min=S("util.variable.fixed_point.min"))

                nodes_modified_count = common.RerouteTensor(
                    new_layer_tensor, match['output_tensor'])
                if nodes_modified_count == 0:
                    raise ValueError(
                        'Replacing filter weights failed, %s had no outputs.' %
                        match['output_tensor'].name)
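

# A hedged usage sketch: the rewrite above mutates an existing graph in place,
# e.g. right after the model function has built its layers (the graph handle
# below is an assumption about the calling code, not part of this module):
#
#   _RedoRestFilters(tf.get_default_graph())

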
def preprocess_image(image_buffer,
                     bbox,
                     output_height,
                     output_width,
                     num_channels,
                     is_training=False):
    """Preprocesses the given image.

  Preprocessing includes decoding, cropping, and resizing for both training
  and eval images. Training preprocessing, however, introduces some random
  distortion of the image to improve accuracy.

  Args:
    image_buffer: scalar string Tensor representing the raw JPEG image buffer.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    num_channels: Integer depth of the image buffer for decoding.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.

  Returns:
    A preprocessed image.
  """
    if is_training:
        # For training, we want to randomize some of the distortions.
        image = _decode_crop_and_flip(image_buffer, bbox, num_channels)
        image = _resize_image(image, output_height, output_width)
    else:
        # For validation, we want to decode, resize, then just crop the middle.
        image = tf.image.decode_jpeg(image_buffer, channels=num_channels)
        image = _aspect_preserving_resize(image, _RESIZE_MIN)
        image = _central_crop(image, output_height, output_width)

    image.set_shape([output_height, output_width, num_channels])

    if S("dataset_mean_image_subtraction") == "pytorch":
        print("pytorch -mode: scale to -1,1")
        image /= 255.0
        image -= 0.5
        image *= 2.0
    elif S("dataset_mean_image_subtraction"):
        print("tensorflow-mode: mean image subtraction")
        image = _mean_image_subtraction(image, _CHANNEL_MEANS, num_channels)

    return image
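

# A hedged usage sketch: inside a tf.data pipeline the raw JPEG bytes could be
# preprocessed with a whole-image bounding box; the 224x224 target size below
# is illustrative, not taken from this module:
#
#   bbox = tf.constant([0.0, 0.0, 1.0, 1.0], shape=[1, 1, 4])
#   image = preprocess_image(raw_jpeg_bytes, bbox,
#                            output_height=224, output_width=224,
#                            num_channels=3, is_training=True)

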
def next_base2(x,
               strict_positive=False,
               stochastic=False,
               min=1e-8,
               binom_n=64):
    with tf.name_scope('next_base2'):
        x_start = x
        if strict_positive:
            sign = 1
        else:
            sign = tf.sign(x)
        if stochastic:
            # x_next_base2 = tf.floor(tf.log(tf.abs(x+eps))/tf.log(2.0))
            x_next_base2 = tf.floor(
                tf.log(tf.maximum(tf.abs(x), min)) / tf.log(2.0))
            x_perc_missing = tf.abs(x) / 2**x_next_base2 - 1
            # w_add = where_binarize[0,1]->{0,1}(x+exs)
            print("next_base2: stochastic-mode '" + str(stochastic) + "'")
            if stochastic == "binomial" or stochastic == "binom":
                memory_size = binom_n
                w_add = sample_binomial(x_perc_missing, memory_size,
                                        S('binom.log_eps')) / memory_size
                tf.summary.histogram("w_add", w_add)
            else:
                w_add = tf.where(
                    tf.random.uniform(x.get_shape().as_list()) <=
                    x_perc_missing, tf.ones_like(x), tf.zeros_like(x))
            x_next_base2 += w_add
        else:
            x_next_base2 = tf.ceil(
                tf.log(tf.maximum(tf.abs(x), min)) / tf.log(2.0))
        return pass_gradient(x_start,
                             lambda x: sign * 2**x_next_base2,
                             name='next_base2')
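

# A minimal NumPy sketch of the deterministic branch above (stochastic=False);
# `np_next_base2` is a hypothetical helper used only to illustrate the rounding
# behaviour and is not part of this module.
import numpy as np


def np_next_base2(x, min=1e-8):
    sign = np.sign(x)
    exponent = np.ceil(np.log(np.maximum(np.abs(x), min)) / np.log(2.0))
    return sign * 2.0**exponent


assert np_next_base2(0.3) == 0.5    # |x| is rounded up to the next power of two
assert np_next_base2(-3.0) == -4.0  # the sign is preserved

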
def network_inner(data, labels_one_hot, mode):
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    id = lambda net, name=None: net
    GLOBAL["weight_counter"] = 0

    print("is_training:" + str(is_training))
    batch_normalization = lambda net, name=None: tfl.batch_normalization(
        net, name=name, reuse=is_training, training=is_training)

    numclasses = GLOBAL["dataset"].num_classes()
    data = to_picture_shape(data)
    net = data
    use_bias = False

    # stack convs
    for i, channels in enumerate(S("model.resnet.conv_blocks")):
        with tf.variable_scope("conv" + str(i)):
            net = tfl.conv2d(net,
                             channels,
                             3,
                             strides=2,
                             padding="SAME",
                             use_bias=use_bias)
            net = batch_normalization(net)
            net = activation(net)

    # end
    net = tf.reduce_mean(net, [1, 2], name='pool5', keepdims=True)
    tf.summary.histogram("pre_fc", net)
    if S("model.resnet.last_layer_real"):
        net = tfl.dense(net, numclasses)
    else:
        net = tfl.conv2d(net,
                         numclasses,
                         1,
                         strides=1,
                         padding="SAME",
                         use_bias=use_bias)
    net = tf.reshape(net, [-1, numclasses])
    return net


def network(data, labels_one_hot, mode):
    model_name = S("model.classification_models.model")
    dataset = S("model.classification_models.dataset")

    # keras.backend.set_learning_phase(1 if mode==tf.estimator.ModeKeys.TRAIN else 0) # 0: Test(default), 1: Train
    keras.backend.set_learning_phase(0)  # 0: Test(default), 1: Train
    classifier, preprocess_input = Classifiers.get(model_name)

    # overwrite preprocess_input for mobilenet (workaround for a bug in keras_applications)
    if "mobilenet" in model_name:
        from keras.applications import imagenet_utils
        preprocess_input = lambda data: imagenet_utils.preprocess_input(
            data, mode='tf')

    # apply model
    data = preprocess_input(data)
    GLOBAL["keras_model_preprocess"] = preprocess_input
    model = classifier((224, 224, 3), input_tensor=data, weights=dataset)
    GLOBAL["keras_model"] = model
    logits = model.output

    # Keras models do not include the empty class; prepend a constant zero logit for it
    logits = tf.concat([tf.expand_dims(logits[:, 0] * 0, 1), logits], axis=-1)
    return logits


    def get_filenames(self):
        if S("dataset_join_train_val"):
            if self.subset == 'train':
                print([
                    os.path.join(self.data_dir, 'train.tfrecords'),
                    os.path.join(self.data_dir, 'validation.tfrecords')
                ])
                print(
                    "joining training and validation set (leaving only the test set for testing)"
                )
                return [
                    os.path.join(self.data_dir, 'train.tfrecords'),
                    os.path.join(self.data_dir, 'validation.tfrecords')
                ]

        if self.subset in ['train', 'validation', 'eval']:
            return [os.path.join(self.data_dir, self.subset + '.tfrecords')]
        else:
            raise ValueError('Invalid data subset "%s"' % self.subset)


def lossfn(net_out, data, labels_one_hot, mode):
    with tf.name_scope('cross_entropy'):
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels_one_hot,
                                                      logits=net_out)
        tf.summary.scalar("loss", loss)

    with tf.name_scope('regularization'):
        reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if len(reg_variables) > 0:
            reg = tf.reduce_mean(reg_variables, name="regularization_loss")
            tf.summary.scalar('regularization', reg)
        else:
            reg = 0

    with tf.name_scope('total_loss'):
        if reg != 0:
            total = loss + S("util.variable.regularizer.weight", alt=1.0) * reg
            tf.summary.scalar("total_loss", total)
            return total

    return loss


def variableFromSettings(shape, S=S, hiddenVar=None):
    # local variables for mini-parser
    V = {}

    # initializer
    base_initializer = getattr(tf.initializers, S("initializer"))
    initializer = TransformInitializer(base_initializer,
                                       S("transformation.init", alt=[]),
                                       dtype=getattr(tf, S("dtype")),
                                       seed=S("seed"))

    with tf.name_scope("var"):

        var_name = S("name")

        # define variable
        if hiddenVar is None:
            p = tf.get_variable(name=var_name,
                                shape=shape,
                                initializer=initializer,
                                regularizer=None,
                                trainable=True)
        else:
            p = hiddenVar

        # apply pruning
        if S("pruning.activate", scope=""):
            p = tf.contrib.model_pruning.apply_mask(
                p, scope=tf.contrib.framework.get_name_scope())

        V["p"] = p

        # check for shared tensors
        localvars_used = []
        for T in [
                S("transformation.hidden", alt=[]),
                S("transformation.weight", alt=[]),
                S("transformation.regularizer.weight_transformation", alt=[])
        ]:

            for i, t in enumerate(T):
                localvars = []
                t_orig = t
                if isinstance(t, tuple):
                    if t[0] not in ["w", "p", "x"]:
                        localvars_used.append(t[0])
                    t = t[1]

                def parse_func(t, fn, string_fn):
                    if not isinstance(t, str):
                        return str(t)
                    if t.startswith(fn + "(") and t.endswith(")"):
                        names = t[len(fn) + 1:-1].split(",")
                        t = string_fn(*names)
                    return t

                # parse predefined functions
                t = parse_func(
                    t, "relaxed_binarize_wolog(0±ε)->[0,1]", lambda var:
                    "tf.sigmoid((%s+eps + tf.log(rng) - tf.log(1-rng))/%s)" %
                    (var, "relaxation_temp"))
                t = parse_func(
                    t, "relaxed_binarize_wlog(1±ε)->[0,1]", lambda var:
                    "tf.sigmoid((tf.log(tf.abs(%s+eps)) + tf.log(rng) - tf.log(1-rng))/%s)"
                    % (var, "relaxation_temp"))
                t = parse_func(
                    t, "gumbel_binarize_wolog(1±ε)->[0,1]", lambda var:
                    "tf.abs(%s) + tf.log(rng) - tf.log(1-rng)" % var)
                t = parse_func(
                    t, "where_binarize[0,1]->{0,1}",
                    lambda var: "tf.where(rng <= " + var +
                    ", ones, zeros, name='sampled_filter')")
                t = parse_func(
                    t, "pass_through_binarize[0,1]->{-1,1}",
                    lambda var: "pass_gradient(" + var +
                    ", lambda p, localvars: tf.where(rng <= p, ones, -ones, name='sampled_filter'))"
                )
                t = parse_func(
                    t, "pass_through_binarize[0,1]->{0,1}",
                    lambda var: "pass_gradient(" + var +
                    ", lambda p, localvars: tf.where(rng <= p, ones, zeros, name='sampled_filter'))"
                )
                t = parse_func(
                    t, "softround", lambda var: var + " - tf.sin(2*np.pi*" +
                    var + ")/(2*np.pi)")
                t = parse_func(
                    t, "passed_round", lambda var: "2**pass_gradient(" + var +
                    ", lambda x: x - tf.sin(2*np.pi*x)/(2*np.pi))")
                t = parse_func(
                    t, "lecun_normalize", lambda var: "tf.identity((" + var +
                    "-tf.nn.moments(" + var + ",axes=None)[0])/tf.nn.moments("
                    + var + ",axes=None)[1]*np.sqrt(1/np.prod(" + var +
                    ".get_shape().as_list()[:-1])),name=\"lecun\")")
                t = parse_func(
                    t, "lecun_normalize_no_mean",
                    lambda var: "tf.identity((" + var + ")/tf.nn.moments(" +
                    var + ",axes=None)[1]*np.sqrt(1/np.prod(" + var +
                    ".get_shape().as_list()[:-1])),name=\"lecun\")")

                # get variables
                V["eps"] = 1e-5
                if "ones" in t and "ones" not in V:
                    localvars.append("ones")
                    V["ones"] = tf.ones(shape)
                if "zeros" in t and "zeros" not in V:
                    localvars.append("zeros")
                    V["zeros"] = tf.zeros(shape)
                if "rng" in t and "rng" not in V:
                    localvars.append("rng")
                    V["rng"] = tf.random_uniform(shape,
                                                 name="rng")  # independent

                for var in localvars_used:
                    if var in t and var not in localvars:
                        localvars.append(var)

                # replace localvars
                if "localvars" in t:
                    t = t.replace("localvars",
                                  ",".join([v + "=" + v for v in localvars]))

                # save modified t again
                if isinstance(t_orig, tuple):
                    T[i] = (t_orig[0], t)
                else:
                    T[i] = t

        # hidden variable transformations
        for t in S("transformation.hidden", alt=[]):
            if isinstance(t, tuple):
                name = t[0]
                V[name] = eval(t[1], {**G, **V})
                if name.lower() == "assert":
                    try:
                        assert V[name]
                    except AssertionError:
                        raise AssertionError(t[1])
            else:
                V["p"] = eval(t, {**G, **V})

        # map hidden weight to weight
        V["w"] = p
        for t in S("transformation.weight", alt=[]):
            if isinstance(t, tuple):
                name = t[0]
                V[name] = eval(t[1], {**G, **V})
                if name.lower() == "assert":
                    try:
                        assert V[name]
                    except AssertionError:
                        raise AssertionError(t[1])
            else:
                V["w"] = eval(t, {**G, **V})

    # add regularizer
    if S("regularizer.type") is not None:
        if all(var_name not in s
               for s in S("regularizer.exclude_names", alt=[])):
            tf.contrib.layers.apply_regularization(
                getattr(tf.contrib.layers,
                        S("regularizer.type"))(S("regularizer.weight")),
                [eval(S("regularizer.weight_transformation"), {
                    **G,
                    **V
                })])
        else:
            print("excluding:", var_name)

    GLOBAL["weight_counter"] += 1

    # return sampled weight / hidden variable - combo
    return V["w"], V["p"]
from util.helpers import pass_gradient, sample_binomial, next_base2, fixed_point
from util.initializer import TransformInitializer

# global variables for mini-parser
G = {
    "tf": tf,
    "np": np,
    "S": S,
    "pass_gradient": pass_gradient,
    "sample_binomial": sample_binomial,
    "next_base2": next_base2,
    "fixed_point": fixed_point,
    "GLOBAL": GLOBAL,
}

S = S(scope="util.variable")


def _FoldFusedBatchNorms(graph, is_training, freeze_batch_norm_delay):
    """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.
    freeze_batch_norm_delay: How many steps to wait before freezing moving mean
      and variance and using them for batch normalization.

  Raises:
    ValueError: When batch norm folding fails.
  """

    matches = list(_FindFusedBatchNorms(graph))
    print("Folding", len(matches), "FusedBatchNorms")
    for match in matches:
        scope, sep, _ = match.layer_op.name.rpartition('/')
        # Make sure new ops are added to `graph` and put on the same device as
        # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
        # named `scope`. Otherwise, TF creates a unique scope whose name starts with
        # `scope`.
        with graph.as_default(), graph.name_scope(scope + sep):
            with graph.name_scope(scope + sep + 'BatchNorm_Fold' + sep):
                # new weights = old weights * gamma / sqrt(variance + epsilon)
                # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
                multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
                    match.variance_tensor + match.bn_op.get_attr('epsilon'))
                bias_tensor = math_ops.subtract(match.beta_tensor,
                                                match.mean_tensor *
                                                multiplier_tensor,
                                                name='bias')

                correction_scale, correction_recip, correction_offset = None, None, None
                if is_training:
                    correction_scale, correction_recip, correction_offset = (
                        _ComputeBatchNormCorrections(
                            context='',
                            match=match,
                            freeze_batch_norm_delay=freeze_batch_norm_delay))
                # The shape of depthwise weights is different, so we need to reshape the
                # multiplier_tensor to ensure that the scaled_weight_tensor has the
                # expected shape.
                weights = match.weight_tensor

                # remember for the other loops
                matched_layer_set.add(match.layer_op)
                matched_layer_set.add(match.bn_op)

                if match.layer_op.type == 'DepthwiseConv2dNative':
                    new_shape = [
                        match.weight_tensor.get_shape().as_list()[2],
                        match.weight_tensor.get_shape().as_list()[3]
                    ]
                    multiplier_tensor = array_ops.reshape(multiplier_tensor,
                                                          new_shape,
                                                          name='scale_reshape')

                    if correction_scale is not None:
                        correction_scale = array_ops.reshape(
                            correction_scale,
                            new_shape,
                            name='correction_reshape')

                if correction_scale is not None:
                    weights = math_ops.multiply(correction_scale,
                                                weights,
                                                name='correction_mult')

                scaled_weight_tensor = math_ops.multiply(weights,
                                                         multiplier_tensor,
                                                         name='mul_fold')

                # >>>>> CUSTOM >>>>>>>>>>>>>>
                # use hidden variable instead
                scaled_weight_tensor = variableFromSettings(
                    [], hiddenVar=scaled_weight_tensor)[0]
                # bias_tensor = variableFromSettings([],hiddenVar=bias_tensor)[0]
                # bias_tensor = next_base2(bias_tensor, strict_positive=False, min=1e-8)
                if S("util.variable.fixed_point.use"):
                    bias_tensor = fixed_point(
                        bias_tensor,
                        S("util.variable.fixed_point.bits"),
                        max=S("util.variable.fixed_point.max"),
                        min=S("util.variable.fixed_point.min"))
                # <<<<<<<<<<<<<<<<<<<<<<<<<<<

                new_layer_tensor = _CloneWithNewOperands(
                    match.layer_op, match.input_tensor, scaled_weight_tensor,
                    match.batch_to_space_op)

                if correction_recip is not None:
                    new_layer_tensor = math_ops.multiply(correction_recip,
                                                         new_layer_tensor,
                                                         name='post_conv_mul')
                    new_layer_tensor = math_ops.add(new_layer_tensor,
                                                    (correction_offset),
                                                    'correction_add')
                if S("util.variable.fixed_point.use"):
                    new_layer_tensor = fixed_point(
                        new_layer_tensor,
                        S("util.variable.fixed_point.bits"),
                        max=S("util.variable.fixed_point.max"),
                        min=S("util.variable.fixed_point.min"))

                new_layer_tensor = math_ops.add(new_layer_tensor,
                                                bias_tensor,
                                                name='add_fold')
                if S("util.variable.fixed_point.use"):
                    new_layer_tensor = tf.clip_by_value(
                        new_layer_tensor, S("util.variable.fixed_point.min"),
                        S("util.variable.fixed_point.max"))

                nodes_modified_count = common.RerouteTensor(
                    new_layer_tensor, match.output_tensor)
                if nodes_modified_count == 0:
                    raise ValueError(
                        'Folding batch norms failed, %s had no outputs.' %
                        match.output_tensor.name)
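

# The fold above relies on the identity noted in the comments: applying batch
# norm to conv(x, W) equals conv(x, W * gamma / sqrt(var + eps)) plus the
# folded bias. A minimal scalar NumPy check with hypothetical values (a 1x1
# "convolution" reduces to a product):
import numpy as np

x, w = 1.7, 0.4
gamma, beta, mean, var, eps = 1.2, 0.1, 0.3, 0.9, 1e-3

bn_of_conv = gamma * (x * w - mean) / np.sqrt(var + eps) + beta
folded_weight = w * gamma / np.sqrt(var + eps)
folded_bias = beta - mean * gamma / np.sqrt(var + eps)
assert np.isclose(bn_of_conv, x * folded_weight + folded_bias)

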
def _RedoRestBatchnorms(graph, is_training):
    """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.
    is_training: Bool, true if training.

  Raises:
    ValueError: When batch norm folding fails.
  """
    matches = _FindRestBatchNorms(graph)
    print("Replacing", len(matches), "BatchNorms (without a preceding Conv2D)")
    for match in matches:
        scope, sep, _ = match.bn_op.name.rpartition('/')
        # Make sure new ops are added to `graph` and put on the same device as
        # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
        # named `scope`. Otherwise, TF creates a unique scope whose name starts with
        # `scope`.
        with graph.as_default(), graph.name_scope(scope + sep):
            with graph.name_scope(scope + sep + '_psb' + sep):

                mean = match.mean_tensor
                variance = match.variance_tensor
                beta = match.beta_tensor
                gamma = match.gamma_tensor
                eps = match.batch_epsilon

                # new gamma = gamma / sqrt(variance + epsilon)
                # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
                multfac = gamma / math_ops.sqrt(variance + eps)
                gamma = multfac
                beta = -multfac * mean + beta
                mean = array_ops.zeros_like(mean)
                variance = array_ops.ones_like(variance)
                eps = array_ops.zeros_like(eps)

                gamma = variableFromSettings([], hiddenVar=gamma)[0]
                # gamma = fixed_point(gamma,S("util.variable.fixed_point.bits"),max=S("util.variable.fixed_point.max"),min=S("util.variable.fixed_point.min"))
                # gamma = next_base2(gamma,strict_positive=False)
                # gamma = 1/variableFromSettings([],hiddenVar=1/gamma)[0]
                # variance = variableFromSettings([],hiddenVar=math_ops.sqrt(variance+eps))[0]**2
                # beta = variableFromSettings([],hiddenVar=beta)[0]
                if S("util.variable.fixed_point.use"):
                    beta = fixed_point(beta,
                                       S("util.variable.fixed_point.bits"),
                                       max=S("util.variable.fixed_point.max"),
                                       min=S("util.variable.fixed_point.min"))
                    # gamma = fixed_point(gamma,S("util.variable.fixed_point.bits"),max=S("util.variable.fixed_point.max"),min=S("util.variable.fixed_point.min"))
                    # mean = fixed_point(mean,S("util.variable.fixed_point.bits"),max=S("util.variable.fixed_point.max"),min=S("util.variable.fixed_point.min"))
                    # variance = fixed_point(variance,S("util.variable.fixed_point.bits"),max=S("util.variable.fixed_point.max"),min=S("util.variable.fixed_point.min"))

                # fixed_point division could be ok
                # silly silly_idiv(silly x, silly y) {
                #     uint64_t sign_bit = 1UL<<63;
                #     // unsetting the sign bit to ignore it
                #     silly res = ((x & ~sign_bit) / (y & sign_bit)) << 32;

                #     // setting the sign bit iff only one of sign bits is set
                #     res |= (x & sign_bit) ^ (y & sign_bit);
                #     return res;
                # }

            new_layer_tensor = nn.batch_normalization(
                match.input_tensor,
                mean,
                variance,
                beta,
                gamma,
                eps,
                name=match.bn_op.name.split("/")[-1] + "_psb")
            if S("util.variable.fixed_point.use"):
                new_layer_tensor = fixed_point(
                    new_layer_tensor,
                    S("util.variable.fixed_point.bits"),
                    max=S("util.variable.fixed_point.max"),
                    min=S("util.variable.fixed_point.min"))
            nodes_modified_count = common.RerouteTensor(
                new_layer_tensor, match.output_tensor)
            if nodes_modified_count == 0:
                raise ValueError(
                    'Replacing batch norms failed, %s had no outputs.' %
                    match.output_tensor.name)
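

# The rewrite above uses gamma' = gamma / sqrt(var + eps) and
# beta' = beta - gamma' * mean, so that batch normalization with mean=0,
# variance=1 and eps=0 reproduces the original output. A scalar check with
# hypothetical values:
#
#   x, mean, var, eps, gamma, beta = 0.7, 0.2, 1.5, 1e-3, 1.1, -0.4
#   original = gamma * (x - mean) / (var + eps) ** 0.5 + beta
#   gamma2 = gamma / (var + eps) ** 0.5
#   beta2 = beta - gamma2 * mean
#   assert abs(original - (gamma2 * x + beta2)) < 1e-12

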
def sample_binomial(p, n, eps=S('binom.log_eps')):
    # sample from a binomial distribution
    if S("binom.probability_mode") == "tfp":
        P = tf.stack([p, 1.0 - p], axis=-1)
        weight_binom = tfp.distributions.Multinomial(total_count=n,
                                                     probs=P).sample()[..., 0]
        # weight_binom = tfp.distributions.Binomial(total_count=n,probs=p).sample()
        weight_binom = tf.cast(weight_binom, tf.float32)
    elif S("binom.probability_mode") == "gumbel":
        with tf.variable_scope("p"):
            # p = weight_p
            p = tf.clip_by_value(p, 0.0, 1.0)
            P = tf.stack([
                binomialCoeff(n, k) * p**k * (1 - p)**(n - k)
                for k in range(n + 1)
            ],
                         axis=-1)

            # reduces numerical instabilities
            P = tf.clip_by_value(P, eps, 1.0)
            gumbel = -tf.log(
                tf.maximum(
                    -tf.log(tf.maximum(tf.random.uniform(P.get_shape()), eps)),
                    eps))

            # gumbel = -tf.log(-tf.log(tf.random.uniform(P.get_shape())))
            # tf.summary.histogram("binom_p",p)
            # tf.summary.histogram("binom_P",P)
            # tf.summary.histogram("binom_logP",tf.log(P))
        weight_binom = tf.argmax(tf.log(P) + gumbel, axis=-1)
        weight_binom = tf.cast(weight_binom, tf.float32)
    elif S("binom.probability_mode") == "gumbel_log":
        with tf.variable_scope("p"):
            # p = weight_p
            p = tf.clip_by_value(p, eps, 1.0 - eps)
            logP = tf.stack([
                np.log(binomialCoeff(n, k)) + k * tf.log(p) +
                (n - k) * tf.log(1 - p) for k in range(n + 1)
            ],
                            axis=-1)

            # reduces numerical instabilities
            gumbel = -tf.log(
                tf.maximum(
                    -tf.log(
                        tf.maximum(tf.random.uniform(logP.get_shape()), eps)),
                    eps))

        weight_binom = tf.argmax(logP + gumbel, axis=-1)
        weight_binom = tf.cast(weight_binom, tf.float32)

    if S("binom.gradient_correction") == "pass":
        weight_binom = pass_gradient(p, lambda p: weight_binom,
                                     lambda p: n * p)
    elif S("binom.gradient_correction") == "gumbel":
        weight_binom = pass_gradient(
            p, lambda p: weight_binom, lambda p: tf.squeeze(
                tf.batch_gather(
                    P, tf.cast(tf.expand_dims(weight_binom, -1), tf.int32))))
    else:
        raise ValueError("Gradient not defined for tf.cast. TODO")
    return weight_binom
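

# The "gumbel" branches above rely on the Gumbel-max trick: adding independent
# Gumbel(0, 1) noise to the log-probabilities and taking the argmax draws index
# k with probability P[k], here the Binomial(n, p) pmf. A minimal standalone
# NumPy sketch (without the module's S settings; `np_binomial_gumbel` is a
# hypothetical name):
import numpy as np
from scipy.special import comb


def np_binomial_gumbel(p, n, size=10000):
    k = np.arange(n + 1)
    log_pmf = np.log(comb(n, k)) + k * np.log(p) + (n - k) * np.log(1 - p)
    gumbel = -np.log(-np.log(np.random.uniform(size=(size, n + 1))))
    return np.argmax(log_pmf + gumbel, axis=-1)


# The sample mean should be close to n * p, e.g. roughly 19.2 for p=0.3, n=64:
#   np_binomial_gumbel(0.3, 64).mean()

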
import tensorflow as tf
import numpy as np
from template.misc import S, GLOBAL, print_info as print
if S("binom.probability_mode") == "tfp":
    import tensorflow_probability as tfp

# -------------- #
# tensor helpers #
# -------------- #


# get shape (excluding input shape)
def getshape(x):
    return x.get_shape().as_list()[1:]


# pass the gradient around a non-differentiable function: the result takes the
# value of backward_fn(x), while gradients flow through forward_fn(x)
def pass_gradient(x, backward_fn, forward_fn=lambda x: x, name=None):
    fnx = forward_fn(x)
    return tf.add(fnx, tf.stop_gradient(backward_fn(x) - fnx), name=name)
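

# A hedged usage sketch, straight-through rounding: the forward value is
# tf.round(x), while the gradient treats the op as the identity, so the
# rounding is invisible to the optimizer:
#
#   rounded = pass_gradient(x, lambda t: tf.round(t))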


# guess picture shape and reshape
def to_picture_shape(input):
    current_shape = input.get_shape().as_list()[1:]
    current_dim = np.prod(current_shape)

    shape = None
    for i in range(256, 3, -1):
        if current_dim % (i * i) == 0:
            shape = [-1, i, i, int(current_dim / (i * i))]
            break  # assumed: keep the largest square side that fits

    return tf.reshape(input, shape)


    def minimize(self,
                 loss,
                 global_step=None,
                 var_list=None,
                 aggregation_method=None,
                 colocate_gradients_with_ops=False,
                 name=None,
                 grad_loss=None):

        # compute (meaned) gradients for a batch
        grads_and_vars = self.compute_gradients(
            loss,
            var_list=var_list,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss)

        # check if any trainable variables provided
        for g, v in grads_and_vars:
            if g is None:
                print("Gradient of '" + v.name + "' is 'None'. Ignoring")
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]

        # default adam does:
        # return self.apply_gradients(grads_and_vars, global_step=global_step, name=name)

        # get all trainable variables
        variables = [v for g, v in grads_and_vars]

        # create a copy of all trainable variables with `0` as initial values
        with tf.name_scope("optimizer"):
            gradient_sum = [
                tf.get_variable(v.name.replace(":0", "_sum"),
                                initializer=tf.zeros_like(
                                    v.initialized_value()),
                                trainable=False) for v in variables
            ]

        def capacity_gradient(grad_sum, grad, name, var):
            if "hiddenWeight" in name and "weight_gradient" in GLOBAL:
                return GLOBAL["weight_gradient"](grad_sum, grad, var)
            return grad_sum + grad

        with tf.control_dependencies([GLOBAL["memory_step"]]):

            # collect the batch gradient into accumulated vars
            gradient_sum_update = [
                gs.assign(
                    tf.where(GLOBAL["memory_step"] > 0,
                             capacity_gradient(gs, g, v.name, v), g))
                for gs, (g, v) in zip(gradient_sum, grads_and_vars)
            ]

            with tf.control_dependencies(gradient_sum_update):
                train_step = tf.cond(
                    GLOBAL["memory_step"] >= S("optimizer.memory_size") - 1,
                    true_fn=lambda: self.apply_gradients([
                        (gs / S("optimizer.memory_size"), v)
                        for gs, (g, v) in zip(gradient_sum, grads_and_vars)
                    ], global_step),
                    false_fn=lambda: tf.no_op())

        return train_step
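

# With the default capacity_gradient above, the accumulator averages the
# per-step gradients over a window of optimizer.memory_size steps before a
# single apply_gradients call. A minimal NumPy sketch of that bookkeeping
# (the window of 4 and the gradient values are hypothetical):
import numpy as np

memory_size = 4
step_grads = [np.array([0.2, -0.1]), np.array([0.4, 0.0]),
              np.array([0.1, 0.3]), np.array([-0.3, 0.2])]

grad_sum = None
for step, g in enumerate(step_grads):
    grad_sum = g if step == 0 else grad_sum + g  # gradient_sum_update
    if step >= memory_size - 1:                  # memory_step condition
        applied = grad_sum / memory_size         # what apply_gradients receives

assert np.allclose(applied, np.mean(step_grads, axis=0))

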
def batch_normalization(
        inputs,
        axis=-1,
        momentum=0.99,
        epsilon=1e-3,
        center=True,
        scale=True,
        beta_initializer=init_ops.zeros_initializer(),
        gamma_initializer=init_ops.ones_initializer(),
        moving_mean_initializer=init_ops.zeros_initializer(),
        moving_variance_initializer=init_ops.ones_initializer(),
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None,
        training=False,
        trainable=True,
        name=None,
        reuse=None,
        renorm=False,
        renorm_clipping=None,
        renorm_momentum=0.99,
        fused=None,
        virtual_batch_size=None,
        adjustment=None):
    layer = BatchNormalization(
        axis=axis,
        momentum=momentum,
        epsilon=epsilon,
        center=center,
        scale=scale,
        beta_initializer=beta_initializer,
        gamma_initializer=gamma_initializer,
        moving_mean_initializer=moving_mean_initializer,
        moving_variance_initializer=moving_variance_initializer,
        beta_regularizer=beta_regularizer,
        gamma_regularizer=gamma_regularizer,
        beta_constraint=beta_constraint,
        gamma_constraint=gamma_constraint,
        renorm=renorm,
        renorm_clipping=renorm_clipping,
        renorm_momentum=renorm_momentum,
        fused=fused,
        trainable=trainable,
        virtual_batch_size=virtual_batch_size,
        adjustment=adjustment,
        name=name,
        _reuse=reuse,
        _scope=name)
    res = layer.apply(inputs, training=training)

    if not S("batch_norm.transform"):
        return res

    # get moving mean and variance
    moving_mean, moving_variance = layer.moving_mean, layer.moving_variance
    beta_offset, gamma_scale = layer.beta, layer.gamma

    if GLOBAL["first_layer"]:
        GLOBAL["first_layer"] = False
    else:
        pass
        # print("reformulate batchnorm")

        # apply transformation
        # --------------------
        # moving_mean = variableFromSettings([],hiddenVar=moving_mean)[0]
        # moving_variance = variableFromSettings([],hiddenVar=moving_variance)[0]
        # beta_offset = variableFromSettings([],hiddenVar=beta_offset)[0]
        # gamma_scale = variableFromSettings([],hiddenVar=gamma_scale)[0]

        # apply transformation (no var)
        # --------------------
        # sample_size = S("binom.sample_size")
        # S("binom.sample_size",set=sample_size*4)
        # gamma_scale = gamma_scale/tf.sqrt(moving_variance+layer.epsilon)
        # gamma_scale = variableFromSettings([],hiddenVar=gamma_scale/tf.sqrt(moving_variance+layer.epsilon))[0]
        # moving_variance = 0*moving_variance+1
        # moving_variance = tf.ones_like(moving_variance)
        # S("binom.sample_size",set=sample_size)

        # moving_variance = 1.0/variableFromSettings([],hiddenVar=1.0/moving_variance)[0]
        # moving_mean = fixed_point(moving_mean,8)
        # moving_mean, _ = variableFromSettings([],hiddenVar=moving_mean)
        # moving_variance = next_base2(moving_variance, strict_positive=True)
        # moving_variance = 2**tf.ceil(tf.log(tf.maximum(tf.abs(moving_variance),0))/tf.log(2.0))
    # tf.summary.histogram("bn_mean",moving_mean)
    # tf.summary.histogram("bn_var",moving_variance)

    # set moving mean and variance
    layer.moving_mean, layer.moving_variance = moving_mean, moving_variance
    layer.beta, layer.gamma = beta_offset, gamma_scale

    # reapply
    res = layer.apply(inputs, training=training)

    return res