Example #1
File: dnn.py  Project: Lasagne/Lasagne
    def get_output_for(self,
                       input,
                       deterministic=False,
                       batch_norm_use_averages=None,
                       batch_norm_update_averages=None,
                       **kwargs):
        # Decide whether to use the stored averages or mini-batch statistics
        if batch_norm_use_averages is None:
            batch_norm_use_averages = deterministic
        use_averages = batch_norm_use_averages

        # Decide whether to update the stored averages
        if batch_norm_update_averages is None:
            batch_norm_update_averages = not deterministic
        update_averages = batch_norm_update_averages

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = [
            'x' if input_axis in self.axes else next(param_axes)
            for input_axis in range(input.ndim)
        ]
        # and prepare the converse pattern removing those broadcastable axes
        unpattern = [d for d in range(input.ndim) if d not in self.axes]

        # call cuDNN if needed, obtaining normalized outputs and statistics
        if not use_averages or update_averages:
            # cuDNN requires beta/gamma tensors; create them if needed
            shape = tuple(s for (d, s) in enumerate(input.shape)
                          if d not in self.axes)
            gamma = self.gamma or theano.tensor.ones(shape)
            beta = self.beta or theano.tensor.zeros(shape)
            mode = 'per-activation' if self.axes == (0, ) else 'spatial'
            (normalized, input_mean,
             input_inv_std) = dnn.dnn_batch_normalization_train(
                 input, gamma.dimshuffle(pattern), beta.dimshuffle(pattern),
                 mode, self.epsilon)

        # normalize with stored averages, if needed
        if use_averages:
            mean = self.mean.dimshuffle(pattern)
            inv_std = self.inv_std.dimshuffle(pattern)
            gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
            beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
            normalized = (input - mean) * (gamma * inv_std) + beta

        # update stored averages, if needed
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = (
                (1 - self.alpha) * running_mean +
                self.alpha * input_mean.dimshuffle(unpattern))
            running_inv_std.default_update = (
                (1 - self.alpha) * running_inv_std +
                self.alpha * input_inv_std.dimshuffle(unpattern))
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            dummy = 0 * (running_mean + running_inv_std).dimshuffle(pattern)
            normalized = normalized + dummy

        return normalized
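
The pattern/unpattern pair built above is the key bookkeeping step: parameters and statistics are stored with the normalized axes removed, dimshuffle(pattern) re-inserts broadcastable axes so they align with the input, and dimshuffle(unpattern) strips them again. A minimal plain-Python sketch of the same construction, with illustrative values (ndim=4, axes=(0, 2, 3), i.e. 'spatial' normalization):

ndim = 4            # e.g. a (batch, channels, rows, cols) input
axes = (0, 2, 3)    # axes normalized over in 'spatial' mode

param_axes = iter(range(ndim - len(axes)))
pattern = ['x' if axis in axes else next(param_axes) for axis in range(ndim)]
unpattern = [d for d in range(ndim) if d not in axes]

print(pattern)    # ['x', 0, 'x', 'x']: broadcasts a (C,) parameter to (1, C, 1, 1)
print(unpattern)  # [1]: collapses (1, C, 1, 1) statistics back to (C,)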
Example #2
    def __init__(self, layers, momentum=0.9, eps=1e-5, renorm_max_r=1.0,
                 renorm_max_d=0.0, renorm_max_it=10, json_param=None):
        super().__init__(layer_index=len(layers))
        #avoid the mutable default-argument pitfall
        json_param = json_param if json_param is not None else {}
 
        self.input = layers[-1].output
        self.input_shape = layers[-1].output_shape

        #get parameters
        self.enabled = json_param.get("enabled", True)
        self.momentum = json_param.get("momentum", momentum)

        self.renorm_max_r = json_param.get("renormMaxR", renorm_max_r)
        self.renorm_max_d = json_param.get("renormMaxD", renorm_max_d)
        self.renorm_max_it = json_param.get("renormMaxIt", renorm_max_it)
 
        self.eps = json_param.get("eps", eps)
        if self.enabled:

            #initialize parameters
            param_shape = (self.input_shape[1],)
            floatX = theano.config.floatX
            self.omega = theano.shared(numpy.ones(param_shape, dtype=floatX), name="bn omega")
            self.beta = theano.shared(numpy.zeros(param_shape, dtype=floatX), name="bn beta")
            self.mean = theano.shared(numpy.zeros(param_shape, dtype=floatX), name="bn mean")
            self.stdinv = theano.shared(numpy.ones(param_shape, dtype=floatX), name="bn std inv")

            #evaluate
            x_shape = self.input_shape
            x = self.input

            #directly call cudnn version until added to master
            dim = ['x',0,'x','x']
            use_cudnn = theano.sandbox.cuda.dnn.dnn_available() and (theano.sandbox.cuda.dnn.version() >= (5000,5000))
            # use_cudnn = theano.gpuarray.dnn.dnn_available(None) and (theano.gpuarray.dnn.version() >= 5000)
            if use_cudnn:
                from theano.sandbox.cuda.dnn import dnn_batch_normalization_train, dnn_batch_normalization_test
                # from theano.gpuarray.dnn import dnn_batch_normalization_train, dnn_batch_normalization_test
                var = tensor.sqr(1.0 / self.stdinv)
                x_n_train, x_mean, x_stdinv = dnn_batch_normalization_train(x, self.omega.dimshuffle(dim), self.beta.dimshuffle(dim), 'spatial', self.eps)
                x_n_test = dnn_batch_normalization_test(x, self.omega.dimshuffle(dim), self.beta.dimshuffle(dim), self.mean.dimshuffle(dim), var.dimshuffle(dim), 'spatial', self.eps)
                x_std = 1.0 / x_stdinv
            else:
                #fallback path: compute the per-channel statistics directly
                x_mean = tensor.mean(x, axis=[0,2,3])
                x_std = tensor.sqrt(tensor.mean(tensor.sqr(x), axis=[0,2,3]) - tensor.sqr(x_mean) + self.eps)
                x_stdinv = 1.0 / x_std

                x_n_test = (x - self.mean.dimshuffle(dim)) * (self.omega * self.stdinv).dimshuffle(dim) + self.beta.dimshuffle(dim)
                x_n_train = (x - x_mean.dimshuffle(dim)) * (self.omega * x_stdinv).dimshuffle(dim) + self.beta.dimshuffle(dim)

            #optional batch renormalization (disabled): would override x_n_train
            # if (self.renorm_max_r > 1.0) or (self.renorm_max_d > 0.0):
            #     r_alpha = math.log(self.renorm_max_r) / self.renorm_max_it
            #     d_alpha = math.log(self.renorm_max_d + 1) / self.renorm_max_it
            #     r_max = tensor.minimum(self.renorm_max_r, tensor.exp(get_epoch()*r_alpha))
            #     d_max = tensor.minimum(self.renorm_max_d, tensor.exp(get_epoch()*d_alpha) - 1)
            #     x_r = theano.gradient.zero_grad(tensor.clip(x_std*self.stdinv, 1.0/r_max, r_max))
            #     x_d = theano.gradient.zero_grad(tensor.clip((x_mean - self.mean) * self.stdinv, -d_max, d_max))
            #     x_n_train = (x - x_mean.dimshuffle(dim)) * (self.omega*x_stdinv*x_r).dimshuffle(dim) + (self.beta + self.omega*x_d).dimshuffle(dim)

            self.local_updates = [(self.mean, self.momentum*self.mean + (1.0 - self.momentum)*x_mean.squeeze()),
                                  (self.stdinv, self.momentum*self.stdinv + (1.0 - self.momentum)*x_stdinv.squeeze())]

            self.output_shape = self.input_shape
            self.output = tensor.switch(get_train(), tensor.as_tensor_variable(x_n_train), theano.gradient.disconnected_grad(x_n_test)) 
        else:
            self.output_shape = self.input_shape
            self.output = self.input

        logging.verbose("Adding", self)
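
The local_updates pair above maintains exponential moving averages of the batch statistics; with momentum 0.9, each stored value moves 10% of the way toward the current batch estimate per update. A minimal NumPy sketch of that update rule (the channel count and batch values are illustrative):

import numpy

momentum = 0.9
running_mean = numpy.zeros(3, dtype=numpy.float32)  # stored statistic, as in self.mean
batch_mean = numpy.array([1.0, 2.0, 3.0], dtype=numpy.float32)

for _ in range(50):
    # same rule as the (self.mean, ...) update pair above
    running_mean = momentum * running_mean + (1.0 - momentum) * batch_mean

print(running_mean)  # converges toward [1. 2. 3.]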
Example #3
import numpy
import theano
import theano.tensor as tensor
#assumed: dnn_bnrelu_train and dnn_batch_normalization_train come from this
#project's cuDNN wrapper module (their imports are not shown in the original)

if __name__ == '__main__':

    shape = (1, 4, 4, 4)
    f_g = theano.shared(numpy.ones(shape=(shape[1], ), dtype='float32'))
    f_b = theano.shared(numpy.zeros(shape=(shape[1], ), dtype='float32'))
    f_x = theano.tensor.tensor4()

    f0_y, f0_mean, f0_std = dnn_bnrelu_train(f_x, f_g[None, :, None, None],
                                             f_b[None, :, None, None],
                                             "spatial")
    f0_yg = theano.tensor.grad(f0_y.sum(), f_x)
    f0_yg = theano.printing.Print('R0:')(f0_yg)
    f0 = theano.function([f_x], [f0_y, f0_y.sum(), f0_yg])

    f1_xn, f1_mean, f1_std = dnn_batch_normalization_train(
        f_x, f_g[None, :, None, None], f_b[None, :, None, None], "spatial")
    f1_y = tensor.maximum(f1_xn, 0.0)
    f1_yg = theano.tensor.grad(f1_y.sum(), f_x)
    f1_yg = theano.printing.Print('R1:')(f1_yg)
    f1 = theano.function([f_x], [f1_y, f1_y.sum(), f1_yg])

    x = numpy.random.uniform(-5.0, 5.0, shape).astype(numpy.float32)
    y0, ys0, yy0 = f0(x)
    y1, ys1, yy1 = f1(x)

    print("X Mean:", x.mean(axis=(0, 2, 3)))
    print("X std:", x.std(axis=(0, 2, 3)))

    print("------X------")
    print(x)
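
As a sanity check, both outputs can be compared against spatial batch normalization written directly in NumPy, normalizing per channel over the batch and spatial axes. A minimal sketch (the epsilon value is an illustrative assumption matching a typical cuDNN default):

    def spatial_bn_relu_ref(x, gamma, beta, eps=1e-4):
        # per-channel statistics over the batch and spatial axes (0, 2, 3)
        mean = x.mean(axis=(0, 2, 3), keepdims=True)
        var = x.var(axis=(0, 2, 3), keepdims=True)
        x_n = (x - mean) / numpy.sqrt(var + eps)
        y = gamma[None, :, None, None] * x_n + beta[None, :, None, None]
        return numpy.maximum(y, 0.0)  # the ReLU applied to f1_xn above

    y_ref = spatial_bn_relu_ref(x, f_g.get_value(), f_b.get_value())
    print("max |y0 - y_ref|:", numpy.abs(y0 - y_ref).max())  # should be ~0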