Example #1
import numpy as np
import tensorflow as tf

# Layer, Linear, init_filters and conv_output_length are assumed to be
# provided by the surrounding project; the code targets TensorFlow 1.x.

class Convolution(Layer):

    def __init__(self,
                 input_shape,
                 filter_sizes,
                 init,
                 strides=[1, 1, 1, 1],
                 padding='SAME',
                 activation=None,
                 bias=0.,
                 use_bias=True,
                 name=None,
                 load=None,
                 train=True):
        self.input_shape = input_shape
        self.filter_sizes = filter_sizes
        self.batch_size, self.h, self.w, self.fin = self.input_shape
        self.fh, self.fw, self.fin, self.fout = self.filter_sizes
        self.init = init
        self.strides = strides
        _, self.sh, self.sw, _ = self.strides
        self.padding = padding
        self.activation = Linear() if activation is None else activation
        self.use_bias = use_bias
        self.name = name
        self.train_flag = train

        if load:
            print ("Loading Weights: " + self.name)
            weight_dict = np.load(load, encoding='latin1', allow_pickle=True).item()
            filters = weight_dict[self.name]
            bias = weight_dict[self.name + '_bias']
        else:
            filters = init_filters(size=self.filter_sizes, init=self.init)
            bias = np.ones(shape=self.fout) * bias

        self.filters = tf.Variable(filters, dtype=tf.float32)
        self.bias = tf.Variable(bias, dtype=tf.float32)

    ###################################################################

    def get_weights(self):
        return [(self.name, self.filters), (self.name + "_bias", self.bias)]

    def output_shape(self):
        oh = conv_output_length(self.h, self.fh, self.padding.lower(), self.sh)
        ow = conv_output_length(self.w, self.fw, self.padding.lower(), self.sw)
        od = self.fout
        return [oh, ow, od]

    def num_params(self):
        filter_weights_size = self.fh * self.fw * self.fin * self.fout
        bias_weights_size = self.fout
        return filter_weights_size + bias_weights_size

    def forward(self, X):
        Z = tf.nn.conv2d(X, self.filters, self.strides, self.padding)
        if self.use_bias:
            Z = Z + tf.reshape(self.bias, (1, 1, 1, self.fout))

        A = self.activation.forward(Z)
        return {'aout':A, 'cache':{}}

    ###################################################################
    
    def bp(self, AI, AO, DO, cache):    
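        # Standard backprop: fold the activation gradient into DO, then use
        # TF1's conv2d gradient ops for the input and filter gradients.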
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DI = tf.nn.conv2d_backprop_input(input_sizes=self.input_shape, filter=self.filters, out_backprop=DO, strides=self.strides, padding=self.padding)
        
        DF = tf.nn.conv2d_backprop_filter(input=AI, filter_sizes=self.filter_sizes, out_backprop=DO, strides=self.strides, padding=self.padding)
        DB = tf.reduce_sum(DO, axis=[0, 1, 2])
        
        if self.train_flag:
            return {'dout':DI, 'cache':{}}, [(DF, self.filters), (DB, self.bias)]
        else:
            return {'dout':DI, 'cache':{}}, []

    def dfa(self, AI, AO, E, DO, cache):
        return self.bp(AI, AO, DO, cache)
        
    def lel(self, AI, AO, DO, Y, cache):
        return self.bp(AI, AO, DO, cache)
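
A minimal usage sketch for this layer, assuming TensorFlow 1.x graph mode and
the project's init_filters helper; the shapes and the init string below are
made up for illustration:

# Hypothetical setup: a batch of 8 RGB 32x32 images and 16 3x3 filters.
conv = Convolution(input_shape=[8, 32, 32, 3],
                   filter_sizes=[3, 3, 3, 16],
                   init='glorot_uniform',  # whatever init_filters accepts
                   name='conv1')

X = tf.placeholder(tf.float32, shape=[8, 32, 32, 3])
A = conv.forward(X)['aout']

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(A, feed_dict={X: np.zeros((8, 32, 32, 3), np.float32)})
    print(out.shape)  # (8, 32, 32, 16) with 'SAME' padding, stride 1
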
Example #2
import math

import numpy as np
import tensorflow as tf

# Layer and Linear are assumed to come from the surrounding project
# (TensorFlow 1.x-style code).

class FullyConnected(Layer):
    def __init__(self,
                 input_shape,
                 size,
                 init=None,
                 activation=None,
                 bias=0.,
                 alpha=0.,
                 name=None,
                 load=None,
                 train=True):

        self.input_size = input_shape
        self.output_size = size
        self.size = [self.input_size, self.output_size]

        bias = np.ones(shape=self.output_size) * bias

        self.alpha = alpha
        self.activation = Linear() if activation is None else activation
        self.name = name
        self._train = train

        if load:
            print("Loading Weights: " + self.name)
            weight_dict = np.load(load, allow_pickle=True).item()
            weights = weight_dict[self.name]
            bias = weight_dict[self.name + '_bias']
        else:
            if init == "zero":
                weights = np.zeros(shape=self.size)
            elif init == "sqrt_fan_in":
                sqrt_fan_in = math.sqrt(self.input_size)
                weights = np.random.uniform(low=-1.0 / sqrt_fan_in,
                                            high=1.0 / sqrt_fan_in,
                                            size=self.size)
            elif init == "alexnet":
                weights = np.random.normal(loc=0.0, scale=0.01, size=self.size)
            else:
                # Glorot (Xavier) uniform initialization:
                # https://www.tensorflow.org/api_docs/python/tf/glorot_uniform_initializer
                fan_in = self.input_size
                fan_out = self.output_size
                lim = np.sqrt(6. / (fan_in + fan_out))
                weights = np.random.uniform(low=-lim, high=lim, size=self.size)

        self.weights = tf.Variable(weights, dtype=tf.float32)
        self.bias = tf.Variable(bias, dtype=tf.float32)

    ###################################################################

    def get_weights(self):
        return [(self.name, self.weights), (self.name + "_bias", self.bias)]

    def set_weights(self, weight_dic):
        weights = weight_dic[self.name]
        bias = weight_dic[self.name + '_bias']
        return [self.weights.assign(weights), self.bias.assign(bias)]

    def num_params(self):
        weights_size = self.input_size * self.output_size
        bias_size = self.output_size
        return weights_size + bias_size

    def forward(self, X):
        Z = tf.matmul(X, self.weights) + self.bias
        A = self.activation.forward(Z)
        return A

    ###################################################################

    def backward(self, AI, AO, DO):
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DI = tf.matmul(DO, tf.transpose(self.weights))
        return DI

    def gv(self, AI, AO, DO):
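        # Build (gradient, variable) pairs without applying an update,
        # mirroring the output format of TF1's compute_gradients().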
        if not self._train:
            return []

        DO = tf.multiply(DO, self.activation.gradient(AO))
        DW = tf.matmul(tf.transpose(AI), DO)
        DB = tf.reduce_sum(DO, axis=0)

        return [(DW, self.weights), (DB, self.bias)]

    def train(self, AI, AO, DO):
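        # Same gradients as gv(), but also apply a plain SGD step with
        # learning rate alpha via Variable.assign (a TF1 graph-mode idiom).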
        if not self._train:
            return []

        DO = tf.multiply(DO, self.activation.gradient(AO))
        DW = tf.matmul(tf.transpose(AI), DO)
        DB = tf.reduce_sum(DO, axis=0)

        self.weights = self.weights.assign(
            tf.subtract(self.weights, tf.scalar_mul(self.alpha, DW)))
        self.bias = self.bias.assign(
            tf.subtract(self.bias, tf.scalar_mul(self.alpha, DB)))
        return [(DW, self.weights), (DB, self.bias)]

    ###################################################################

    def dfa_backward(self, AI, AO, E, DO):
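        # DFA does not backpropagate a true gradient through this layer;
        # return a placeholder with the input's shape.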
        return tf.ones_like(AI)

    def dfa_gv(self, AI, AO, E, DO):
        if not self._train:
            return []

        DO = tf.multiply(DO, self.activation.gradient(AO))
        DW = tf.matmul(tf.transpose(AI), DO)
        DB = tf.reduce_sum(DO, axis=0)

        return [(DW, self.weights), (DB, self.bias)]

    def dfa(self, AI, AO, E, DO):
        if not self._train:
            return []

        DO = tf.multiply(DO, self.activation.gradient(AO))
        DW = tf.matmul(tf.transpose(AI), DO)
        DB = tf.reduce_sum(DO, axis=0)

        self.weights = self.weights.assign(
            tf.subtract(self.weights, tf.scalar_mul(self.alpha, DW)))
        self.bias = self.bias.assign(
            tf.subtract(self.bias, tf.scalar_mul(self.alpha, DB)))
        return [(DW, self.weights), (DB, self.bias)]

    ###################################################################

    def lel_backward(self, AI, AO, E, DO, Y):
        # DI = tf.zeros_like(AI)
        DI = self.backward(AI, AO, DO)
        return DI

    def lel_gv(self, AI, AO, E, DO, Y):
        return self.gv(AI, AO, DO)

    def lel(self, AI, AO, E, DO, Y):
        return self.train(AI, AO, DO)
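
A minimal sketch of how the (gradient, variable) pairs from gv can be
consumed, assuming TensorFlow 1.x graph mode; the error signal E and all
shapes below are hypothetical:

fc = FullyConnected(input_shape=784, size=10, init='sqrt_fan_in',
                    alpha=0.01, name='fc1')

X = tf.placeholder(tf.float32, shape=[None, 784])
Y = tf.placeholder(tf.float32, shape=[None, 10])

A = fc.forward(X)
E = A - Y  # error signal at the output
grads_and_vars = fc.gv(AI=X, AO=A, DO=E)

# Because gv mirrors compute_gradients(), it plugs into a TF1 optimizer:
step = tf.train.GradientDescentOptimizer(0.01).apply_gradients(grads_and_vars)
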
Example #3
import math

import numpy as np
import tensorflow as tf

# Layer, Linear and conv_output_length are assumed to come from the
# surrounding project (TensorFlow 1.x-style code).

class Convolution(Layer):
    def __init__(self,
                 input_sizes,
                 filter_sizes,
                 strides,
                 padding,
                 init=None,
                 alpha=0.,
                 activation=None,
                 bias=0.,
                 name=None,
                 load=None,
                 train=True):
        self.input_sizes = input_sizes
        self.filter_sizes = filter_sizes
        self.batch_size, self.h, self.w, self.fin = self.input_sizes
        self.fh, self.fw, self.fin, self.fout = self.filter_sizes

        bias = np.ones(shape=self.fout) * bias

        self.strides = strides
        _, self.sh, self.sw, _ = self.strides
        self.padding = padding
        self.alpha = alpha
        self.activation = Linear() if activation is None else activation
        self.name = name
        self._train = train

        if load:
            print("Loading Weights: " + self.name)
            weight_dict = np.load(load, encoding='latin1',
                                  allow_pickle=True).item()
            filters = weight_dict[self.name]
            bias = weight_dict[self.name + '_bias']
        else:
            if init == "zero":
                filters = np.zeros(shape=self.filter_sizes)
            elif init == "sqrt_fan_in":
                sqrt_fan_in = math.sqrt(self.h * self.w * self.fin)
                filters = np.random.uniform(low=-1.0 / sqrt_fan_in,
                                            high=1.0 / sqrt_fan_in,
                                            size=self.filter_sizes)
            elif init == "alexnet":
                filters = np.random.normal(loc=0.0,
                                           scale=0.01,
                                           size=self.filter_sizes)
            else:
                # Glorot (Xavier) uniform initialization:
                # https://www.tensorflow.org/api_docs/python/tf/glorot_uniform_initializer
                fan_in = self.fh * self.fw * self.fin
                fan_out = self.fout
                lim = np.sqrt(6. / (fan_in + fan_out))
                filters = np.random.uniform(low=-lim,
                                            high=lim,
                                            size=self.filter_sizes)

        self.filters = tf.Variable(filters, dtype=tf.float32)
        self.bias = tf.Variable(bias, dtype=tf.float32)

    ###################################################################

    def get_weights(self):
        return [(self.name, self.filters), (self.name + "_bias", self.bias)]

    def set_weights(self, weight_dic):
        filters = weight_dic[self.name]
        bias = weight_dic[self.name + '_bias']
        return [self.filters.assign(filters), self.bias.assign(bias)]

    def output_shape(self):
        oh = conv_output_length(self.h, self.fh, self.padding.lower(), self.sh)
        ow = conv_output_length(self.w, self.fw, self.padding.lower(), self.sw)
        od = self.fout
        return [oh, ow, od]

    def num_params(self):
        filter_weights_size = self.fh * self.fw * self.fin * self.fout
        bias_weights_size = self.fout
        return filter_weights_size + bias_weights_size

    def forward(self, X):
        Z = tf.nn.conv2d(X, self.filters, self.strides, self.padding)
        # Reshape the bias so it broadcasts over the NHWC activations.
        Z = Z + tf.reshape(self.bias, (1, 1, 1, self.fout))
        A = self.activation.forward(Z)
        return A

    ###################################################################

    def backward(self, AI, AO, DO):
        DO = tf.multiply(DO, self.activation.gradient(AO))
        DI = tf.nn.conv2d_backprop_input(input_sizes=self.input_sizes,
                                         filter=self.filters,
                                         out_backprop=DO,
                                         strides=self.strides,
                                         padding=self.padding)
        return DI

    def gv(self, AI, AO, DO):
        if not self._train:
            return []

        DO = tf.multiply(DO, self.activation.gradient(AO))
        DF = tf.nn.conv2d_backprop_filter(input=AI,
                                          filter_sizes=self.filter_sizes,
                                          out_backprop=DO,
                                          strides=self.strides,
                                          padding=self.padding)
        DB = tf.reduce_sum(DO, axis=[0, 1, 2])
        return [(DF, self.filters), (DB, self.bias)]

    def train(self, AI, AO, DO):
        if not self._train:
            return []

        DO = tf.multiply(DO, self.activation.gradient(AO))
        DF = tf.nn.conv2d_backprop_filter(input=AI,
                                          filter_sizes=self.filter_sizes,
                                          out_backprop=DO,
                                          strides=self.strides,
                                          padding=self.padding)
        DB = tf.reduce_sum(DO, axis=[0, 1, 2])

        self.filters = self.filters.assign(
            tf.subtract(self.filters, tf.scalar_mul(self.alpha, DF)))
        self.bias = self.bias.assign(
            tf.subtract(self.bias, tf.scalar_mul(self.alpha, DB)))
        return [(DF, self.filters), (DB, self.bias)]

    ###################################################################

    def dfa_backward(self, AI, AO, E, DO):
        # As in the fully connected layer, DFA returns a placeholder of the
        # input's shape rather than a true input gradient.
        return tf.ones_like(AI)

    def dfa_gv(self, AI, AO, E, DO):
        if not self._train:
            return []

        DO = tf.multiply(DO, self.activation.gradient(AO))
        DF = tf.nn.conv2d_backprop_filter(input=AI,
                                          filter_sizes=self.filter_sizes,
                                          out_backprop=DO,
                                          strides=self.strides,
                                          padding=self.padding)
        DB = tf.reduce_sum(DO, axis=[0, 1, 2])
        return [(DF, self.filters), (DB, self.bias)]

    def dfa(self, AI, AO, E, DO):
        if not self._train:
            return []

        DO = tf.multiply(DO, self.activation.gradient(AO))
        DF = tf.nn.conv2d_backprop_filter(input=AI,
                                          filter_sizes=self.filter_sizes,
                                          out_backprop=DO,
                                          strides=self.strides,
                                          padding=self.padding)
        DB = tf.reduce_sum(DO, axis=[0, 1, 2])

        self.filters = self.filters.assign(
            tf.subtract(self.filters, tf.scalar_mul(self.alpha, DF)))
        self.bias = self.bias.assign(
            tf.subtract(self.bias, tf.scalar_mul(self.alpha, DB)))
        return [(DF, self.filters), (DB, self.bias)]

    ###################################################################

    def lel_backward(self, AI, AO, E, DO, Y):
        return self.backward(AI, AO, DO)

    def lel_gv(self, AI, AO, E, DO, Y):
        return self.gv(AI, AO, DO)

    def lel(self, AI, AO, E, DO, Y):
        return self.train(AI, AO, DO)
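
For context, a sketch of the error signal the dfa_* methods expect: direct
feedback alignment projects the output error through a fixed random feedback
matrix instead of backpropagating it. Assume conv is an instance of this
example's Convolution, and X, A, E, num_classes come from a setup like the
earlier sketches; the feedback matrix B is an assumption, not part of this
codebase:

oh, ow, od = conv.output_shape()

# Fixed random feedback matrix; it is never trained.
B = tf.constant(np.random.uniform(-1., 1., size=(num_classes, oh * ow * od)),
                dtype=tf.float32)

# Project the output error E (batch, num_classes) onto this layer's output
# shape and hand it to dfa_gv as the local error signal DO.
DO = tf.reshape(tf.matmul(E, B), (-1, oh, ow, od))
grads_and_vars = conv.dfa_gv(AI=X, AO=A, E=E, DO=DO)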