Exemplo n.º 1
0
class TwoLayerNet:
    """Two-layer network: Affine -> Sigmoid -> Affine, with a SoftmaxWithLoss head.

    Attributes:
        layers: Layers applied in order by ``predict``.
        loss_layer: Loss layer applied to the scores in ``forward``.
        params: Flat list of every layer's parameters, in layer order.
        grads: Flat list of every layer's gradients, in the same order.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers and gather their parameters and gradients.

        Args:
            input_size: Row count of the first weight matrix.
            hidden_size: Column count of the first weight matrix and row
                count of the second.
            output_size: Column count of the second weight matrix.
        """
        # Weights are Gaussian noise scaled by 0.01; the first matrix is
        # drawn before the second.
        w_hidden = 0.01 * np.random.randn(input_size, hidden_size)
        w_output = 0.01 * np.random.randn(hidden_size, output_size)

        # Biases start at zero.
        b_hidden = np.zeros(hidden_size)
        b_output = np.zeros(output_size)

        # Build the layer stack.
        self.layers = [
            Affine(w_hidden, b_hidden),
            Sigmoid(),
            Affine(w_output, b_output),
        ]
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into flat lists.
        self.params = []
        self.grads = []
        for member in self.layers:
            self.params.extend(member.params)
            self.grads.extend(member.grads)

    def predict(self, x):
        """Pass ``x`` through every layer's ``forward`` in order and return the result."""
        out = x
        for member in self.layers:
            out = member.forward(out)
        return out

    def forward(self, x, t):
        """Return the loss layer's output for the scores of ``x`` against targets ``t``."""
        return self.loss_layer.forward(self.predict(x), t)

    def backward(self, dout=1):
        """Back-propagate ``dout`` through the loss layer, then the layers in reverse.

        Returns:
            The value returned by the first layer's ``backward``.
        """
        grad = self.loss_layer.backward(dout)
        for member in self.layers[::-1]:
            grad = member.backward(grad)
        return grad
Exemplo n.º 2
0
class TwoLayerNet:
    """Affine -> Sigmoid -> Affine classifier scored by SoftmaxWithLoss."""

    def __init__(self, input_size, hidden_size, output_size):
        """Initialise weights and biases, build the layers, and index their params/grads.

        Args:
            input_size: Row count of the first weight matrix.
            hidden_size: Width shared by the two weight matrices.
            output_size: Column count of the second weight matrix.
        """
        # Initialise weights (0.01-scaled Gaussian noise) and biases (zeros).
        first_w = 0.01 * np.random.randn(input_size, hidden_size)
        first_b = np.zeros(hidden_size)
        second_w = 0.01 * np.random.randn(hidden_size, output_size)
        second_b = np.zeros(output_size)

        # Create the layers.
        stack = [Affine(first_w, first_b), Sigmoid(), Affine(second_w, second_b)]
        self.layers = stack
        self.loss_layer = SoftmaxWithLoss()

        # Gather every weight and gradient into one list each.
        # Original author's note: grads are always deep-copied, so this
        # grouping only has to be done once, here. Presumably the layers
        # write new gradients into these same arrays in place -- TODO
        # confirm against the layer implementations.
        all_params, all_grads = [], []
        for unit in stack:
            all_params += unit.params
            all_grads += unit.grads
        self.params, self.grads = all_params, all_grads

    def predict(self, x):
        """Return the output of the last layer after feeding ``x`` forward through all layers."""
        activation = x
        for unit in self.layers:
            activation = unit.forward(activation)
        return activation

    def forward(self, x, t):
        """Compute the scores for ``x`` and return the loss layer's value against ``t``."""
        scores = self.predict(x)
        return self.loss_layer.forward(scores, t)

    def backward(self, dout=1):
        """Propagate ``dout`` backwards: loss layer first, then each layer last-to-first."""
        upstream = self.loss_layer.backward(dout)
        position = len(self.layers) - 1
        while position >= 0:
            upstream = self.layers[position].backward(upstream)
            position -= 1
        return upstream
class SimpleCBOW:
    """CBOW-style model with two context inputs.

    Both context inputs go through MatMul layers built on the same input
    weight matrix. Their outputs are averaged, passed through an output
    MatMul layer, and scored by SoftmaxWithLoss.

    Attributes:
        params / grads: Flat lists over in_layer0, in_layer1 and out_layer.
            The shared input matrix therefore appears twice in ``params``.
        word_vecs: The input weight matrix.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create the shared input weights, output weights and layers.

        Args:
            vocab_size: Rows of the input matrix / columns of the output matrix.
            hidden_size: Columns of the input matrix / rows of the output matrix.
        """
        # float32 Gaussian noise scaled by 0.01; input matrix drawn first.
        in_weights = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        out_weights = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # Two input layers wrap the very same weight array.
        self.in_layer0 = MatMul(in_weights)
        self.in_layer1 = MatMul(in_weights)
        self.out_layer = MatMul(out_weights)
        self.loss_layer = SoftmaxWithLoss()

        self.params = []
        self.grads = []
        for unit in (self.in_layer0, self.in_layer1, self.out_layer):
            self.params.extend(unit.params)
            self.grads.extend(unit.grads)

        self.word_vecs = in_weights

    def forward(self, contexts, target):
        """Return the loss for a batch.

        Args:
            contexts: Array indexed as ``contexts[:, 0]`` and ``contexts[:, 1]``
                for the two context inputs.
            target: Targets handed to the loss layer.
        """
        first = self.in_layer0.forward(contexts[:, 0])
        second = self.in_layer1.forward(contexts[:, 1])
        hidden = 0.5 * (first + second)  # average of the two context outputs
        return self.loss_layer.forward(self.out_layer.forward(hidden), target)

    def backward(self, dout=1):
        """Back-propagate through loss, output layer and both input layers.

        Returns:
            None.
        """
        grad_hidden = self.out_layer.backward(self.loss_layer.backward(dout))
        # Backward of the 0.5 * (h0 + h1) averaging; scaled in place.
        grad_hidden *= 0.5
        # in_layer1 is processed before in_layer0.
        for unit in (self.in_layer1, self.in_layer0):
            unit.backward(grad_hidden)
        return None
class TwoLayerNet:
    """Small classifier: Affine, Sigmoid, Affine, followed by SoftmaxWithLoss."""

    def __init__(self, input_size, hidden_size, output_size):
        """Set up weights, biases and layers, and build flat param/grad lists.

        Args:
            input_size: Rows of the first weight matrix.
            hidden_size: Columns of the first / rows of the second weight matrix.
            output_size: Columns of the second weight matrix.
        """
        # Weights: 0.01-scaled Gaussian noise. Biases: zeros.
        weight_a = 0.01 * np.random.randn(input_size, hidden_size)
        bias_a = np.zeros(hidden_size)
        weight_b = 0.01 * np.random.randn(hidden_size, output_size)
        bias_b = np.zeros(output_size)

        # Layer stack and loss head.
        self.layers = [Affine(weight_a, bias_a), Sigmoid(), Affine(weight_b, bias_b)]
        self.loss_layer = SoftmaxWithLoss()

        # Flat views over all layers' parameters and gradients.
        self.params = []
        self.grads = []
        for node in self.layers:
            for param in node.params:
                self.params.append(param)
            for grad in node.grads:
                self.grads.append(grad)

    def predict(self, x):
        """Feed ``x`` forward through each layer in turn; return the final output."""
        result = x
        for node in self.layers:
            result = node.forward(result)
        return result

    def forward(self, x, t):
        """Return the loss of the predicted scores for ``x`` with respect to ``t``."""
        return self.loss_layer.forward(self.predict(x), t)

    def backward(self, dout=1):
        """Run the backward pass (loss layer, then layers reversed); return the last result."""
        signal = self.loss_layer.backward(dout)
        for node in reversed(self.layers):
            signal = node.backward(signal)
        return signal
class TwoLayerNet:
    """Affine -> Sigmoid -> Affine network with a SoftmaxWithLoss loss layer."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create parameters and layers, then collect params/grads into flat lists.

        Args:
            input_size: Rows of the first weight matrix.
            hidden_size: Columns of the first / rows of the second weight matrix.
            output_size: Columns of the second weight matrix.
        """
        # Each weight is 0.01-scaled Gaussian noise; each bias is all zeros.
        affine_in = Affine(
            0.01 * np.random.randn(input_size, hidden_size),
            np.zeros(hidden_size),
        )
        activation = Sigmoid()
        affine_out = Affine(
            0.01 * np.random.randn(hidden_size, output_size),
            np.zeros(output_size),
        )

        self.layers = [affine_in, activation, affine_out]
        self.loss_layer = SoftmaxWithLoss()

        # One flat list for parameters, one for gradients, in layer order.
        self.params = []
        self.grads = []
        for stage in self.layers:
            self.params = self.params + list(stage.params)
            self.grads = self.grads + list(stage.grads)

    def predict(self, x):
        """Return the final layer output for input ``x``."""
        value = x
        for stage in self.layers:
            value = stage.forward(value)
        return value

    def forward(self, x, t):
        """Return the loss layer's forward value for ``predict(x)`` and targets ``t``."""
        predicted = self.predict(x)
        cost = self.loss_layer.forward(predicted, t)
        return cost

    def backward(self, dout=1):
        """Back-propagate from the loss layer through the layers in reverse order."""
        delta = self.loss_layer.backward(dout)
        for stage in reversed(self.layers):
            delta = stage.backward(delta)
        return delta
Exemplo n.º 6
0
class TwoLayerNet:
    """Two Affine layers with a Sigmoid in between, scored by SoftmaxWithLoss."""

    def __init__(self, input_size, hidden_size, output_size):
        """Initialise weights/biases, create the layers and gather params/grads.

        Args:
            input_size: Rows of the first weight matrix.
            hidden_size: Columns of the first / rows of the second weight matrix.
            output_size: Columns of the second weight matrix.
        """
        n_in, n_hidden, n_out = input_size, hidden_size, output_size

        # Initialise weights and biases.
        w_one = 0.01 * np.random.randn(n_in, n_hidden)
        b_one = np.zeros(n_hidden)
        w_two = 0.01 * np.random.randn(n_hidden, n_out)
        b_two = np.zeros(n_out)

        # Create the layers.
        self.layers = [Affine(w_one, b_one), Sigmoid(), Affine(w_two, b_two)]
        self.loss_layer = SoftmaxWithLoss()

        # Gather all weights and gradients into lists.
        self.params = []
        self.grads = []
        for block in self.layers:
            self.params.extend(block.params)
            self.grads.extend(block.grads)

    def predict(self, x):
        """Apply every layer's ``forward`` to ``x`` in sequence and return the result."""
        current = x
        for block in self.layers:
            current = block.forward(current)
        return current

    def forward(self, x, t):
        """Return the loss for inputs ``x`` and targets ``t``."""
        return self.loss_layer.forward(self.predict(x), t)

    def backward(self, dout=1):
        """Back-propagate ``dout`` through the loss layer and then all layers in reverse.

        Original author's note: in their run the gradient coming out of the
        softmax backward pass had shape (30, 3), negative only at the
        correct-label index. That depends on the data and loss layer and is
        not checked here.
        """
        flowing = self.loss_layer.backward(dout)
        for block in self.layers[::-1]:
            flowing = block.backward(flowing)
        return flowing
Exemplo n.º 7
0
class Network:
    """Fixed-size classifier: Affine1 -> Relu1 -> Affine2, with a SoftmaxWithLoss head.

    Layer sizes are hard-coded: 28 ** 2 inputs, 50 hidden units, 10 outputs.

    Attributes:
        params: Dict with keys 'W1', 'b1', 'W2', 'b2'.
        layers: OrderedDict of the layers, in forward order.
        lastLayer: The loss layer.
    """

    def __init__(self):
        """Create the parameters and the layers that wrap them."""
        self.__input_size = 28 ** 2   # MNIST data is 28 x 28 pixel.
        self.__hidden_size = 50       # Hidden layer size.
        self.__output_size = 10       # Output is an one-hot array from 0 to 9.
        self.__weight_init_std = 0.01 # Standard deviation for initial weights

        # Initialize weights (scaled Gaussian noise) and biases (zeros).
        self.params = {}
        self.params['W1'] = self.__weight_init_std * np.random.randn(self.__input_size, self.__hidden_size)
        self.params['b1'] = np.zeros(self.__hidden_size)
        self.params['W2'] = self.__weight_init_std * np.random.randn(self.__hidden_size, self.__output_size)
        self.params['b2'] = np.zeros(self.__output_size)

        # Generate layers; insertion order is the forward order.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` forward through every layer and return the final output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss layer's value for input data ``x`` and teacher data ``t``."""
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction equals the label.

        ``t`` may hold label indices (1-D) or one row per sample; in the
        latter case it is reduced with arg-max over axis 1.
        """
        y = np.argmax(self.predict(x), axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def gradient(self, x, t):
        """Run a forward and a backward pass and return the parameter gradients.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2', read from the ``dW`` and
            ``db`` attributes of the two Affine layers after the backward pass.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # return values
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads

    def repr(self):
        """Print the layer layout, then every parameter's key, shape and values.

        Side effect: switches NumPy's global float print format to two
        decimals (see ``__set_format__``).
        """
        # The banner previously left out the ReLU that sits between the two
        # Affine layers; it now matches the layers built in __init__.
        print("Input -> Affine(W1+b1) -> ReLU -> Affine(W2+b2) -> SoftMax")
        self.__set_format__("{:.2f}")
        for key, value in self.params.items():
            print(key + str(value.shape))
            print(value)

    def __set_format__(self, format_):
        """Make NumPy print floats with ``format_`` (a ``str.format`` template), globally."""
        float_formatter = format_.format
        np.set_printoptions(formatter = {'float_kind':float_formatter})
Exemplo n.º 8
0
class TwoLayerNet:
    """Affine1 -> Relu1 -> Affine2 network with a SoftmaxWithLoss output layer.

    Attributes:
        params: Dict with keys 'W1', 'b1', 'W2', 'b2'.
        layers: OrderedDict of layers in forward order.
        lastLayer: The loss (output) layer.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        """Initialise the weights and create the layers.

        Args:
            input_size: Rows of W1.
            hidden_size: Columns of W1 / rows of W2.
            output_size: Columns of W2.
            weight_init_std: Scale applied to the Gaussian-noise weights.
        """
        # Weight initialisation: scaled Gaussian noise, zero biases.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # Layer creation; an ordered dict keeps the forward order.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(w1, b1)
        self.layers['Relu1'] = ReLU()
        self.layers['Affine2'] = Affine(w2, b2)
        self.lastLayer = SoftmaxWithLoss()  # output layer

    def predict(self, x):
        """Inference: feed the input ``x`` forward through every layer."""
        signal = x
        for stage in self.layers.values():
            # Each layer's output becomes the next layer's input.
            signal = stage.forward(signal)
        return signal

    def loss(self, x, t):
        """Loss function.

        Args:
            x: Input data.
            t: Teacher (target) data.
        """
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Classification accuracy: fraction of rows whose arg-max prediction matches ``t``."""
        # Convert the raw outputs to the index of the largest value per row
        # (per the original author, these outputs are unnormalised scores).
        predicted = np.argmax(self.predict(x), axis=1)

        # If the teacher data is not 1-D (one-hot vectors, per the original
        # note), convert it to label indices as well.
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)

        hits = np.sum(predicted == labels)
        return hits / float(x.shape[0])

    def gradient(self, x, t):
        """Compute the gradients of all parameters by back-propagation.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2', taken from the ``dW`` and
            ``db`` attributes of the two Affine layers.
        """
        # Forward pass.
        self.loss(x, t)

        # Backward pass, starting at the output layer.
        dout = self.lastLayer.backward(dout=1)

        # Send dout back through the layers in reverse order.
        for stage in reversed(list(self.layers.values())):
            dout = stage.backward(dout)

        # Collect dW and db into a dict.
        first, second = self.layers['Affine1'], self.layers['Affine2']
        grads = {}
        grads['W1'] = first.dW
        grads['b1'] = first.db
        grads['W2'] = second.dW
        grads['b2'] = second.db
        return grads
Exemplo n.º 9
0
Arquivo: nn.py Projeto: hiranotnsk2/SA
class SimpleConvNet:
    """Deep CNN with a fixed architecture.

    Three stages of Conv-ReLU-Conv-ReLU-MaxPool (16, 32 and 64 filters of
    size 3x3), then Affine-ReLU-Dropout-Affine-Dropout and a SoftmaxWithLoss
    head.

    Attributes:
        params: Dict of weights/biases keyed 'W1_1', 'b1_1', ..., 'W5_1', 'b5_1'.
        layers: OrderedDict of layers in forward order.
        last_layer: The loss layer.
    """

    def __init__(self, input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        """Create the parameters and layers.

        Args:
            input_dim: Input shape (channels, height, width). Kept for
                interface compatibility; filter shapes are hard-coded (first
                convolution expects 1 input channel, first Affine layer
                expects 64*4*4 inputs).
            conv_param: Convolution settings dict. Kept for interface
                compatibility; not used by this fixed architecture. The
                former dict default is now ``None`` to avoid a shared mutable
                default argument.
            hidden_size: Number of nodes in the hidden Affine layer.
            output_size: Number of nodes in the output layer.
            weight_init_std: Standard deviation used to initialise the weights.
        """
        std = weight_init_std

        # Weight initialisation. (tag, out_channels, in_channels) per 3x3
        # convolution, in the order the random weights are drawn.
        conv_specs = (
            ('1_1', 16, 1), ('1_3', 16, 16),
            ('2_1', 32, 16), ('2_3', 32, 32),
            ('3_1', 64, 32), ('3_3', 64, 64),
        )
        self.params = {}
        for tag, out_ch, in_ch in conv_specs:
            self.params['W' + tag] = std * np.random.randn(out_ch, in_ch, 3, 3)
            self.params['b' + tag] = np.zeros(out_ch)

        self.params['W4_1'] = std * np.random.randn(64*4*4, hidden_size)
        self.params['b4_1'] = np.zeros(hidden_size)
        self.params['W5_1'] = std * np.random.randn(hidden_size, output_size)
        self.params['b5_1'] = np.zeros(output_size)

        # Layer creation. The two trailing Convolution arguments are
        # presumably (stride, pad) -- TODO confirm against Convolution.
        p = self.params
        self.layers = OrderedDict()
        self.layers['Conv1_1'] = Convolution(p['W1_1'], p['b1_1'], 1, 1)
        self.layers['ReLU1_2'] = ReLU()
        self.layers['Conv1_3'] = Convolution(p['W1_3'], p['b1_3'], 1, 1)
        self.layers['ReLU1_4'] = ReLU()
        self.layers['Pool1_5'] = MaxPooling(pool_h=2, pool_w=2, stride=2)

        self.layers['Conv2_1'] = Convolution(p['W2_1'], p['b2_1'], 1, 1)
        self.layers['ReLU2_2'] = ReLU()
        # Note the last argument is 2 here, unlike the other convolutions.
        self.layers['Conv2_3'] = Convolution(p['W2_3'], p['b2_3'], 1, 2)
        self.layers['ReLU2_4'] = ReLU()
        self.layers['Pool2_5'] = MaxPooling(pool_h=2, pool_w=2, stride=2)

        self.layers['Conv3_1'] = Convolution(p['W3_1'], p['b3_1'], 1, 1)
        self.layers['ReLU3_2'] = ReLU()
        self.layers['Conv3_3'] = Convolution(p['W3_3'], p['b3_3'], 1, 1)
        self.layers['ReLU3_4'] = ReLU()
        self.layers['Pool3_5'] = MaxPooling(pool_h=2, pool_w=2, stride=2)

        self.layers['Affine4_1'] = Affine(p['W4_1'], p['b4_1'])
        self.layers['ReLU4_2'] = ReLU()
        self.layers['Dropout4_3'] = Dropout(0.5)

        self.layers['Affine5_1'] = Affine(p['W5_1'], p['b5_1'])
        self.layers['Dropout5_2'] = Dropout(0.5)

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` forward through every layer and return the final output.

        NOTE(review): every layer, including Dropout, is called as
        ``forward(x)`` with no train/test flag, so Dropout runs in whatever
        its default mode is -- confirm that is intended for inference.
        """
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Loss function.

        Args:
            x: Input data.
            t: Teacher (target) data.
        """
        return self.last_layer.forward(self.predict(x), t)

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples in ``x`` that are classified correctly.

        Predictions are made in chunks of ``batch_size`` rows. The final
        chunk may be shorter; it is still evaluated. Previously the trailing
        ``len(x) % batch_size`` samples were skipped while the result was
        still divided by the full sample count.

        Args:
            x: Input data; rows are samples.
            t: Labels, either 1-D indices or one row per sample (reduced
                with arg-max over axis 1).
            batch_size: Number of rows predicted at a time.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        n_samples = x.shape[0]
        correct = 0.0
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            predicted = np.argmax(self.predict(x[start:stop]), axis=1)
            correct += np.sum(predicted == t[start:stop])

        return correct / n_samples

    def gradient(self, x, t):
        """Compute the gradients by back-propagation.

        Args:
            x: Input data.
            t: Teacher (target) data.

        Returns:
            Dict keyed like ``params`` ('W1_1', 'b1_1', ..., 'W5_1', 'b5_1')
            holding the ``dW`` / ``db`` attributes of the corresponding
            Convolution and Affine layers after the backward pass.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Parameter tag -> name of the layer that owns it.
        owners = (
            ('1_1', 'Conv1_1'), ('1_3', 'Conv1_3'),
            ('2_1', 'Conv2_1'), ('2_3', 'Conv2_3'),
            ('3_1', 'Conv3_1'), ('3_3', 'Conv3_3'),
            ('4_1', 'Affine4_1'), ('5_1', 'Affine5_1'),
        )
        grads = {}
        for tag, layer_name in owners:
            layer = self.layers[layer_name]
            grads['W' + tag], grads['b' + tag] = layer.dW, layer.db

        return grads
Exemplo n.º 10
0
class SimpleConvNet:
    """CNN: Conv1 -> Relu1 -> Pool1 -> Affine1 -> Relu2 -> Affine2, with a SoftmaxWithLoss head.

    Attributes:
        params: Dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
        layers: OrderedDict of layers in forward order.
        last_layer: The loss layer.
    """

    def __init__(self, input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        """Create the parameters and layers.

        Args:
            input_dim: Input shape; ``input_dim[0]`` is the channel count
                and ``input_dim[1]`` the (square) spatial size.
            conv_param: Dict with keys 'filter_num', 'filter_size', 'pad'
                and 'stride'. ``None`` (the default) means
                ``{'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}``,
                the former dict default; a sentinel is used to avoid a shared
                mutable default argument.
            hidden_size: Width of the hidden Affine layer.
            output_size: Width of the output Affine layer.
            weight_init_std: Scale applied to the Gaussian-noise weights.
        """
        if conv_param is None:
            conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}

        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]

        # Integer size arithmetic. With true division an inexact result
        # produced a flattened size that was not even a multiple of
        # filter_num. Flooring assumes Convolution/Pooling floor their
        # output sizes, as is conventional -- TODO confirm.
        conv_output_size = (input_size - filter_size + 2*filter_pad) // filter_stride + 1
        pool_output_size = filter_num * (conv_output_size // 2) ** 2

        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           filter_stride,
                                           filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` forward through every layer and return the final output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss layer's value for inputs ``x`` and targets ``t``."""
        return self.last_layer.forward(self.predict(x), t)

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples in ``x`` that are classified correctly.

        Predictions are made in chunks of ``batch_size`` rows. The final,
        possibly shorter chunk is included; previously the trailing
        ``len(x) % batch_size`` samples were skipped while the result was
        still divided by the full sample count.

        Args:
            x: Input data; rows are samples.
            t: Labels, either 1-D indices or one row per sample (reduced
                with arg-max over axis 1).
            batch_size: Number of rows predicted at a time.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        n_samples = x.shape[0]
        correct = 0.0
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            predicted = np.argmax(self.predict(x[start:stop]), axis=1)
            correct += np.sum(predicted == t[start:stop])

        return correct / n_samples

    def gradient(self, x, t):
        """Run forward and backward passes and return the parameter gradients.

        Returns:
            Dict with keys 'W1', 'b1' (from Conv1), 'W2', 'b2' (from Affine1)
            and 'W3', 'b3' (from Affine2), read from the layers' ``dW`` /
            ``db`` attributes.
        """
        # Forward pass.
        self.loss(x, t)

        # Backward pass.
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        for idx, layer_name in (('1', 'Conv1'), ('2', 'Affine1'), ('3', 'Affine2')):
            layer = self.layers[layer_name]
            grads['W' + idx] = layer.dW
            grads['b' + idx] = layer.db

        return grads
Exemplo n.º 11
0
class SimpleConvNet:
    """CNN: Conv -> Relu1 -> Pool -> Affine1 -> Relu -> Affine2, with a SoftmaxWithLoss head.

    Attributes:
        params: Dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
        layers: OrderedDict of layers in forward order.
        last_layer: The loss layer.
    """

    def __init__(self, dim_in=(1, 28, 28),
                 par={'num_filter': 30, 'size_filter': 5, 'pad': 0, 'stride': 1},
                 s_hidden=100, s_out=10, std_w_init=0.01):
        """Create the parameters and layers.

        Args:
            dim_in: Input shape; ``dim_in[0]`` is the channel count and
                ``dim_in[1]`` the spatial size.
            par: Dict with keys 'num_filter', 'size_filter', 'pad', 'stride'.
                It is only read, never modified.
            s_hidden: Width of the hidden Affine layer.
            s_out: Width of the output Affine layer.
            std_w_init: Scale applied to the Gaussian-noise weights.
        """
        filters = par['num_filter']
        kernel = par['size_filter']
        padding = par['pad']
        step = par['stride']
        channels, side = dim_in[0], dim_in[1]

        # Side length after the convolution, and flattened size after
        # 2x2 pooling.
        conv_side = int((side + 2 * padding - kernel) / step) + 1
        flat_size = int(filters * (conv_side / 2) ** 2)

        weights = {}
        weights['W1'] = std_w_init * np.random.randn(filters, channels, kernel, kernel)
        weights['b1'] = np.zeros(filters)
        weights['W2'] = std_w_init * np.random.randn(flat_size, s_hidden)
        weights['b2'] = np.zeros(s_hidden)
        weights['W3'] = std_w_init * np.random.randn(s_hidden, s_out)
        weights['b3'] = np.zeros(s_out)
        self.params = weights

        self.layers = OrderedDict([
            ('Conv', Convolution(weights['W1'], weights['b1'], step, padding)),
            ('Relu1', Relu()),
            ('Pool', Pooling(2, 2, 2)),
            ('Affine1', Affine(weights['W2'], weights['b2'])),
            ('Relu', Relu()),
            ('Affine2', Affine(weights['W3'], weights['b3'])),
        ])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` forward through every layer and return the final output."""
        out = x
        for stage in self.layers.values():
            out = stage.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss layer's value for inputs ``x`` and targets ``t``."""
        return self.last_layer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose arg-max prediction equals the label.

        ``t`` may be 1-D label indices or one row per sample (reduced with
        arg-max over axis 1).
        """
        guesses = self.predict(x).argmax(axis=1)
        labels = t if t.ndim == 1 else t.argmax(axis=1)
        return np.sum(guesses == labels) / float(guesses.size)

    def gradient(self, x, t):
        """Run forward and backward passes and return the parameter gradients.

        Returns:
            Dict with keys 'W1', 'b1' (from Conv), 'W2', 'b2' (from Affine1)
            and 'W3', 'b3' (from Affine2), read from the layers' ``dW`` /
            ``db`` attributes.
        """
        self.loss(x, t)

        upstream = self.last_layer.backward(1)
        for stage in list(self.layers.values())[::-1]:
            upstream = stage.backward(upstream)

        grads = {}
        for idx, key in (('1', 'Conv'), ('2', 'Affine1'), ('3', 'Affine2')):
            grads['W' + idx] = self.layers[key].dW
            grads['b' + idx] = self.layers[key].db
        return grads
Exemplo n.º 12
0
class SimpleConvNet:
    """CNN with L2 weight decay: Conv1 -> ReLU1 -> Pool1 -> Affine1 -> ReLU2 -> Affine2.

    The convolution settings are fixed (30 filters of size 5, pad 0,
    stride 1). Parameters can be pickled with ``save_params`` and restored
    with ``load_params``.

    Attributes:
        params: Dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
        layers: OrderedDict of layers in forward order.
        last_layer: The loss layer.
        hidden_layer_num: Number of weight matrices (3) included in the
            weight-decay penalty.
        weight_decay_lambda: Weight-decay coefficient.
    """

    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01,
                 weight_decay_lambda=0.01):
        """Create the parameters and layers.

        Args:
            input_dim: Input shape (channels, height, width).
            conv_param: Convolution settings dict. Kept for interface
                compatibility; this class uses fixed settings and ignores it.
                The former dict default is now ``None`` to avoid a shared
                mutable default argument.
            hidden_size: Number of nodes in the hidden layer.
            output_size: Number of nodes in the output layer.
            weight_init_std: Standard deviation used to initialise W1 (the
                convolution filters). W2 and W3 are scaled by ``he(fan_in)``.
            weight_decay_lambda: Weight-decay coefficient.
        """
        self.hidden_layer_num = 3
        self.weight_decay_lambda = weight_decay_lambda

        # Fixed convolution settings (conv_param is deliberately not read).
        filter_num = 30
        filter_size = 5
        filter_pad = 0
        filter_stride = 1
        input_size = input_dim[1]

        # Integer size arithmetic. Flooring assumes Convolution/MaxPooling
        # floor their output sizes, as is conventional -- TODO confirm.
        conv_output_size = (input_size - filter_size +
                            2 * filter_pad) // filter_stride + 1
        pool_output_size = filter_num * (conv_output_size // 2) ** 2

        # Weight initialisation. W1/b1 are the convolution filter weights
        # and biases.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['b3'] = np.zeros(output_size)

        # Fully connected weights are scaled by the external he() helper
        # (He initialisation, per the original comment).
        self.params['W2'] = np.random.randn(pool_output_size,
                                            hidden_size) * he(pool_output_size)
        self.params['W3'] = np.random.randn(hidden_size,
                                            output_size) * he(hidden_size)

        # Layer creation is shared with load_params().
        self.make_layers()

    def predict(self, x):
        """Feed ``x`` forward through every layer and return the final output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Loss function including the weight-decay penalty.

        Adds ``0.5 * lambda * sum(W ** 2)`` for W1 .. W<hidden_layer_num>
        (read from ``params``) to the loss layer's value.

        Args:
            x: Input data.
            t: Teacher (target) data.
        """
        y = self.predict(x)

        lmd = self.weight_decay_lambda
        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 1):
            W = self.params['W' + str(idx)]
            weight_decay += 0.5 * lmd * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples in ``x`` that are classified correctly.

        Predictions are made in chunks of ``batch_size`` rows. The final,
        possibly shorter chunk is included; previously the trailing
        ``len(x) % batch_size`` samples were skipped while the result was
        still divided by the full sample count.

        Args:
            x: Input data; rows are samples.
            t: Labels, either 1-D indices or one row per sample (reduced
                with arg-max over axis 1).
            batch_size: Number of rows predicted at a time.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        n_samples = x.shape[0]
        correct = 0.0
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            predicted = np.argmax(self.predict(x[start:stop]), axis=1)
            correct += np.sum(predicted == t[start:stop])

        return correct / n_samples

    def gradient(self, x, t):
        """Compute the gradients by back-propagation, including weight decay.

        Args:
            x: Input data.
            t: Teacher (target) data.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'. Each weight
            gradient is the owning layer's ``dW`` plus ``lambda * W``; each
            bias gradient is the layer's ``db``.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect dW/db with the derivative of the penalty used in loss().
        # loss() penalises W1, W2 and W3, so all three gradients get the
        # lambda * W term. Previously the W1 term was missing, which made
        # the gradient inconsistent with the loss.
        lmd = self.weight_decay_lambda
        grads = {}
        for idx, layer_name in (('1', 'Conv1'), ('2', 'Affine1'), ('3', 'Affine2')):
            layer = self.layers[layer_name]
            grads['W' + idx] = layer.dW + lmd * self.params['W' + idx]
            grads['b' + idx] = layer.db

        return grads

    def save_params(self, file_name="CNNparams.pkl"):
        """Pickle the six parameter arrays to ``file_name``.

        Prints a ``<key>Start`` progress line before copying each parameter.
        """
        params = {}
        for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
            print(key + "Start")
            params[key] = self.params[key]

        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="CNNparams.pkl"):
        """Load the six parameter arrays from ``file_name`` and rebuild the layers.

        The layers are rebuilt so the network actually uses the loaded
        weights; previously they kept referencing the old arrays until
        ``make_layers()`` was called separately.

        SECURITY: ``pickle.load`` can execute arbitrary code. Only load
        files from a trusted source.
        """
        with open(file_name, 'rb') as f:
            params = pickle.load(f)

        for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
            self.params[key] = params[key]

        self.make_layers()

    def make_layers(self):
        """(Re)create all layers from the arrays currently held in ``params``."""
        self.layers = OrderedDict()
        # W1 / b1 are the convolution filter weights / biases. The trailing
        # 1, 0 are presumably (stride, pad) -- TODO confirm.
        self.layers['Conv1'] = Convolution(
            self.params['W1'], self.params['b1'], 1, 0)
        self.layers['ReLU1'] = ReLU()
        self.layers['Pool1'] = MaxPooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['ReLU2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()
Exemplo n.º 13
0
class SimpleConvNet:
    """CNN: Conv1 -> Relu1 -> Pool1 -> Affine1 -> Relu2 -> Affine2, with a SoftmaxWithLoss head.

    Attributes:
        params: Dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
        layers: OrderedDict of layers in forward order.
        last_layer: The loss layer.
    """

    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01):
        """Create the parameters and layers.

        Args:
            input_dim: Input shape; ``input_dim[0]`` is the channel count
                and ``input_dim[1]`` the (square) spatial size.
            conv_param: Dict with keys 'filter_num', 'filter_size', 'pad'
                and 'stride'. ``None`` (the default) means
                ``{'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}``,
                the former dict default; a sentinel is used to avoid a shared
                mutable default argument.
            hidden_size: Width of the hidden Affine layer.
            output_size: Width of the output Affine layer.
            weight_init_std: Scale applied to the Gaussian-noise weights.
        """
        if conv_param is None:
            conv_param = {
                'filter_num': 30,
                'filter_size': 5,
                'pad': 0,
                'stride': 1,
            }

        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]

        # Output size of the convolution layer, then the flattened size
        # after 2x2 pooling. Both spatial dimensions are halved by the
        # pooling; previously only one factor was divided by 2, which
        # doubled the row count of W2. Flooring assumes Convolution/Pooling
        # floor their output sizes, as is conventional -- TODO confirm.
        conv_output_size = (input_size - filter_size +
                            2 * filter_pad) // filter_stride + 1
        pool_output_size = filter_num * (conv_output_size // 2) ** 2

        # Parameter initialisation (1: convolution, 2: fully connected,
        # 3: fully connected).
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(
            pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Layer creation.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           filter_stride,
                                           filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` forward through every layer and return the final output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss layer's forward value for inputs ``x`` and targets ``t``.

        This must be the loss layer's *forward* pass. The previous code
        called ``backward(y, t)`` here, so no loss was computed and
        ``gradient`` ran its backward pass without a preceding forward pass
        through the loss layer.
        """
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        """Run forward and backward passes and return the parameter gradients.

        Returns:
            Dict with keys 'W1', 'b1' (from Conv1), 'W2', 'b2' (from Affine1)
            and 'W3', 'b3' (from Affine2), read from the layers' ``dW`` /
            ``db`` attributes.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients.
        grads = {}
        for idx, layer_name in (('1', 'Conv1'), ('2', 'Affine1'), ('3', 'Affine2')):
            layer = self.layers[layer_name]
            grads['W' + idx] = layer.dW
            grads['b' + idx] = layer.db

        return grads
class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, followed by SoftmaxWithLoss."""

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Weights are Gaussian noise scaled by weight_init_std; biases start
        # at zero. (A 1/sqrt(fan_in) scaling is a possible alternative.)
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {"W1": w1, "b1": b1, "W2": w2, "b2": b2}

        # Build the layers; the OrderedDict keeps them in forward order.
        self.layers = OrderedDict([
            ("Affine1", Affine(self.params["W1"], self.params["b1"])),
            ("ReLU1", ReLU()),
            ("Affine2", Affine(self.params["W2"], self.params["b2"])),
        ])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for name in self.layers:
            out = self.layers[name].forward(out)
        return out

    def loss(self, x, t):
        """Return ``lastLayer.forward`` applied to the prediction for ``x`` and ``t``."""
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Fraction of rows of ``x`` whose arg-max prediction equals the label.

        ``t`` may be 1-D labels; otherwise it is reduced with argmax on axis 1.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Gradients of the loss for every parameter via ``numerical_gradient``."""
        def loss_W(W):
            # The argument is ignored; the loss is always evaluated on (x, t).
            return self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])
        return grads

    def gradient(self, x, t):
        """Gradients of the loss for every parameter via backpropagation."""
        # Forward pass.
        self.loss(x, t)

        # Backward pass, last layer first.
        upstream = self.lastLayer.backward(1)
        for layer in list(self.layers.values())[::-1]:
            upstream = layer.backward(upstream)

        grads = {}
        for idx in ('1', '2'):
            affine = self.layers["Affine" + idx]
            grads['W' + idx] = affine.dW
            grads['b' + idx] = affine.db
        return grads
Exemplo n.º 15
0
class DPLMultiLayerNet:
    """Fully connected multi-layer neural network with an optional DPL mode.

    Parameters
    ----------
    input_size : input size (784 for MNIST)
    hidden_size_list : list with the number of neurons of each hidden layer
        (e.g. [100, 100, 100])
    output_size : output size (10 for MNIST)
    batch_size : batch size handed to the first and last path layers
    activation : 'relu' or 'sigmoid' or 'through'
    weight_init_std : standard deviation of the weights (e.g. 0.01)
        'relu' or 'he' selects the He initial value
        'sigmoid' or 'xavier' selects the Xavier initial value
        'linear' or 'through' selects the "Linear" initial value
    dpl : with 'dpl' the DPL forward/backward passes are used and the weights
        are absolute values of normal random numbers; with any other value
        the plain passes and signed normal random numbers are used
    """
    def __init__(self, input_size, hidden_size_list, output_size, batch_size=1,
                 activation='sigmoid', weight_init_std='linear', dpl='dpl'):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.batch_size = batch_size
        self.dpl = dpl
        self.params = {}
        self.layers = OrderedDict()
        # Initialise the weights first; the layers below hold references to them.
        self.__init_weight(weight_init_std, dpl)
        # Create the layers.
        self.__init_wb(activation, dpl)
        self.update_path = update_path(self.layers)

    def __init_wb(self, activation, dpl):
        """Create the layers.

        activation : 'relu' or 'sigmoid' or 'through'
        dpl : accepted for symmetry with ``__init_weight``; not used here
        """
        activation_layer = {'sigmoid': DPLSigmoid, 'relu': DPLRelu, 'through': Through}

        # First path layer (receives the batch size) and its activation.
        self.layers['Path1'] = FirstPath(self.params['W1'], self.params['b1'], self.batch_size)
        self.layers['Activation_function1'] = activation_layer[activation]()

        # Remaining hidden layers.
        for idx in range(2, self.hidden_layer_num + 1):
            self.layers['Path' + str(idx)] = DPLPath(self.params['W' + str(idx)],
                                                     self.params['b' + str(idx)])
            self.layers['Activation_function' + str(idx)] = activation_layer[activation]()

        # Output path layer (also receives the batch size); no activation follows.
        idx = self.hidden_layer_num + 1
        self.layers['Path' + str(idx)] = LastPath(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)], self.batch_size)

        self.last_layer = SoftmaxWithLoss()

    def __init_weight(self, weight_init_std, dpl):
        """Set the initial values of the weights.

        Parameters
        ----------
        weight_init_std : standard deviation of the weights (e.g. 0.01)
            'relu' or 'he' selects the He initial value
            'sigmoid' or 'xavier' selects the Xavier initial value
            'linear' or 'through' selects the "Linear" initial value
        dpl : with 'dpl' the random weights are replaced by their absolute
            values; otherwise signed normal random numbers are kept
        """
        all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        for idx in range(1, len(all_size_list)):
            scale = weight_init_std
            if str(weight_init_std).lower() in ('relu', 'he'):
                # Recommended initial value when ReLU is used.
                scale = np.sqrt(2.0 / all_size_list[idx - 1])
            elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                # Recommended initial value when sigmoid is used.
                scale = np.sqrt(1.0 / all_size_list[idx - 1])
            elif str(weight_init_std).lower() in ('linear', 'through'):
                # Recommended initial value for DPL.
                scale = 1.0 / all_size_list[idx - 1]
            rand = np.random.randn(all_size_list[idx - 1], all_size_list[idx])
            if dpl == 'dpl':
                rand = np.fabs(rand)
            self.params['W' + str(idx)] = scale * rand
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

    def set_batch(self, x, t):
        """Store the current batch and update ``batch_size`` from ``x.shape[0]``."""
        self.x = x
        self.t = t
        self.batch_size = x.shape[0]

    def predict(self):
        """Run the plain ``forward`` pass of every layer on the stored batch."""
        x = self.x
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def DPLpredict(self):
        """Call ``update_path.update()`` and run ``DPLforward`` of every layer."""
        self.update_path.update()
        x = self.x
        for layer in self.layers.values():
            x = layer.DPLforward(x)
        return x

    def loss(self):
        """Compute the loss on the stored batch.

        Returns
        -------
        value of the loss function
        """
        if self.dpl == 'dpl':
            y = self.DPLpredict()
        else:
            y = self.predict()

        return self.last_layer.forward(y, self.t)

    def accuracy(self):
        """Return the fraction of the stored batch that is classified correctly.

        Uses the plain ``predict`` pass. ``self.t`` may hold 1-D labels;
        otherwise it is reduced with argmax along axis 1.
        """
        y = self.predict()
        y = np.argmax(y, axis=1)
        # Bind t unconditionally: previously it was only assigned for
        # non-1-D targets, so 1-D label arrays raised UnboundLocalError.
        t = self.t
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(self.x.shape[0])
        return accuracy

    def numerical_gradient(self):
        """Compute the gradients by numerical differentiation.

        Returns
        -------
        dict holding the gradients of every layer
            grads['W1'], grads['W2'], ... are the weight gradients
            grads['b1'], grads['b2'], ... are the bias gradients
        """
        loss_W = lambda W: self.loss()

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])

        return grads

    def gradient(self):
        """Compute the gradients by backpropagation.

        Returns
        -------
        dict holding the gradients of every layer
            grads['W1'], grads['W2'], ... are the weight gradients
            grads['b1'], grads['b2'], ... are the bias gradients
        """
        # forward
        self.loss()

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        if self.dpl == 'dpl':
            # A DPL forward pass has to be paired with DPLbackward; the plain
            # backward was confirmed not to work after DPLforward.
            for layer in layers:
                dout = layer.DPLbackward(dout)
        else:
            for layer in layers:
                dout = layer.backward(dout)

        # Collect the gradients stored on the path layers.
        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = self.layers['Path' + str(idx)].dW
            grads['b' + str(idx)] = self.layers['Path' + str(idx)].db

        return grads
Exemplo n.º 16
0
class TwoLayerNet:
    """Two-layer network (Affine -> ReLU -> Affine, SoftmaxWithLoss output)
    whose weights use the He initial value.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        """Create the parameters and the layers.

        ``weight_init_std`` is accepted but not used: the weights are always
        scaled with ``he(fan_in)``.
        """
        # Initialise the parameters: zero biases, He-scaled Gaussian weights.
        self.params = {}
        self.params['b1'] = np.zeros(hidden_size)
        self.params['b2'] = np.zeros(output_size)
        self.params['W1'] = np.random.randn(input_size, hidden_size) * he(input_size)
        self.params['W2'] = np.random.randn(hidden_size, output_size) * he(hidden_size)

        # Create the layers; the OrderedDict keeps them in forward order.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()  # output layer

    def predict(self, x):
        """Inference: forward-propagate the input ``x`` through every layer."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Loss function.

        x: input data, t: teacher (target) data
        """
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Classification accuracy of the network on ``x`` against ``t``."""
        # predict returns raw scores; argmax turns them into class indices.
        y = self.predict(x)
        y = np.argmax(y, axis=1)

        if t.ndim != 1:
            # Non-1-D targets (e.g. one-hot vectors) are converted to indices.
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def gradient(self, x, t):
        """Compute the gradients of all parameters by backpropagation."""
        # Forward propagation.
        self.loss(x, t)

        # Backward propagation, starting at the output layer.
        dout = self.lastLayer.backward(dout=1)

        # Propagate dout through the layers in reverse order.
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Gather dW and db into grads.
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads

    def save_params(self, file_name="params.pkl"):
        """Pickle W1, b1, W2 and b2 to ``file_name``, printing a marker per key."""
        params = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            print(key + "Start")
            params[key] = self.params[key]

        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        """Load W1, b1, W2 and b2 from ``file_name`` and install them.

        Raises KeyError if one of the four keys is missing from the file.
        """
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(file_name, 'rb') as f:
            params = pickle.load(f)

        for key in ('W1', 'b1', 'W2', 'b2'):
            self.params[key] = params[key]

        # Rebinding self.params alone leaves the Affine layers pointing at the
        # old arrays, so the loaded weights would never be used by predict().
        # Assumes Affine exposes its parameters as .W / .b (matching the
        # .dW / .db read in gradient) - confirm against the Affine class.
        for idx, layer_name in enumerate(('Affine1', 'Affine2'), start=1):
            layer = self.layers[layer_name]
            layer.W = self.params['W' + str(idx)]
            layer.b = self.params['b' + str(idx)]


def he(n1):
    """Helper for the He initial value.

    Returns sqrt(2 / n1), the standard deviation to scale the weights with;
    ``n1`` is passed the input size of the layer by ``TwoLayerNet.__init__``.
    """
    return np.sqrt(2.0 / n1)
    
    
Exemplo n.º 17
0
class TwoLayerNet:
    """Two-layer network: affine -> relu -> affine, followed by SoftmaxWithLoss."""

    def __init__(self,
                 inputLayerSize,
                 hiddenLayerSize,
                 ouputLayerSize,
                 distributionScale=0.01):
        # Parameters: Gaussian weights scaled by distributionScale, zero biases.
        w1 = distributionScale * np.random.randn(inputLayerSize, hiddenLayerSize)
        b1 = np.zeros(hiddenLayerSize)
        w2 = distributionScale * np.random.randn(hiddenLayerSize, ouputLayerSize)
        b2 = np.zeros(ouputLayerSize)
        self.params = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}

        # Layers, stored in forward order.
        self.layers = OrderedDict([
            ('affine1', Affine(self.params['w1'], self.params['b1'])),
            ('relu1', Relu()),
            ('affine2', Affine(self.params['w2'], self.params['b2'])),
        ])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for name in self.layers:
            out = self.layers[name].forward(out)
        return out

    def getLoss(self, x, t):
        """Return ``lastLayer.forward`` applied to the prediction for ``x`` and ``t``."""
        return self.lastLayer.forward(self.predict(x), t)

    def getAccuracy(self, x, t):
        """Fraction of rows of ``x`` whose arg-max prediction equals the label.

        ``t`` may be 1-D labels; otherwise it is reduced with argmax on axis 1.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def getGradient(self, x, t):
        """Gradients of the loss for every parameter via backpropagation."""
        # Forward pass.
        self.getLoss(x, t)

        # Backward pass, last layer first.
        upstream = self.lastLayer.backward(1)
        for layer in list(self.layers.values())[::-1]:
            upstream = layer.backward(upstream)

        gradients = {}
        for idx in ('1', '2'):
            affine = self.layers['affine' + idx]
            gradients['w' + idx] = affine.dw
            gradients['b' + idx] = affine.db
        return gradients

    def getNumericalGradient(self, x, t):
        """Numerical gradients, intended for re-checking the backprop result."""
        def loss(W):
            # The argument is ignored; the loss is always evaluated on (x, t).
            return self.getLoss(x, t)

        gradients = {}
        for key in ('w1', 'b1', 'w2', 'b2'):
            gradients[key] = numericalGradient(loss, self.params[key])
        return gradients
class MultiLayerNexExtend:
    """
    Fully connected multi-layer neural network with optional weight decay,
    Dropout and Batch Normalization.
    """
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation="relu",
                 weight_init_std="relu",
                 weight_decay_lambda=0,
                 use_dropout=False,
                 dropout_ratio=0.5,
                 use_batchnorm=False):
        """
        :param input_size: size of the input
        :param hidden_size_list: list with the number of neurons per hidden layer
        :param output_size: size of the output
        :param activation: "relu" or "sigmoid"
        :param weight_init_std: standard deviation of the weights;
        "relu" or "he" selects the He initial value,
        "sigmoid" or "xavier" selects the Xavier initial value
        :param weight_decay_lambda: strength of the weight decay (L2 norm)
        :param use_dropout: whether to use Dropout
        :param dropout_ratio: Dropout ratio
        :param use_batchnorm: whether to use Batch Normalization
        """
        # Network shape.
        self.input_size = input_size
        self.hidden_size_list = hidden_size_list
        self.output_size = output_size
        self.hidden_layer_num = len(hidden_size_list)
        # Regularisation / normalisation switches.
        self.weight_decay_lambda = weight_decay_lambda
        self.use_dropout = use_dropout
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Initialise the weights.
        self.__init_weight(weight_init_std)

        # Build the layers: per hidden layer Affine [-> BatchNorm] ->
        # activation [-> Dropout]; the output Affine is appended afterwards.
        activation_layer = {"sigmoid": Sigmoid, "relu": ReLU}
        self.layers = OrderedDict()
        for layer_no in range(1, self.hidden_layer_num + 1):
            tag = str(layer_no)
            self.layers["Affine" + tag] = Affine(self.params["W" + tag],
                                                 self.params["b" + tag])

            if self.use_batchnorm:
                width = hidden_size_list[layer_no - 1]
                self.params["gamma" + tag] = np.ones(width)
                self.params["beta" + tag] = np.zeros(width)
                self.layers['BatchNorm' + tag] = BatchNormalization(
                    self.params['gamma' + tag], self.params['beta' + tag])

            self.layers["Activation_function" + tag] = activation_layer[activation]()

            if self.use_dropout:
                self.layers["Dropout" + tag] = Dropout(dropout_ratio)

        out_tag = str(self.hidden_layer_num + 1)
        self.layers["Affine" + out_tag] = Affine(self.params["W" + out_tag],
                                                 self.params["b" + out_tag])
        self.last_layer = SoftmaxWithLoss()

    def __init_weight(self, weight_init_std):
        """
        Set the initial values of the weights.
        :param weight_init_std: number used directly as the scale, or one of
        "relu"/"he" (sqrt(2 / fan_in)) and "sigmoid"/"xavier" (sqrt(1 / fan_in))
        :return: None; fills self.params with W<i> and b<i>
        """
        sizes = [self.input_size] + self.hidden_size_list + [self.output_size]
        init_kind = str(weight_init_std).lower()
        for idx, (fan_in, fan_out) in enumerate(zip(sizes[:-1], sizes[1:]), start=1):
            if init_kind in ("relu", "he"):
                scale = np.sqrt(2.0 / fan_in)
            elif init_kind in ("sigmoid", "xavier"):
                scale = np.sqrt(1.0 / fan_in)
            else:
                scale = weight_init_std
            self.params["W" + str(idx)] = scale * np.random.randn(fan_in, fan_out)
            self.params["b" + str(idx)] = np.zeros(fan_out)

    def predict(self, x, train_flg=False):
        """Forward pass; Dropout and BatchNorm layers also receive ``train_flg``."""
        for key, layer in self.layers.items():
            takes_flag = "Dropout" in key or "BatchNorm" in key
            x = layer.forward(x, train_flg) if takes_flag else layer.forward(x)
        return x

    def loss(self, x, t, train_flg=False):
        """
        Compute the loss function.
        :param x: input data
        :param t: true labels
        :param train_flg: whether this is a training pass
        :return: last-layer loss plus the L2 weight-decay penalty
        """
        y = self.predict(x, train_flg)

        # 0.5 * lambda * sum(W ** 2), accumulated over every weight matrix.
        penalty = 0
        for idx in range(1, self.hidden_layer_num + 2):
            weights = self.params["W" + str(idx)]
            penalty += 0.5 * self.weight_decay_lambda * np.sum(weights**2)

        return self.last_layer.forward(y, t) + penalty

    def accuracy(self, X, T):
        """Fraction of rows of ``X`` whose arg-max prediction equals the label."""
        predicted = np.argmax(self.predict(X, train_flg=False), axis=1)
        labels = T if T.ndim == 1 else np.argmax(T, axis=1)
        return np.sum(predicted == labels) / float(X.shape[0])

    def numerical_gradient(self, X, T):
        """Gradients of every parameter via ``numerical_gradient``."""
        def loss_W(W):
            # The argument is ignored; the loss is evaluated in training mode.
            return self.loss(X, T, train_flg=True)

        last_idx = self.hidden_layer_num + 1
        grads = {}
        for idx in range(1, last_idx + 1):
            tag = str(idx)
            grads["W" + tag] = numerical_gradient(loss_W, self.params["W" + tag])
            grads["b" + tag] = numerical_gradient(loss_W, self.params["b" + tag])

            # The output layer has no BatchNorm parameters.
            if self.use_batchnorm and idx != last_idx:
                grads['gamma' + tag] = numerical_gradient(
                    loss_W, self.params['gamma' + tag])
                grads['beta' + tag] = numerical_gradient(
                    loss_W, self.params['beta' + tag])

        return grads

    def gradient(self, x, t):
        """Gradients of every parameter via backpropagation."""
        # Forward pass in training mode.
        self.loss(x, t, train_flg=True)

        # Backward pass, last layer first.
        upstream = self.last_layer.backward(1)
        for layer in list(self.layers.values())[::-1]:
            upstream = layer.backward(upstream)

        # Collect the gradients; weight gradients include the decay term.
        last_idx = self.hidden_layer_num + 1
        grads = {}
        for idx in range(1, last_idx + 1):
            tag = str(idx)
            affine = self.layers["Affine" + tag]
            grads["W" + tag] = (affine.dW +
                                self.weight_decay_lambda * self.params["W" + tag])
            grads["b" + tag] = affine.db

            # The output layer has no BatchNorm parameters.
            if self.use_batchnorm and idx != last_idx:
                batchnorm = self.layers['BatchNorm' + tag]
                grads['gamma' + tag] = batchnorm.dgamma
                grads['beta' + tag] = batchnorm.dbeta

        return grads
Exemplo n.º 19
0
# coding: utf-8

import numpy as np
import sys
sys.path.append('../../')
from common.layers import SoftmaxWithLoss

softmaxWithLoss = SoftmaxWithLoss()

# Two hand-written (input, one-hot target) pairs. Each is pushed through
# forward(); the result and the result of backward(1) are printed.
cases = (
    ([[0.3, 0.2, 0.5]], [[0, 1, 0]]),
    ([[0.01, 0.99, 0.0]], [[0, 1, 0]]),
)
for scores, target in cases:
    x = np.array(scores)
    t = np.array(target)
    out = softmaxWithLoss.forward(x, t)
    print(out)
    dx = softmaxWithLoss.backward(1)
    print(dx)
class DeepConvnet:
    """Deep convolutional network.

    Layer sequence built by ``__init__``:
        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        affine - relu - dropout - affine - dropout - softmax-with-loss
    """

    # Positions in self.layers of the layers that own parameters
    # (six Convolution layers followed by two Affine layers).
    _PARAM_LAYER_INDICES = (0, 2, 5, 7, 10, 12, 15, 18)

    # NOTE: the dict defaults below are shared between calls; they are only
    # read here, never modified.
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_2={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_3={'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_4={'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
                 conv_param_5={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_6={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 hidden_size=50, output_size=10):
        """Create the parameters and the layers.

        input_dim : only ``input_dim[0]`` is read, as the channel count of
            the first convolution
        conv_param_N : dict with 'filter_num', 'filter_size', 'pad', 'stride'
            for the N-th convolution
        hidden_size : width of the first affine layer
        output_size : width of the second affine layer
        """
        # Per-layer divisor for the sqrt(2 / n) weight scale. The values are
        # hard-coded and match the default conv parameters; they are not
        # recomputed when other parameters are passed.
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
        weight_init_scale = np.sqrt(2.0 / pre_node_nums)

        # weights init
        self.params = {}
        pre_channel_num = input_dim[0]
        conv_params = [conv_param_1, conv_param_2, conv_param_3,
                       conv_param_4, conv_param_5, conv_param_6]
        for idx, conv_param in enumerate(conv_params):
            self.params['w'+str(idx+1)] = weight_init_scale[idx] *\
                    np.random.randn(
                        conv_param['filter_num'],
                        pre_channel_num, conv_param['filter_size'],
                        conv_param['filter_size'])
            self.params['b'+str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']
        self.params['w7'] = weight_init_scale[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['w8'] = weight_init_scale[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # gen layers
        self.layers = []
        self.layers.append(Convolution(self.params['w1'], self.params['b1'], conv_param_1['stride'],
                                       conv_param_1['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w2'], self.params['b2'], conv_param_2['stride'],
                                       conv_param_2['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['w3'], self.params['b3'], conv_param_3['stride'],
                                       conv_param_3['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w4'], self.params['b4'], conv_param_4['stride'],
                                       conv_param_4['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['w5'], self.params['b5'], conv_param_5['stride'],
                                       conv_param_5['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w6'], self.params['b6'], conv_param_6['stride'],
                                       conv_param_6['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['w7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['w8'], self.params['b8']))
        self.layers.append(Dropout(0.5))
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        """Forward pass; only Dropout layers receive ``train_flg``."""
        for layer in self.layers:
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Loss for ``x`` against ``t``; predict always runs with train_flg=True."""
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Fraction of the samples in ``x`` that are classified correctly.

        The data is evaluated in chunks of ``batch_size``. ``t`` may be 1-D
        labels; otherwise it is reduced with argmax along axis 1.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = 0.0
        # Step over the data so the final, possibly shorter chunk is
        # evaluated too. Iterating int(n / batch_size) full batches silently
        # counted the trailing samples as wrong (and gave 0 for n < batch_size).
        for start in range(0, x.shape[0], batch_size):
            tx = x[start:start + batch_size]
            tt = t[start:start + batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]

    def gradient(self, x, t):
        """Gradients of every parameter via backpropagation.

        Returns a dict with keys 'w1'..'w8' and 'b1'..'b8'.
        """
        # forward
        self.loss(x, t)

        # backward, last layer first
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)

        # collect the gradients stored on the parameterised layers
        grads = {}
        for i, layer_idx in enumerate(self._PARAM_LAYER_INDICES):
            grads['w'+str(i+1)] = self.layers[layer_idx].dw
            grads['b'+str(i+1)] = self.layers[layer_idx].db

        return grads

    def save_params(self, file_name='params.pkl'):
        """Pickle a shallow copy of ``self.params`` to ``file_name``."""
        params = dict(self.params)
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name='params.pkl'):
        """Load parameters from ``file_name`` and install them in the layers."""
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        self.params.update(params)
        # Point the parameterised layers at the freshly loaded arrays.
        for i, layer_idx in enumerate(self._PARAM_LAYER_INDICES):
            self.layers[layer_idx].w = self.params['w'+str(i+1)]
            self.layers[layer_idx].b = self.params['b'+str(i+1)]
class DeepConvNet:
    """High-accuracy ConvNet with a recognition rate of 99% or more.

    The network is structured as follows:
        conv - relu - conv- relu - pool -
        conv - relu - conv- relu - pool -
        conv - relu - conv- relu - pool -
        affine - relu - dropout - affine - dropout - softmax
    """

    # Positions in self.layers of the layers that own parameters
    # (six Convolution layers followed by two Affine layers).
    _PARAM_LAYER_INDICES = (0, 2, 5, 7, 10, 12, 15, 18)

    # NOTE: the dict defaults below are shared between calls; they are only
    # read here, never modified.
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_2 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_3 = {'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_4 = {'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1},
                 conv_param_5 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_6 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                 hidden_size=50, output_size=10):
        """Create the parameters and the layers.

        input_dim : only ``input_dim[0]`` is read, as the channel count of
            the first convolution
        conv_param_N : dict with 'filter_num', 'filter_size', 'pad', 'stride'
            for the N-th convolution
        hidden_size : width of the first affine layer
        output_size : width of the second affine layer
        """
        # Weight initialisation ===========
        # How many connections each neuron of a layer has to the neurons of
        # the previous layer (TODO: compute this automatically; the values
        # are hard-coded and match the default conv parameters).
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
        # Recommended initial scale when ReLU is used.
        weight_init_scales = np.sqrt(2.0 / pre_node_nums)

        self.params = {}
        pre_channel_num = input_dim[0]
        conv_params = [conv_param_1, conv_param_2, conv_param_3,
                       conv_param_4, conv_param_5, conv_param_6]
        for idx, conv_param in enumerate(conv_params):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(
                conv_param['filter_num'], pre_channel_num,
                conv_param['filter_size'], conv_param['filter_size'])
            self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']
        self.params['W7'] = weight_init_scales[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = weight_init_scales[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # Layer creation ===========
        self.layers = []
        self.layers.append(Convolution(self.params['W1'], self.params['b1'],
                           conv_param_1['stride'], conv_param_1['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W2'], self.params['b2'],
                           conv_param_2['stride'], conv_param_2['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W3'], self.params['b3'],
                           conv_param_3['stride'], conv_param_3['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W4'], self.params['b4'],
                           conv_param_4['stride'], conv_param_4['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W5'], self.params['b5'],
                           conv_param_5['stride'], conv_param_5['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W6'], self.params['b6'],
                           conv_param_6['stride'], conv_param_6['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['W7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['W8'], self.params['b8']))
        self.layers.append(Dropout(0.5))

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        """Forward pass; only Dropout layers receive ``train_flg``."""
        for layer in self.layers:
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Loss for ``x`` against ``t``; predict always runs with train_flg=True."""
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Fraction of the samples in ``x`` that are classified correctly.

        The data is evaluated in chunks of ``batch_size`` with
        ``train_flg=False``. ``t`` may be 1-D labels; otherwise it is reduced
        with argmax along axis 1.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0

        # Step over the data so the final, possibly shorter chunk is
        # evaluated too. Iterating int(n / batch_size) full batches silently
        # counted the trailing samples as wrong (and gave 0 for n < batch_size).
        for start in range(0, x.shape[0], batch_size):
            tx = x[start:start + batch_size]
            tt = t[start:start + batch_size]
            y = self.predict(tx, train_flg=False)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def gradient(self, x, t):
        """Gradients of every parameter via backpropagation.

        Returns a dict with keys 'W1'..'W8' and 'b1'..'b8'.
        """
        # forward
        self.loss(x, t)

        # backward, last layer first
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)

        # Collect the gradients stored on the parameterised layers.
        grads = {}
        for i, layer_idx in enumerate(self._PARAM_LAYER_INDICES):
            grads['W' + str(i+1)] = self.layers[layer_idx].dW
            grads['b' + str(i+1)] = self.layers[layer_idx].db

        return grads

    def save_params(self, file_name="params.pkl"):
        """Pickle a shallow copy of ``self.params`` to ``file_name``."""
        params = dict(self.params)
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        """Load parameters from ``file_name`` and install them in the layers."""
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        self.params.update(params)

        # Point the parameterised layers at the freshly loaded arrays.
        for i, layer_idx in enumerate(self._PARAM_LAYER_INDICES):
            self.layers[layer_idx].W = self.params['W' + str(i+1)]
            self.layers[layer_idx].b = self.params['b' + str(i+1)]
Exemplo n.º 22
0
class NeuralNetwork():
    """Two-layer classifier (Affine -> Sigmoid -> Affine) with a softmax loss.

    Training is mini-batch gradient descent with optional L1/L2 penalties on
    the weight matrices; parameter updates use Adam.

    Args:
        n_features: Size of each input sample.
        n_output: Number of classes (width of the output layer).
        n_hidden: Width of the hidden layer.
        l2: L2 penalty coefficient applied to W1 and W2.
        l1: L1 penalty coefficient applied to W1 and W2.
        epochs: Number of passes over the training data in ``fit``.
        eta: Learning rate.
        decrease_const: Learning-rate decay constant; at epoch ``i`` the
            current ``eta`` is divided by ``1 + decrease_const * i``.
        shuffle: Whether the training data is reshuffled every epoch.
        n_minibatches: Number of mini-batches each epoch is split into.
        random_state: Seed passed to ``np.random.seed`` (seeds the global
            NumPy generator).
    """

    def __init__(self,
                 n_features,
                 n_output,
                 n_hidden=30,
                 l2=0.0,
                 l1=0.0,
                 epochs=50,
                 eta=0.001,
                 decrease_const=0.0,
                 shuffle=True,
                 n_minibatches=1,
                 random_state=None):
        np.random.seed(random_state)
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.l2 = l2
        self.l1 = l1
        self.epochs = epochs
        self.eta = eta
        self.decrease_const = decrease_const
        self.shuffle = shuffle
        self.n_minibatches = n_minibatches

        self.params = {}
        self._init_weights()

        # The layers are constructed with the arrays stored in self.params.
        self.layers = {}
        self.layers['Affine_1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Sigmoid'] = Sigmoid()
        self.layers['Affine_2'] = Affine(self.params['W2'], self.params['b2'])
        self.last_layer = SoftmaxWithLoss()

        self._loss = []
        self._reset_optimizer()

    def _reset_optimizer(self):
        """Clear the Adam step counter and moment estimates."""
        self._iter_t = 0
        self._adam_m = {}
        self._adam_v = {}

    def _init_weights(self):
        """Draw W1/W2 from N(0, 1/fan_in) and set b1/b2 to zero."""
        ls_nodes = [self.n_features, self.n_hidden, self.n_output]
        scale_1 = np.sqrt(1.0 / ls_nodes[0])
        scale_2 = np.sqrt(1.0 / ls_nodes[1])

        self.params['W1'] = scale_1 * np.random.randn(ls_nodes[0], ls_nodes[1])
        self.params['b1'] = np.zeros(ls_nodes[1])
        self.params['W2'] = scale_2 * np.random.randn(ls_nodes[1], ls_nodes[2])
        self.params['b2'] = np.zeros(ls_nodes[2])

    def predict(self, X):
        """Feed ``X`` through the layers in insertion order and return the scores.

        NOTE(review): the layers are invoked as callables (``layer(X)``), so
        the layer classes are assumed to implement ``__call__`` -- confirm
        against their definitions.
        """
        for layer in self.layers.values():
            X = layer(X)
        y_hat = X
        return y_hat

    def _calc_loss(self, X, t):
        """Return the loss on ``(X, t)`` including the L1/L2 penalty terms."""
        y_hat = self.predict(X)

        W1, W2 = self.params['W1'], self.params['W2']
        l2_term, l1_term = 0.0, 0.0
        l2_term += 0.5 * self.l2 * np.sum(W1**2)
        l2_term += 0.5 * self.l2 * np.sum(W2**2)
        # NOTE(review): the L1 term carries a factor 0.5 here while the
        # gradient in fit() uses l1 * sign(W) without it, so the reported
        # loss and the applied gradient differ by a factor of two in L1.
        l1_term += 0.5 * self.l1 * np.abs(W1).sum()
        l1_term += 0.5 * self.l1 * np.abs(W2).sum()

        loss = self.last_layer(y_hat, t) + l2_term + l1_term
        return loss

    def accuracy(self, X, t):
        """Return the fraction of rows of ``X`` whose argmax score matches ``t``.

        ``t`` may be 1-D class indices or a 2-D array reduced with argmax.
        """
        y_hat = self.predict(X)
        y = np.argmax(y_hat, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(X.shape[0])
        return accuracy

    def _encode_labels(self, y, n_labels):
        """Return a ``(len(y), n_labels)`` one-hot float matrix for labels ``y``."""
        n_samples = y.shape[0]
        onehot = np.zeros((n_samples, n_labels))
        # One vectorised assignment: row i gets a 1.0 in column y[i].
        onehot[np.arange(n_samples), y] = 1.0
        return onehot

    def fit(self, X, y, print_progress=False):
        """Train the network on ``X`` with integer class labels ``y``.

        The per-batch loss is appended to ``self._loss`` (exposed as
        ``loss_``).  The Adam state is reset once at the start of the call
        and then carried across all batches and epochs.

        Args:
            X: Training inputs, indexed by sample along axis 0.
            y: 1-D array of integer class labels.
            print_progress: If true, write the epoch counter to stderr.

        Returns:
            ``self``.
        """
        X_data = X.copy()
        y_enc = self._encode_labels(y, self.n_output)
        n_samples = y.shape[0]

        self._loss = []
        self._reset_optimizer()

        for i in range(self.epochs):
            # Decay is applied to the stored learning rate, so it compounds
            # from epoch to epoch (and persists after fit returns).
            self.eta /= (1 + self.decrease_const * i)

            if print_progress:
                sys.stderr.write('\rEpoch: {}/{}'.format(i + 1, self.epochs))
                sys.stderr.flush()

            if self.shuffle:
                idx = np.random.permutation(n_samples)
                X_data, y_enc = X_data[idx], y_enc[idx]

            batches = np.array_split(range(n_samples), self.n_minibatches)
            for batch in batches:
                # forward
                X_batch, y_batch = X_data[batch], y_enc[batch]
                loss = self._calc_loss(X_batch, y_batch)
                self._loss.append(loss)

                # backward, seeded with 1 at the loss layer
                delta = self.last_layer.backward(1)
                for layer in reversed(list(self.layers.values())):
                    delta = layer.backward(delta)

                # gradients: data term plus the L2 and L1 penalty gradients
                affine_1 = self.layers['Affine_1']
                affine_2 = self.layers['Affine_2']
                W1, W2 = affine_1.W, affine_2.W

                grads = {}
                grads['W1'] = affine_1.dW + self.l2 * W1 + self.l1 * np.sign(W1)
                grads['b1'] = affine_1.db
                grads['W2'] = affine_2.dW + self.l2 * W2 + self.l1 * np.sign(W2)
                grads['b2'] = affine_2.db

                self._update_grads(self.params, grads)

        return self

    # SGD (alternative update rule, not used by fit)
    def _update_grads_sgd(self, params, grads):
        """Plain gradient-descent step: ``params[key] -= eta * grads[key]``."""
        for key in params.keys():
            params[key] -= self.eta * grads[key]

    # Adam
    def _update_grads(self, params, grads):
        """Apply one Adam step to ``params`` in place.

        The first/second moment estimates are kept on the instance between
        calls; resetting them on every call would discard the running
        averages and reduce the update to a rescaled sign of the gradient.

        Args:
            params: Dict of parameter arrays, updated in place.
            grads: Dict of gradients with the same keys as ``params``.
        """
        beta1, beta2 = 0.9, 0.999
        eps = 1e-8

        self._iter_t += 1
        t = self._iter_t
        for key in params.keys():
            if key not in self._adam_m:
                self._adam_m[key] = np.zeros_like(params[key])
                self._adam_v[key] = np.zeros_like(params[key])
            m, v = self._adam_m[key], self._adam_v[key]
            g = grads[key]

            # Adam moving averages written in the factored (1 - beta) form:
            #   m <- beta1 * m + (1 - beta1) * g
            #   v <- beta2 * v + (1 - beta2) * g**2
            m += (1 - beta1) * (g - m)
            v += (1 - beta2) * (g**2 - v)

            # Bias-corrected estimates; the stored moments stay uncorrected.
            m_hat = m / (1 - beta1**t)
            v_hat = v / (1 - beta2**t)

            # In-place update so that other holders of the same array object
            # observe the new values.
            params[key] -= self.eta * m_hat / (np.sqrt(v_hat) + eps)

    @property
    def loss_(self):
        """List of per-batch loss values recorded by the last ``fit`` call."""
        return self._loss
class SimpleConvNet:
    """Small ConvNet: conv - relu - pool - affine - relu - affine - softmax.

    Args:
        input_dim: Input shape as ``(channels, height, width)``; only the
            channel count and ``input_dim[1]`` are used, so the spatial
            size is treated as square.
        conv_param: Dict with keys ``'filter_num'``, ``'filter_size'``,
            ``'pad'`` and ``'stride'``.  Defaults to 30 filters of size 5,
            no padding, stride 1.
        hidden_size: Width of the hidden affine layer.
        output_size: Number of output classes.
        weight_init_std: Standard deviation used for weight initialisation.
    """

    def __init__(self, input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        # Build the default per call instead of sharing one dict object
        # between every instance through the signature.
        if conv_param is None:
            conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}

        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]

        # Integer arithmetic throughout: with float division an odd
        # convolution output (e.g. 25) gave 12.5 * 12.5 * filter_num, which
        # does not match a 2x2/stride-2 pooling output of 12 x 12.
        # NOTE(review): assumes Convolution and Pooling floor their output
        # sizes -- confirm against the layer implementations.
        conv_output_size = (input_size - filter_size + 2 * filter_pad) // filter_stride + 1
        pooled_size = conv_output_size // 2
        pool_output_size = filter_num * pooled_size * pooled_size

        # Initialise the weights (same creation order as the parameter names).
        self.params = {}
        self.params['W1'] = weight_init_std * \
            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * \
            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Build the layers.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = ReLU()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Compute the loss.

        ``x`` is the input data and ``t`` the teacher labels.
        """
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples in ``x`` classified correctly.

        Args:
            x: Input array; samples are indexed along axis 0.
            t: 1-D class indices, or a 2-D array reduced with argmax.
            batch_size: Number of samples evaluated per forward pass.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        n_samples = x.shape[0]
        acc = 0.0

        # Stride over the data so that a final, shorter batch is scored too;
        # skipping it would count the trailing samples as wrong because the
        # denominator is the full sample count.
        for start in range(0, n_samples, batch_size):
            tx = x[start:start + batch_size]
            tt = t[start:start + batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / n_samples

    def numerical_gradient(self, x, t):
        """Estimate the gradients of all parameters by numerical differentiation.

        Returns:
            Dict with keys ``'W1'``..``'W3'`` and ``'b1'``..``'b3'``.
        """
        # The module-level numerical_gradient perturbs the parameter arrays
        # it is given; the argument of the lambda is therefore unused.
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])

        return grads

    def gradient(self, x, t):
        """Compute the gradients of all parameters by backpropagation.

        Returns:
            Dict with keys ``'W1'``..``'W3'`` and ``'b1'``..``'b3'``.
        """
        # forward
        self.loss(x, t)

        # backward, seeded with 1 at the loss layer
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients stored on the weighted layers.
        grads = {}
        for idx, key in enumerate(('Conv1', 'Affine1', 'Affine2'), start=1):
            grads['W' + str(idx)] = self.layers[key].dW
            grads['b' + str(idx)] = self.layers[key].db

        return grads

    def save_params(self, file_name="params.pkl"):
        """Pickle a shallow copy of ``self.params`` into ``file_name``."""
        params = dict(self.params.items())
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        """Load pickled parameters and rebind them to the weighted layers."""
        # NOTE(review): unpickling can execute arbitrary code; only load
        # parameter files from a trusted source.
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i + 1)]
            self.layers[key].b = self.params['b' + str(i + 1)]
class DeepConvNet:
    """
    High-accuracy ConvNet (the original note states a recognition rate
    above 99%).  Network structure:
        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        affine - relu - dropout - affine - dropout - softmax

    Args:
        input_dim: Input shape as ``(channels, height, width)``.
        conv_param_1 .. conv_param_6: Per-layer dicts with keys
            ``'filter_num'``, ``'filter_size'``, ``'pad'``, ``'stride'``;
            ``None`` selects the corresponding default below.
        hidden_size: Width of the hidden affine layer.
        output_size: Number of output classes.
    """

    # Default convolution settings (all 3x3, stride 1).  With a 1x28x28
    # input the feature maps are:
    #   conv1:          16x28x28
    #   conv2 | pool1:  16x28x28 | 16x14x14
    #   conv3:          32x14x14
    #   conv4 | pool2:  32x16x16 (pad 2 grows the map) | 32x8x8
    #   conv5:          64x8x8
    #   conv6 | pool3:  64x8x8 | 64x4x4
    _DEFAULT_CONV_PARAMS = (
        {'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
        {'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
        {'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
        {'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
        {'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
        {'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
    )

    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param_1=None,
                 conv_param_2=None,
                 conv_param_3=None,
                 conv_param_4=None,
                 conv_param_5=None,
                 conv_param_6=None,
                 hidden_size=50,
                 output_size=10):
        given = (conv_param_1, conv_param_2, conv_param_3,
                 conv_param_4, conv_param_5, conv_param_6)
        # Copy the defaults so that no instance shares a dict with the class.
        conv_params = [
            dict(default) if param is None else param
            for param, default in zip(given, self._DEFAULT_CONV_PARAMS)
        ]

        # Fan-in of every weighted layer.  A convolution node connects to
        # filter_h * filter_w nodes per input channel; the hidden layer
        # connects to every node of the flattened last pooling output; the
        # output layer connects to every hidden node.  These are derived
        # from the actual configuration so that non-default conv_param_* or
        # input_dim values produce matching weight shapes (the defaults give
        # 9, 144, 144, 288, 288, 576, 64*4*4 and hidden_size).
        conv_fan_ins, flat_size = self._trace_shapes(input_dim, conv_params)
        pre_node_nums = np.array(conv_fan_ins + [flat_size, hidden_size])

        # Standard deviations for He initialisation (ReLU activations).
        weight_init_scales = np.sqrt(2.0 / pre_node_nums)

        # Initialise weights and biases.
        self.params = {}
        pre_channel_num = input_dim[0]  # channel count of the previous layer
        for idx, conv_param in enumerate(conv_params):
            # Filter shape: (number of filters, channels, height, width).
            self.params['W' + str(idx + 1)] = weight_init_scales[idx] * \
                np.random.randn(conv_param['filter_num'],
                                pre_channel_num,
                                conv_param['filter_size'],
                                conv_param['filter_size'])
            self.params['b' + str(idx + 1)] = np.zeros(
                conv_param['filter_num'])

            pre_channel_num = conv_param['filter_num']

        self.params['W7'] = weight_init_scales[6] * np.random.randn(
            flat_size, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = weight_init_scales[7] * np.random.randn(
            hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # Build the network as an ordered dict (the author notes that the
        # book's version does not use one here):
        #   Conv1->ReLU1->Conv2->ReLU2->Pool1->
        #   Conv3->ReLU3->Conv4->ReLU4->Pool2->
        #   Conv5->ReLU5->Conv6->ReLU6->Pool3->
        #   Affine1 (hidden)->ReLU7->Dropout1->
        #   Affine2 (output)->Dropout2 -------> SoftmaxWithLoss
        self.layers = OrderedDict()
        for number, conv_param in enumerate(conv_params, start=1):
            suffix = str(number)
            self.layers['Conv' + suffix] = Convolution(
                self.params['W' + suffix],
                self.params['b' + suffix],
                stride=conv_param['stride'],
                pad=conv_param['pad'])
            self.layers['ReLU' + suffix] = ReLU()
            if number % 2 == 0:
                # A pooling layer follows every second convolution.
                self.layers['Pool' + str(number // 2)] = Pooling(
                    pool_h=2, pool_w=2, stride=2, pad=0)
        self.layers['Affine1'] = Affine(self.params['W7'], self.params['b7'])
        self.layers['ReLU7'] = ReLU()
        self.layers['Dropout1'] = Dropout(dropout_ratio=0.5)
        self.layers['Affine2'] = Affine(self.params['W8'], self.params['b8'])
        self.layers['Dropout2'] = Dropout(dropout_ratio=0.5)

        self.last_layer = SoftmaxWithLoss()

    @staticmethod
    def _trace_shapes(input_dim, conv_params):
        """Return ``(conv_fan_ins, flat_size)`` for the conv/pool stack.

        ``conv_fan_ins[i]`` is ``in_channels * filter_size**2`` of the i-th
        convolution; ``flat_size`` is the element count of the feature map
        after the last pooling layer.

        NOTE(review): assumes the layers use the usual output-size rule
        ``(size + 2 * pad - filter) // stride + 1`` -- confirm against the
        Convolution and Pooling implementations.
        """
        channels, height, width = input_dim
        conv_fan_ins = []
        for number, conv_param in enumerate(conv_params, start=1):
            size = conv_param['filter_size']
            pad = conv_param['pad']
            stride = conv_param['stride']

            conv_fan_ins.append(channels * size * size)
            height = (height + 2 * pad - size) // stride + 1
            width = (width + 2 * pad - size) // stride + 1
            channels = conv_param['filter_num']

            if number % 2 == 0:
                # 2x2 pooling, stride 2, no padding.
                height = (height - 2) // 2 + 1
                width = (width - 2) // 2 + 1
        return conv_fan_ins, channels * height * width

    def predict(self, x, train_flag=False):
        """Forward ``x`` layer by layer and return the network output.

        ``train_flag`` is passed on to Dropout layers only.
        """
        for layer in self.layers.values():
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flag)
            else:
                x = layer.forward(x)

        return x

    def loss(self, x, true_label):
        """Return the loss on ``x``.

        Only the value returned by the loss layer is computed; no
        regularisation term is added.
        """
        y = self.predict(x, train_flag=True)
        total_loss = self.last_layer.forward(y, true_label)

        return total_loss

    def accuracy(self, x, true_label, batch_size=100):
        """
        Compute the prediction accuracy on ``x``, evaluating in batches.

        :param x: input data
        :param true_label: true labels (class indices or one-hot rows)
        :param batch_size: number of samples per batch
        :return: accuracy
        """
        # One-hot labels are reduced to a 1-D array of class indices.
        if true_label.ndim != 1:
            true_label = np.argmax(true_label, axis=1)

        n_samples = x.shape[0]
        correct_cnt = 0.0
        # Striding over the data also covers a final, shorter batch.
        for start in range(0, n_samples, batch_size):
            temp_x = x[start:start + batch_size]
            temp_true_label = true_label[start:start + batch_size]
            y = self.predict(temp_x)
            y = np.argmax(y, axis=1)
            correct_cnt += np.sum(y == temp_true_label)

        acc = correct_cnt / n_samples
        return acc

    def gradient(self, x, true_label):
        """Compute the gradients of all parameters by backpropagation.

        Returns:
            Dict with keys ``'W1'``..``'W8'`` and ``'b1'``..``'b8'``.
        """
        # Forward pass first, so the layers hold their intermediate values.
        self.loss(x, true_label)

        # Backward pass layer by layer, seeded with 1 at the loss layer.
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients stored on the weighted layers.
        grads = {}
        for i in range(1, 7):
            conv = self.layers['Conv' + str(i)]
            grads['W' + str(i)] = conv.dW
            grads['b' + str(i)] = conv.db

        grads['W7'] = self.layers['Affine1'].dW
        grads['b7'] = self.layers['Affine1'].db
        grads['W8'] = self.layers['Affine2'].dW
        grads['b8'] = self.layers['Affine2'].db

        return grads

    def save_params(self, file_path='./data/params.pkl'):
        """Persist the trained parameters as a pickle file.

        Args:
            file_path: Destination path; defaults to ``./data/params.pkl``.
        """
        params = dict(self.params.items())

        with open(file_path, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_path='./data/params.pkl'):
        """Load pickled parameters and rebind them to the weighted layers.

        Args:
            file_path: Pickle file to read; defaults to ``./data/params.pkl``.
        """
        # NOTE(review): unpickling can execute arbitrary code; only load
        # parameter files from a trusted source.
        with open(file_path, 'rb') as f:
            params = pickle.load(f)

        for key, val in params.items():
            self.params[key] = val

        for i in range(1, 7):
            self.layers['Conv' + str(i)].W = self.params['W' + str(i)]
            self.layers['Conv' + str(i)].b = self.params['b' + str(i)]

        self.layers['Affine1'].W = self.params['W7']
        self.layers['Affine1'].b = self.params['b7']
        self.layers['Affine2'].W = self.params['W8']
        self.layers['Affine2'].b = self.params['b8']
Exemplo n.º 25
0
class TwoLayerNet:
    """Affine - ReLU - Affine network with a softmax-with-loss output layer.

    Args:
        input_size: Size of each input sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of output classes.
        weight_init_std: Standard deviation used for weight initialisation.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 weight_init_std=0.01):
        # Weight initialisation (W1 is drawn before W2).
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

        # Layer construction, in forward order.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss; ``x`` is the input data, ``t`` the teacher data."""
        scores = self.predict(x)
        return self.lastLayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose argmax score matches ``t``.

        ``t`` may be 1-D class indices or a 2-D array reduced with argmax.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Estimate all parameter gradients by numerical differentiation.

        ``x`` is the input data, ``t`` the teacher data.
        """
        # The lambda's argument is unused; the loss is recomputed from the
        # parameter arrays handed to the module-level numerical_gradient.
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        for name in ('W1', 'b1', 'W2', 'b2'):
            grads[name] = numerical_gradient(loss_W, self.params[name])
        return grads

    def gradient(self, x, t):
        """Compute all parameter gradients by backpropagation."""
        # forward
        self.loss(x, t)

        # backward, seeded with 1 at the loss layer
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients stored on the two affine layers.
        affine_1 = self.layers['Affine1']
        affine_2 = self.layers['Affine2']
        grads = {}
        grads['W1'] = affine_1.dW
        grads['b1'] = affine_1.db
        grads['W2'] = affine_2.dW
        grads['b2'] = affine_2.db
        return grads
Exemplo n.º 26
0
class ConvNet:
    def __init__(self,
                 input_dim=(1, 28, 28),
                 use_conv2=True,
                 use_affine2=True,
                 conv_param={
                     'filter_num': 128,
                     'filter_size': 3,
                     'pad': 1,
                     'stride': 1
                 },
                 pool_param={
                     'pool_size': 2,
                     'pad': 1,
                     'stride': 2
                 },
                 conv_param2={
                     'filter_num2': 128,
                     'filter_size2': 3,
                     'pad2': 1,
                     'stride2': 1
                 },
                 pool_param2={
                     'pool_size2': 2,
                     'pad2': 1,
                     'stride2': 2
                 },
                 hidden_size=128,
                 hidden_size2=128,
                 output_size=15,
                 weight_init_std=0.01,
                 use_batchnorm_C1=False,
                 use_batchnorm_C2=False,
                 use_batchnorm_A1=False,
                 use_batchnorm_A2=False,
                 use_dropout_A1=False,
                 dropout_ratio_A1=0.5,
                 use_dropout_A2=False,
                 dropout_ratio_A2=0.5,
                 use_succession=False,
                 data_num=1,
                 prediction_mode=False):

        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']

        pool_size = pool_param['pool_size']
        pool_pad = pool_param['pad']
        pool_stride = pool_param['stride']

        filter_num2 = conv_param2['filter_num2']
        filter_size2 = conv_param2['filter_size2']
        filter_pad2 = conv_param2['pad2']
        filter_stride2 = conv_param2['stride2']

        pool_size2 = pool_param2['pool_size2']
        pool_pad2 = pool_param2['pad2']
        pool_stride2 = pool_param2['stride2']

        input_size = input_dim[1]
        conv_output_size = (input_size + 2 * filter_pad - filter_size
                            ) // filter_stride + 1  # 畳み込み後のサイズ(H,W共通)
        pool_output_size = (conv_output_size + 2 * pool_pad -
                            pool_size) // pool_stride + 1  # プーリング後のサイズ(H,W共通)
        pool_output_pixel = filter_num * pool_output_size * pool_output_size  # プーリング後のピクセル総数

        input_size2 = pool_output_size
        conv_output_size2 = (input_size2 + 2 * filter_pad2 - filter_size2
                             ) // filter_stride2 + 1  # 畳み込み後のサイズ(H,W共通)
        pool_output_size2 = (conv_output_size2 + 2 * pool_pad2 - pool_size2
                             ) // pool_stride2 + 1  # プーリング後のサイズ(H,W共通)
        pool_output_pixel2 = filter_num2 * pool_output_size2 * pool_output_size2  # プーリング後のピクセル総数

        self.use_conv2 = use_conv2
        self.use_affine2 = use_affine2
        self.use_batchnorm_C1 = use_batchnorm_C1
        self.use_batchnorm_C2 = use_batchnorm_C2
        self.use_batchnorm_A1 = use_batchnorm_A1
        self.use_batchnorm_A2 = use_batchnorm_A2
        self.use_dropout_A1 = use_dropout_A1
        self.use_dropout_A2 = use_dropout_A2
        self.dropout_ratio_A1 = dropout_ratio_A1
        self.dropout_ratio_A2 = dropout_ratio_A2
        self.use_succession = use_succession
        self.data_num = data_num
        self.prediction_mode = prediction_mode

        # if W1 == []:
        self.params = {}
        self.paramsB = {}
        std = weight_init_std

        if self.use_succession:
            #----------重みをpickleから代入--------------
            with open("params_" + str(self.data_num) + ".pickle", "rb") as f:
                params_s = pickle.load(f)
            with open("params_BN" + str(self.data_num) + ".pickle", "rb") as f:
                params_BN = pickle.load(f)
            # self.params = {}
            # self.paramsB = {}

            self.params['W1'] = params_s['W1']  # W1は畳み込みフィルターの重みになる
            self.params['b1'] = params_s['b1']
            if self.use_batchnorm_C1:
                self.paramsB["BC1_moving_mean"] = params_BN["BC1_moving_mean"]
                self.paramsB["BC1_moving_var"] = params_BN["BC1_moving_var"]

            if self.use_conv2:
                self.params['W1_2'] = params_s['W1_2']
                self.params['b1_2'] = params_s['b1_2']
                if self.use_batchnorm_C2:
                    self.paramsB["BC2_moving_mean"] = params_BN[
                        "BC2_moving_mean"]
                    self.paramsB["BC2_moving_var"] = params_BN[
                        "BC2_moving_var"]

            self.params['W2'] = params_s['W2']
            self.params['b2'] = params_s['b2']

            if self.use_batchnorm_A1:
                self.paramsB["BA1_moving_mean"] = params_BN["BA1_moving_mean"]
                self.paramsB["BA1_moving_var"] = params_BN["BA1_moving_var"]

            if self.use_affine2:
                self.params['W2_2'] = params_s['W2_2']
                self.params['b2_2'] = params_s['b2_2']
                if self.use_batchnorm_A2:
                    self.paramsB["BA2_moving_mean"] = params_BN[
                        "BA2_moving_mean"]
                    self.paramsB["BA2_moving_var"] = params_BN[
                        "BA2_moving_var"]

            self.params['W3'] = params_s['W3']
            self.params['b3'] = params_s['b3']

            #----------重みをpickleから代入--------------
        else:
            # 重みの初期化
            #----第1層Conv----
            self.params['W1'] = std * np.random.randn(
                filter_num, input_dim[0], filter_size,
                filter_size)  # W1は畳み込みフィルターの重みになる
            self.params['b1'] = np.zeros(filter_num)  #b1は畳み込みフィルターのバイアスになる

            #----第2層Conv----
            if self.use_conv2:
                self.params['W1_2'] = std * np.random.randn(
                    filter_num2, filter_num, filter_size2,
                    filter_size2)  #-----追加------
                self.params['b1_2'] = np.zeros(filter_num2)  #-----追加------

                #----第3層Affine----
                self.params['W2'] = std * np.random.randn(
                    pool_output_pixel2, hidden_size)
            else:
                self.params['W2'] = std * np.random.randn(
                    pool_output_pixel, hidden_size)
            self.params['b2'] = np.zeros(hidden_size)

            #----第4層Affine----
            if self.use_affine2:
                self.params['W2_2'] = std * np.random.randn(
                    hidden_size, hidden_size2)  #-----追加------
                self.params['b2_2'] = np.zeros(hidden_size2)  #-----追加------

                #----第5層出力----
                self.params['W3'] = std * np.random.randn(
                    hidden_size2, output_size)  #--変更--
            else:
                self.params['W3'] = std * np.random.randn(
                    hidden_size, output_size)  #--変更--
            self.params['b3'] = np.zeros(output_size)

        # レイヤの生成
        self.layers = OrderedDict()
        #----第1層Conv----
        self.layers['Conv1'] = Convolution(
            self.params['W1'], self.params['b1'], conv_param['stride'],
            conv_param['pad'])  # W1が畳み込みフィルターの重み, b1が畳み込みフィルターのバイアスになる
        if self.use_batchnorm_C1:
            print(conv_output_size)
            print(conv_output_size ^ 2)
            batch_num = conv_output_size * conv_output_size
            if self.prediction_mode:
                self.layers['BatchNormalization_C1'] = BatchNormalization(
                    np.ones(batch_num, filter_num),
                    np.zeros(filter_num),
                    moving_mean=self.paramsB["BC1_moving_mean"],
                    moving_var=self.paramsB["BC1_moving_var"])
            else:
                self.layers['BatchNormalization_C1'] = BatchNormalization(
                    np.ones(batch_num),
                    np.zeros(batch_num),
                    DataNum=self.data_num,
                    LayerNum="C1")
                self.paramsB["BC1_moving_mean"] = self.layers[
                    'BatchNormalization_C1'].moving_mean
                self.paramsB["BC1_moving_var"] = self.layers[
                    'BatchNormalization_C1'].moving_var
        self.layers['ReLU1'] = ReLU()
        self.layers['Pool1'] = MaxPooling(pool_h=pool_size,
                                          pool_w=pool_size,
                                          stride=pool_stride,
                                          pad=pool_pad)

        #----第2層Conv----
        if self.use_conv2:
            self.layers['Conv1_2'] = Convolution(
                self.params['W1_2'], self.params['b1_2'],
                conv_param2['stride2'], conv_param2['pad2'])  #-----追加------
            if self.use_batchnorm_C2:
                batch_num2 = conv_output_size2 * conv_output_size2 * filter_num2
                if self.prediction_mode:
                    self.layers['BatchNormalization_C2'] = BatchNormalization(
                        np.ones(batch_num),
                        np.zeros(batch_num),
                        moving_mean=self.paramsB["BC2_moving_mean"],
                        moving_var=self.paramsB["BC12moving_var"])
                else:
                    self.layers['BatchNormalization_C2'] = BatchNormalization(
                        np.ones(batch_num),
                        np.zeros(batch_num),
                        DataNum=self.data_num,
                        LayerNum="C2")
                    self.paramsB["BC2_moving_mean"] = self.layers[
                        'BatchNormalization_C2'].moving_mean
                    self.paramsB["BC2_moving_var"] = self.layers[
                        'BatchNormalization_C2'].moving_var
            self.layers['ReLU1_2'] = ReLU()  #-----追加------
            self.layers['Pool1_2'] = MaxPooling(pool_h=pool_size2,
                                                pool_w=pool_size2,
                                                stride=pool_stride2,
                                                pad=pool_pad2)  #-----追加------

        #----第3層Affine----
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        if self.use_batchnorm_A1:
            if self.prediction_mode:
                self.layers['BatchNormalization_A1'] = BatchNormalization(
                    np.ones(hidden_size),
                    np.zeros(hidden_size),
                    moving_mean=self.paramsB["BA1_moving_mean"],
                    moving_var=self.paramsB["BA1_moving_var"])
            else:
                self.layers['BatchNormalization_A1'] = BatchNormalization(
                    np.ones(hidden_size),
                    np.zeros(hidden_size),
                    DataNum=self.data_num,
                    LayerNum="A1")
                self.paramsB["BA1_moving_mean"] = self.layers[
                    'BatchNormalization_A1'].moving_mean
                self.paramsB["BA1_moving_var"] = self.layers[
                    'BatchNormalization_A1'].moving_var

        if self.use_dropout_A1:
            self.layers['DropoutA1'] = Dropout(self.dropout_ratio_A1)
        self.layers['ReLU2'] = ReLU()

        # ----第4層Affine----
        if self.use_affine2:
            self.layers['Affine2'] = Affine(
                self.params['W2_2'], self.params['b2_2'])  #-----追加------
            if self.use_batchnorm_A2:
                if self.prediction_mode:
                    self.layers['BatchNormalization_A2'] = BatchNormalization(
                        np.ones(hidden_size2),
                        np.zeros(hidden_size2),
                        moving_mean=self.paramsB["BA2_moving_mean"],
                        moving_var=self.paramsB["BA2_moving_var"])
                else:
                    self.layers['BatchNormalization_A2'] = BatchNormalization(
                        np.ones(hidden_size2),
                        np.zeros(hidden_size2),
                        DataNum=self.data_num,
                        LayerNum="A2")
                self.paramsB["BA2_moving_mean"] = self.layers[
                    'BatchNormalization_A2'].moving_mean
                self.paramsB["BA2_moving_var"] = self.layers[
                    'BatchNormalization_A2'].moving_var

            if self.use_dropout_A2:
                self.layers['DropoutA2'] = Dropout(self.dropout_ratio_A2)
            self.layers['ReLU3'] = ReLU()  #-----追加------

        #----第5層出力----
        self.layers['Affine3'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

        # print('input size',input_size)
        # print('conv_output_size',conv_output_size)
        # print('pool_output_size',pool_output_size)
        # print('pool_output_pixel',pool_output_pixel)

        # print('input size2',input_size2)
        # print('conv_output_size2',conv_output_size2)
        # print('pool_output_size2',pool_output_size2)
        # print('pool_output_pixel2',pool_output_pixel2)

    def predict(self, x, train_flg=False):
        """Feed ``x`` through every layer of ``self.layers`` in order.

        Layers whose key contains "Dropout" or "BatchNorm" are additionally
        given ``train_flg``; all other layers receive only the input.

        :param x: input handed to the first layer
        :param train_flg: flag forwarded to the dropout / batch-norm layers
        :return: output of the last layer
        """
        mode_aware_tags = ("Dropout", "BatchNorm")
        out = x
        for name, layer in self.layers.items():
            if any(tag in name for tag in mode_aware_tags):
                out = layer.forward(out, train_flg)
                continue
            out = layer.forward(out)
        return out

    def loss(self, x, t, train_flg=False):
        """Return ``self.last_layer.forward`` applied to the prediction for ``x``.

        :param x: input passed to ``self.predict``
        :param t: targets passed to the last layer
        :param train_flg: forwarded unchanged to ``self.predict``
        """
        scores = self.predict(x, train_flg)
        loss_value = self.last_layer.forward(scores, t)
        return loss_value

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples whose predicted class matches ``t``.

        The data is evaluated in slices of ``batch_size`` samples with
        ``train_flg=False``. The last slice may be shorter than ``batch_size``;
        it is evaluated as well, so every sample counted in the denominator is
        actually scored.

        :param x: inputs, indexed by sample along the first axis
        :param t: labels, either a 1-D array of class indices or a 2-D array
            that is reduced with ``argmax`` along axis 1
        :param batch_size: number of samples per ``predict`` call
        :return: number of correct predictions divided by ``x.shape[0]``
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        total = x.shape[0]
        correct = 0.0
        # Stepping by batch_size up to `total` also visits the trailing
        # partial batch, which a loop over int(total / batch_size) would skip.
        for start in range(0, total, batch_size):
            stop = start + batch_size
            y = self.predict(x[start:stop], train_flg=False)
            correct += np.sum(np.argmax(y, axis=1) == t[start:stop])

        return correct / total

    def gradient(self, x, t):
        """Compute the parameter gradients by backpropagation.

        Runs ``self.loss`` with ``train_flg=True``, propagates backwards
        through ``self.last_layer`` and then through ``self.layers`` in
        reverse order, and collects ``dW`` / ``db`` from the convolution and
        affine layers.

        When the network is not in prediction mode, the moving mean and
        variance of every batch-normalisation layer that exists in
        ``self.layers`` are copied into ``self.paramsB``.

        :param x: input batch passed to ``self.loss``
        :param t: targets passed to ``self.loss``
        :return: dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3', plus
            'W1_2', 'b1_2' when ``use_conv2`` is set and 'W2_2', 'b2_2' when
            ``use_affine2`` is set
        """
        # forward
        self.loss(x, t, train_flg=True)

        # backward
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # (key suffix, layer name, whether the layer is part of the network)
        grad_sources = (
            ('1', 'Conv1', True),
            ('1_2', 'Conv1_2', self.use_conv2),
            ('2', 'Affine1', True),
            ('2_2', 'Affine2', self.use_affine2),
            ('3', 'Affine3', True),
        )
        grads = {}
        for suffix, layer_name, enabled in grad_sources:
            if enabled:
                layer = self.layers[layer_name]
                grads['W' + suffix], grads['b' + suffix] = layer.dW, layer.db

        # Same truthiness test the constructor uses to pick the training branch.
        if not self.prediction_mode:
            batchnorm_flags = (
                ('A1', self.use_batchnorm_A1),
                ('A2', self.use_batchnorm_A2),
                ('C1', self.use_batchnorm_C1),
                ('C2', self.use_batchnorm_C2),
            )
            for tag, enabled in batchnorm_flags:
                layer_name = 'BatchNormalization_' + tag
                # The A2 / C2 batch-norm layers are only built when their
                # Affine2 / Conv1_2 layer is enabled, so the flag alone does
                # not guarantee the layer exists.
                if enabled and layer_name in self.layers:
                    bn_layer = self.layers[layer_name]
                    self.paramsB['B' + tag + '_moving_mean'] = bn_layer.moving_mean
                    self.paramsB['B' + tag + '_moving_var'] = bn_layer.moving_var
        return grads
class SimpleConvNet:
    """Small CNN: Conv -> ReLU -> Pool(2x2, stride 2) -> Affine -> ReLU -> Affine.

    A SoftmaxWithLoss layer computes the cross-entropy error, to which an L2
    penalty on W1, W2 and W3 (strength ``regularizer_lambda``) is added.
    """

    # Layers that own trainable parameters; position i maps to W<i+1>/b<i+1>.
    _PARAM_LAYERS = ('Conv1', 'Affine1', 'Affine2')

    def __init__(self, input_dim=(1, 28, 28), conv_param=None, hidden_size=100,
                 output_size=10, weight_init_std=0.01, regularizer_lambda=0.1):
        """Initialise the parameters and build the layers.

        :param input_dim: input shape as (channels, height, width); only
            ``input_dim[1]`` is used for the size arithmetic, so the input is
            treated as square
        :param conv_param: dict with keys 'filter_num', 'filter_size', 'pad'
            and 'stride'; defaults to 30 filters of size 5x5, no padding,
            stride 1
        :param hidden_size: number of units in the hidden affine layer
        :param output_size: number of units in the output affine layer
        :param weight_init_std: scale applied to the random initial weights
        :param regularizer_lambda: strength of the L2 regularisation
        """
        if conv_param is None:
            conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0,
                          'stride': 1}
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]  # width/height of one input channel
        # Side length of a single feature map produced by the convolution.
        conv_output_size = int((input_size + 2 * filter_pad - filter_size) /
                               filter_stride + 1)
        # The 2x2 / stride-2 pooling halves each side, rounding down, and
        # keeps the number of feature maps. The pooled maps are flattened
        # before the affine layer, so this is the affine layer's input width.
        # Integer division keeps the count right for odd feature-map sizes.
        pool_output_size = filter_num * (conv_output_size // 2) ** 2

        self.regularizer_lambda = regularizer_lambda

        # Trainable parameters. The pooling layer has none.
        self.params = {}
        # Convolution filters: (filter count, channels, height, width), and
        # one bias per filter.
        self.params['W1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        # Hidden affine layer: pooled features -> hidden units.
        self.params['W2'] = weight_init_std * np.random.randn(
            pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        # Output affine layer: hidden units -> outputs.
        self.params['W3'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Layer stack in forward order.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           filter_stride, filter_pad)
        self.layers['ReLU1'] = ReLU()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['ReLU2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        # Final layer used to compute the cross-entropy error for training.
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Propagate ``x`` forward through every layer and return the output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, true_label):
        """Return the cross-entropy error plus the L2 regularisation term.

        :param x: input batch
        :param true_label: labels passed to the SoftmaxWithLoss layer
        :return: ``last_layer.forward(...)`` plus
            ``0.5 * regularizer_lambda * sum(W ** 2)`` over W1, W2 and W3
        """
        y = self.predict(x)
        total_loss = self.last_layer.forward(y, true_label)

        regularizer = 0
        for idx in (1, 2, 3):
            W = self.params['W' + str(idx)]
            regularizer += 0.5 * self.regularizer_lambda * np.sum(W ** 2)

        return total_loss + regularizer

    def accuracy(self, x, true_label, batch_size=100):
        """Return the prediction accuracy on ``x``, evaluated batch by batch.

        :param x: input data
        :param true_label: labels, either class indices (1-D) or one-hot rows
        :param batch_size: number of samples per ``predict`` call
        :return: correct predictions divided by ``x.shape[0]``
        """
        # One-hot labels are reduced to a 1-D array of class indices first.
        if true_label.ndim != 1:
            true_label = np.argmax(true_label, axis=1)

        total = x.shape[0]
        correct_cnt = 0.0
        # range(0, total, batch_size) also covers a trailing partial batch.
        for start in range(0, total, batch_size):
            stop = start + batch_size
            y = np.argmax(self.predict(x[start:stop]), axis=1)
            correct_cnt += np.sum(y == true_label[start:stop])

        return correct_cnt / total

    def numerical_gradient(self, x, true_label):
        """Compute the gradients of ``loss`` numerically via ``ng``."""
        def loss_func(_):
            return self.loss(x, true_label)

        grads = {}
        for idx in range(1, 4):
            for prefix in ('W', 'b'):
                key = prefix + str(idx)
                grads[key] = ng(loss_func, self.params[key])
        return grads

    def gradient(self, x, true_label):
        """Compute the gradients of ``loss`` by backpropagation.

        :param x: input batch
        :param true_label: labels for the batch
        :return: dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
        """
        # Forward pass so every layer caches its intermediate values.
        self.loss(x, true_label)

        # Backward pass: output layer first, then the stack in reverse.
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        for idx, layer_name in enumerate(self._PARAM_LAYERS, start=1):
            layer = self.layers[layer_name]
            w_key = 'W' + str(idx)
            # d/dW of 0.5 * lambda * sum(W ** 2) is lambda * W. Without this
            # term the returned gradient is not the gradient of `loss`: the
            # weights are never decayed and the penalty in the reported loss
            # keeps growing during training.
            grads[w_key] = layer.dW + self.regularizer_lambda * self.params[w_key]
            grads['b' + str(idx)] = layer.db

        return grads

    def save_params(self, file_path='./data/params.pkl'):
        """Pickle ``self.params`` to ``file_path``."""
        with open(file_path, 'wb') as f:
            pickle.dump(self.params, f)

    def load_params(self, file_path='./data/params.pkl'):
        """Load parameters pickled by ``save_params`` from ``file_path``.

        NOTE: ``pickle.load`` can execute arbitrary code; only load files
        from a trusted source.
        """
        with open(file_path, 'rb') as f:
            loaded = pickle.load(f)

        for key, val in loaded.items():
            current = self.params.get(key)
            if isinstance(current, np.ndarray) and current.shape == np.shape(val):
                # Copy in place: the layers were constructed with these very
                # arrays, so rebinding the dict entry would leave them using
                # the old weights.
                current[...] = val
            else:
                self.params[key] = val
Exemplo n.º 28
0
class SimpleConvNet:
    """
    1st hidden layer: Convolution -> ReLU -> Pooling
    2nd hidden layer: Affine -> ReLU (fully-connected layer)
    Output layer: Affine -> SoftmaxWithLoss
    """
    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01):
        """Build the CNN and initialise its parameters.

        :param input_dim: input shape as (channels, height, width); only
            ``input_dim[1]`` is used for the size arithmetic
        :param conv_param: dict with keys 'filter_num', 'filter_size', 'pad'
            and 'stride'; defaults to 30 filters of size 5, pad 0, stride 1
        :param hidden_size: number of units in the hidden affine layer
        :param output_size: number of output units
        :param weight_init_std: scale applied to the random initial weights
        """
        if conv_param is None:
            conv_param = {
                'filter_num': 30,
                'filter_size': 5,
                'pad': 0,
                'stride': 1
            }
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        # Integer arithmetic keeps the flattened size right when the
        # convolution output has an odd side length.
        conv_output_size = (input_size - filter_size +
                            2 * filter_pad) // filter_stride + 1
        pool_output_size = filter_num * (conv_output_size // 2) ** 2

        # Parameters needed by the CNN layers
        self.params = dict()
        self.params['W1'] = weight_init_std * \
                            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * \
                            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * \
                            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Create and connect the CNN layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        # Hidden fully-connected layer; gradient() reads its dW/db and W2/b2
        # are allocated for it above.
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Propagate ``x`` forward through every layer and return the output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss of the prediction for ``x`` against targets ``t``."""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        """Compute the parameter gradients by backpropagation.

        :return: dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
        """
        # Forward pass
        self.loss(x, t)

        # Backward pass
        dout = 1
        dout = self.last_layer.backward(dout)

        # Walk the layer stack (not the loss layer) in reverse order.
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the results
        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db
        return grads
Exemplo n.º 29
0
class SimpleConvNet:
    """CNN: Conv -> ReLU -> Pool -> Affine -> ReLU -> Affine -> SoftmaxWithLoss."""
    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param=None,
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01):
        """Initialise the parameters and build the layers.

        :param input_dim: shape of the input data: (channels, height, width)
        :param conv_param: hyper-parameters of the convolution layer (dict)
                            with the keys:
                            filter_num - number of filters
                            filter_size - size of a filter
                            stride - stride
                            pad - padding
                            Defaults to 30 filters of size 5, pad 0, stride 1.
        :param hidden_size: number of neurons in the hidden (affine) layer
        :param output_size: number of neurons in the output (affine) layer
        :param weight_init_std: standard deviation of the initial weights
        """
        if conv_param is None:
            conv_param = {
                'filter_num': 30,
                "filter_size": 5,
                'pad': 0,
                'stride': 1
            }
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        # Integer arithmetic keeps the flattened size right when the
        # convolution output has an odd side length.
        conv_output_size = (input_size - filter_size + 2 * filter_pad) // \
                           filter_stride + 1
        pool_output_size = filter_num * (conv_output_size // 2) ** 2

        # Initialise the weight parameters
        self.params = {
            # convolution layer weights and biases
            'W1': weight_init_std * np.random.randn(
                filter_num, input_dim[0], filter_size, filter_size),
            'b1': np.zeros(filter_num),
            'W2': weight_init_std * np.random.randn(pool_output_size,
                                                    hidden_size),
            'b2': np.zeros(hidden_size),
            'W3': weight_init_std * np.random.randn(hidden_size, output_size),
            'b3': np.zeros(output_size),
        }

        # Create the required layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Propagate ``x`` forward through every layer and return the output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss of the prediction for ``x`` against targets ``t``."""
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        """Compute the parameter gradients by backpropagation.

        :return: dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
        """
        self.loss(x, t)

        dout = 1
        dout = self.last_layer.backward(dout)

        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {
            'W1': self.layers['Conv1'].dW,
            'b1': self.layers['Conv1'].db,
            'W2': self.layers['Affine1'].dW,
            'b2': self.layers['Affine1'].db,
            'W3': self.layers['Affine2'].dW,
            'b3': self.layers['Affine2'].db
        }
        return grads

    def save_params(self, file_name="params.pkl"):
        """Pickle a shallow copy of the weight parameters to ``file_name``."""
        params = dict(self.params)
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        """Read the weight parameters from ``file_name`` and install them.

        NOTE: ``pickle.load`` can execute arbitrary code; only load files
        from a trusted source.
        """
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        # Point the parameterised layers at the freshly loaded arrays.
        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i + 1)]
            self.layers[key].b = self.params['b' + str(i + 1)]

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples whose predicted class matches ``t``.

        The data is evaluated in slices of ``batch_size``; the final slice may
        be shorter and is evaluated too, so every sample counted in the
        denominator is actually scored.

        :param x: inputs, indexed by sample along the first axis
        :param t: class indices (1-D) or one-hot rows (reduced with argmax)
        :param batch_size: number of samples per ``predict`` call
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        total = x.shape[0]
        acc = 0.0
        # range(0, total, batch_size) also covers the trailing partial batch.
        for start in range(0, total, batch_size):
            stop = start + batch_size
            y = np.argmax(self.predict(x[start:stop]), axis=1)
            acc += np.sum(y == t[start:stop])

        return acc / total
Exemplo n.º 30
0
class SimpleConvNet:
    """
    X > Convolution > activation function > pooling
    : Convolution > activation function > pooling  = 1개의 활성곱 레이어/ 1 hidden layer
    : there can be multiples of layers

    1st hidden layer: Convolution (W,b) -> ReLU -> Pooling
    2nd hidden layer: Affine (W,b) -> ReLU (fully-connected network, 완전 연결층)
    출력층: Affine (W,b) -> SoftmaxWithLoss

    # batch_normalization을 넣는다고 하면 또 다른 파라미터 (gamma, beta)가 있다
    # 파라미터가 많아지면 gradient를 계산하는 시간이 길어진다
    """

    def __init__(self, input_dim=(1, 28, 28),
                 conv_params=None,
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        """Initialise the instance: build the CNN and set initial values.

        input_dim: dimensions of the input data, (1, 28, 28) for MNIST
        conv_params: values needed to create the convolution layer's
            parameters (filter, bias):
            number of filters (filter_num),
            filter size (filter_size = filter_height = filter_width),
            padding (pad),
            stride (stride).
            Defaults to 30 filters of size 5, pad 0, stride 1.
        hidden_size: number of neurons used by the affine layer
            -> size of the W matrix
        output_size: number of elements in the output, 10 for MNIST
        weight_init_std: standard deviation used when initialising the
            weight matrices with random numbers
        """
        if conv_params is None:
            conv_params = {'filter_num': 30, 'filter_size': 5, 'pad': 0,
                           'stride': 1}
        filter_num = conv_params['filter_num']
        filter_size = conv_params['filter_size']
        filter_pad = conv_params['pad']
        filter_stride = conv_params['stride']
        input_size = input_dim[1]
        # Integer arithmetic keeps the flattened size right when the
        # convolution output has an odd side length.
        conv_output_size = (input_size - filter_size + 2 * filter_pad) // \
                           filter_stride + 1
        pool_output_size = filter_num * (conv_output_size // 2) ** 2

        # Parameters needed by the CNN layers
        self.params = dict()
        self.params['W1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(
            pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        # Stored under 'W3': that is the key the Affine2 layer below reads.
        self.params['W3'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Create and connect the CNN layers
        self.layers = OrderedDict()

        # Design note.
        # Option 1: have a layer take __init__(self, W, b) and store
        #   self.W = W, self.b = b. Even when W and b are random, the data
        #   size must be known to create them, so the dimensions have to be
        #   decided first.
        # Option 2 (used here): this class is given the input dimensions
        #   (input_dim = (1, 28, 28) for MNIST) and the filter count, and
        #   creates the random filters itself.

        # The string on the left-hand side is the layer's key.
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_params['stride'],
                                           conv_params['pad'])
        self.layers['ReLu1'] = Relu()  # receives the convolution's output
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'],
                                        self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'],
                                        self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Prediction, the purpose of the network.

        Passes ``x`` forward through every layer in ``self.layers`` in order
        and returns the output of the last layer.
        """
        for layer in self.layers.values():
            x = layer.forward(x)
        # Return only after all layers have run, not after the first one.
        return x

    def loss(self, x, t):
        """Compute the loss once forward propagation has finished.

        The value comes from ``self.last_layer.forward``; this loss is what
        gets sent backwards to compute the gradients.
        """
        prediction = self.predict(x)
        loss_value = self.last_layer.forward(prediction, t)
        return loss_value

    def accuracy(self):
        """Placeholder: accuracy is not implemented yet and returns None."""
        return None

    def gradient(self, x, t):
        # 순전파
        self.loss(x,t)

        # 역전파
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.vlaues())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        #결과저장
        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db