def __train(weight_init_std):
    bn_network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                     output_size=10, weight_init_std=weight_init_std,
                                     use_batchnorm=True)

    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10, weight_init_std=weight_init_std)

    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
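The helper above relies on module-level globals (`x_train`, `t_train`, `train_size`, `batch_size`, `learning_rate`, `max_epochs`) and on `MultiLayerNetExtend`/`SGD`. A minimal driver sketch under those assumptions (import paths as in the deep-learning-from-scratch codebase; the subset size and weight scales are illustrative):

import numpy as np
from dataset.mnist import load_mnist                            # assumed: book-style MNIST loader
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.optimizer import SGD

# Globals expected by __train (illustrative values).
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)
x_train, t_train = x_train[:1000], t_train[:1000]               # small subset so the BN effect is visible
max_epochs = 20
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.01

# Compare BN vs. no-BN across several weight-initialization scales.
for w in np.logspace(0, -4, num=16):
    train_acc_list, bn_train_acc_list = __train(w)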
def train(train_x, train_label, weight_init_std, learning_rate, max_epoch,
          batch_size):
    """
    构造带有BN层的神经网络和不带BN层的神经网络。测试BN层的效果
    :param train_x:
    :param train_label:
    :param weight_init_std: 参数初始化方式
    :param learning_rate:
    :param max_epoch: 测试的epoch数
    :param batch_size:
    :return:
    """
    bn_network = MultiLayerNetExtend(input_size=784,
                                     hidden_size_list=[100, 100, 100, 100, 100],
                                     output_size=10,
                                     weight_init_std=weight_init_std,
                                     use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_init_std,
                                  use_batchnorm=False)

    optimizer = SGD(learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    train_size = train_x.shape[0]
    iter_per_epoch = max(train_size / batch_size, 1)

    batch_mask = np.arange(train_size)
    np.random.shuffle(batch_mask)

    epoch_cnt = 0
    left = 0
    iteration = int(iter_per_epoch * max_epoch)

    for i in range(iteration):
        # Fetch one mini-batch of data and advance the left cursor
        batch_x, batch_label, left = get_batch(train_x, train_label, batch_mask,
                                               batch_size, left)

        # Update each of the two networks
        for _network in (bn_network, network):
            grads = _network.gradient(batch_x, batch_label)
            optimizer.update(_network.params, grads)

        # Record the training-set accuracy once per epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(train_x, train_label)
            bn_train_acc = bn_network.accuracy(train_x, train_label)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - "
                  + str(bn_train_acc))
            epoch_cnt += 1

    return train_acc_list, bn_train_acc_list
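`get_batch` is not part of the snippet. A minimal sketch of what it plausibly does, inferred from the call site: walk through the pre-shuffled index array in order and wrap around at the end of an epoch (the original helper may differ).

def get_batch(train_x, train_label, batch_mask, batch_size, left):
    """Return the next mini-batch drawn from the shuffled index array, plus the new cursor."""
    right = left + batch_size
    if right > len(batch_mask):          # wrap around once the index array is exhausted
        left, right = 0, batch_size
    idx = batch_mask[left:right]
    return train_x[idx], train_label[idx], right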
Example #4
def main():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                      one_hot_label=True)
    x_train = x_train[:300]
    t_train = t_train[:300]

    max_epochs = 201
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01

    use_dropout = True
    dropout_ratio = 0.2

    train_acc_list = []
    test_acc_list = []

    network = MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100, 100],
        output_size=10,
        use_dropout=use_dropout,
        dropout_ration=dropout_ratio)
    optimizer = SGD(lr=learning_rate)

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)

            print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) + \
                    ", test acc:" + str(test_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    markers = {'train': 'o', 'test': 's'}
    x = np.arange(max_epochs)
    plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
    plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.show()
def __train(weight_init_std):
    bn_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100],
                                 output_size=10,
                                 weight_init_std=weight_init_std,
                                 use_batchnorm=True)
    net = MultiLayerNetExtend(input_size=784,
                              hidden_size_list=[100, 100, 100, 100],
                              output_size=10,
                              weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _net in (bn_net, net):
            grads = _net.gradient(x_batch, t_batch)
            optimizer.update(_net.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = net.accuracy(x_train, t_train)
            bn_train_acc = bn_net.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("EPOCH: {0} | NET_ACC({1}) - BN_NET_ACC({2})".format(
                epoch_cnt, train_acc, bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epoches:
                break
    return train_acc_list, bn_train_acc_list
def __trainning(weight_init_std, index):

    epoch_num = 0
    acc_train_list = []
    acc_train_BN_list = []

    input_size = 784
    hidden_size_list = [100] * 5
    output_size = 10
    activation = 'ReLu'
    weight_decay_lambda = 1

    network = MultiLayerNetExtend(input_size=input_size,
                                  hidden_size_list=hidden_size_list,
                                  output_size=output_size,
                                  activation=activation,
                                  weight_init_std=weight_init_std,
                                  weight_decay_lambda=weight_decay_lambda,
                                  use_BatchNormalization=False,
                                  use_weight_decay=False)
    network_BN = MultiLayerNetExtend(input_size=784,
                                     hidden_size_list=hidden_size_list,
                                     output_size=output_size,
                                     activation=activation,
                                     weight_init_std=weight_init_std,
                                     weight_decay_lambda=weight_decay_lambda,
                                     use_BatchNormalization=True,
                                     use_weight_decay=False)

    # Training proceeds as follows:
    # 1. Randomly pick batch_size samples from the training set
    # 2. Forward pass, backward pass, obtain the gradients
    # 3. Update the parameters with the gradients
    # 4. Repeat steps 1-3 until the loop ends
    for i in range(iter_num):

        print('W' + str(index) + ': iteration ' + str(i))

        # Pick a mini-batch
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Compute the gradients and update the parameters
        for network_ in (network, network_BN):
            grads = network_.gradient(x_batch, t_batch, use_weight_decay=False)
            # optimizer_SGD.update(params = network_.params, grads = grads)
            if network_ == network:
                optimizer.update(params=network_.params, grads=grads)
            else:
                optimizer_BN.update(params=network_.params, grads=grads)

        # Once per epoch, compute both networks' accuracy and append it to the lists
        if i % iter_num_per_epch == 0:
            network_acc = network.accuracy(x_batch, t_batch)
            network_BN_acc = network_BN.accuracy(x_batch, t_batch)
            acc_train_list.append(network_acc)
            acc_train_BN_list.append(network_BN_acc)

            epoch_num += 1

            if epoch_num >= epoch_cnt_max:
                break

    return acc_train_list, acc_train_BN_list
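`__trainning` depends on several module-level names (`iter_num`, `iter_num_per_epch`, `epoch_cnt_max`, `optimizer`, `optimizer_BN`, the MNIST arrays) and on a customized `MultiLayerNetExtend` that accepts `use_BatchNormalization`/`use_weight_decay`. A sketch of the surrounding setup under those assumptions; all values are illustrative.

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
x_train, t_train = x_train[:1000], t_train[:1000]

train_size = x_train.shape[0]
batch_size = 100
epoch_cnt_max = 20
iter_num_per_epch = max(train_size // batch_size, 1)   # name kept as used in __trainning
iter_num = iter_num_per_epch * epoch_cnt_max

# Separate optimizers so the two networks do not share state.
optimizer = SGD(lr=0.01)
optimizer_BN = SGD(lr=0.01)

# Sweep a few weight-initialization scales.
for index, w in enumerate(np.logspace(0, -4, num=4)):
    acc_list, acc_BN_list = __trainning(w, index)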
Example #7
optimizer = Sgd(learning_rate)
bn_optimizer = Sgd(learning_rate)

# Record how accuracy changes during training
for i in range(iterations):
    # Randomly select a mini-batch (128 random indices out of 0-999)
    mask = np.random.choice(train_size, batch_size)
    x_batch = X_train[mask]
    y_batch = Y_train[mask]

    # Compute gradients for the network without batch normalization.
    gradients = neural_net.gradient(x_batch, y_batch)
    # Update the parameters: weights W and biases b
    optimizer.update(neural_net.params, gradients)
    # Compute accuracy on the batch with the updated parameters
    acc = neural_net.accuracy(x_batch, y_batch)
    # Record the accuracy
    train_accuracies.append(acc)

    # Do the same for the network that uses batch normalization.
    bn_gradients = bn_neural_net.gradient(x_batch, y_batch)  # compute gradients
    bn_optimizer.update(bn_neural_net.params, bn_gradients)  # update W, b
    bn_acc = bn_neural_net.accuracy(x_batch, y_batch)  # compute accuracy
    bn_train_accuracies.append(bn_acc)  # record accuracy

    print(f'iteration #{i}: without={acc}, with={bn_acc}')

# Plot the accuracy comparison
x = np.arange(iterations)
plt.plot(x, train_accuracies, label='without BN')
plt.plot(x, bn_train_accuracies, label='using BN')
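The loop above assumes that the data, the two networks, and the accuracy lists already exist. A minimal setup sketch that could precede it, assuming the same `MultiLayerNetExtend` and an `Sgd` optimizer class as in the other examples; hyperparameters are illustrative.

(X_train, Y_train), (X_test, Y_test) = load_mnist(normalize=True, one_hot_label=True)
X_train, Y_train = X_train[:1000], Y_train[:1000]

train_size = X_train.shape[0]
batch_size = 128
learning_rate = 0.01
iterations = 200

# Same architecture with and without batch normalization.
neural_net = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                 output_size=10, weight_init_std=0.01, use_batchnorm=False)
bn_neural_net = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
                                    output_size=10, weight_init_std=0.01, use_batchnorm=True)

train_accuracies = []
bn_train_accuracies = []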
Example #8
mini_batch_size = 100  # number of samples sent through one forward pass
train_size = X_train.shape[0]
iter_per_epoch = int(max(train_size / mini_batch_size, 1))
# Record train/test accuracy once per epoch during training
train_accuracies = []
test_accuracies = []

optimizer = Sgd(learning_rate=0.01)  # optimizer

for epoch in range(epochs):
    for i in range(iter_per_epoch):
        x_batch = X_train[(i * mini_batch_size):((i + 1) * mini_batch_size)]
        y_batch = Y_train[(i * mini_batch_size):((i + 1) * mini_batch_size)]
        gradients = neural_net.gradient(x_batch, y_batch)
        optimizer.update(neural_net.params, gradients)

    train_acc = neural_net.accuracy(X_train, Y_train)
    train_accuracies.append(train_acc)
    test_acc = neural_net.accuracy(X_test, Y_test)
    test_accuracies.append(test_acc)
    print(f'epoch #{epoch}: train={train_acc}, test={test_acc}')

x = np.arange(epochs)
plt.plot(x, train_accuracies, label='Train')
plt.plot(x, test_accuracies, label='Test')
plt.legend()
plt.title(f'Weight Decay (lambda={wd_rate})')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
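Here `neural_net`, `epochs`, `wd_rate`, and the MNIST arrays are assumed to be defined earlier. A plausible setup sketch: given the plot title, the network would be built with `weight_decay_lambda=wd_rate`; sizes and values are illustrative.

(X_train, Y_train), (X_test, Y_test) = load_mnist(normalize=True, one_hot_label=True)
X_train, Y_train = X_train[:300], Y_train[:300]      # small subset to provoke overfitting

epochs = 200
wd_rate = 0.1                                        # L2 weight-decay strength (illustrative)
neural_net = MultiLayerNetExtend(input_size=784,
                                 hidden_size_list=[100, 100, 100, 100, 100, 100],
                                 output_size=10,
                                 weight_decay_lambda=wd_rate)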
Example #9
class Agent:
    """ 各エージェントの動作を main からこっちに書き写す形で. """

    n = int()
    AdjG_init = np.zeros((n, n))  # every entry must be 0 or 1
    WeiG_init = np.zeros((n, n))
    maxdeg = int()

    train_size = 0
    batch_size = 100

    # weight-graph construction rule =====
    # wtype = "maximum-degree"
    wtype = "local-degree"

    # ===========================

    def __init__(self,
                 idx,
                 x_train,
                 t_train,
                 x_test,
                 t_test,
                 optimizer,
                 weight_decay_lambda=0.0):
        self.idx = idx
        # self.layer = MultiLayerNet(input_size=784, hidden_size_list=[100], output_size=10,
        #                             weight_decay_lambda=weight_decay_lambda)
        self.layer = MultiLayerNetExtend(
            input_size=784,
            hidden_size_list=[50, 50, 50],
            output_size=10,
            weight_decay_lambda=weight_decay_lambda,
            use_dropout=False,
            dropout_ration=0.0,
            use_batchnorm=False)
        self.optimizer = optimizer
        self.rec_param = np.array([{} for i in range(self.n)])

        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test

        self.z_vec = np.zeros(self.n)
        self.z_vec[idx] = 1
        self.rec_z = np.zeros((self.n, self.n))

        self.AdjG = np.zeros(self.n)  # every entry must be 0 or 1
        self.WeiG = np.zeros(self.n)
        if np.all(self.WeiG_init == 0):
            self.makeWeiGraph(self.AdjG_init)
        else:
            self.WeiG = self.WeiG_init[self.idx]
        self.makeAdjGraph()

        self.train_loss = 0
        self.train_acc = 0
        self.test_acc = 0

    def send(self, k, agent):
        """sending params to other nodes (return "self.layer.params"): send(agent)"""
        return (self.layer.params.copy(), self.z_vec.copy())

    def receive(self, agent, getparams, getz):
        """receiving other node's params: receive(agent, new_params)"""
        self.rec_param[agent] = getparams.copy()
        self.rec_z[agent] = getz.copy()

    def selectData(self, train_size, batch_size):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]
        return x_batch, t_batch

    def consensus(self):
        self.weightConsensus()
        self.subvalConsensus()

    def weightConsensus(self):
        for key in self.layer.params.keys():
            self.layer.params[key] *= self.WeiG[self.idx]
            for idn in np.nonzero(self.AdjG)[0]:
                self.layer.params[
                    key] += self.WeiG[idn] * self.rec_param[idn][key]

    def subvalConsensus(self):
        self.rec_z[self.idx] = self.z_vec
        self.z_vec = np.dot(self.WeiG, self.rec_z)

    def update(self, k=1):
        x_batch, t_batch = self.selectData(self.train_size, self.batch_size)
        grads = self.layer.gradient(x_batch, t_batch)
        self.optimizer.update(self.layer.params, grads, self.z_vec[self.idx],
                              k)
        # self.optimizer.update(self.layer.params, grads)

    def calcLoss(self):
        self.train_acc = self.layer.accuracy(self.x_train, self.t_train)
        self.test_acc = self.layer.accuracy(self.x_test, self.t_test)
        self.train_loss = self.layer.loss(self.x_train, self.t_train)

    def makeAdjGraph(self):
        """make Adjecency Graph"""
        self.AdjG = self.AdjG_init[self.idx]

    def makeWeiGraph(self, lAdjG):
        """make Weight matrix"""
        if self.n == 1:
            tmpWeiG = np.ones([1])
        else:
            if self.wtype == "maximum-degree":
                tmpWeiG = (1 / (self.maxdeg + 1)) * lAdjG[self.idx]
                tmpWeiG[self.idx] = 1 - np.sum(tmpWeiG)
            elif self.wtype == "local-degree":
                ### count degrees ###
                #degMat = np.kron(np.dot(lAdjG,np.ones([self.n,1])), np.ones([1,self.n]))
                degMat = np.kron(
                    np.dot(lAdjG, np.ones([self.n, 1])) + 1,
                    np.ones([1, self.n]))
                ### take the element-wise maximum ###
                degMat = np.maximum(degMat, degMat.T)
                ### divide element-wise ###
                tmpAllWeiG = lAdjG / degMat
                selfDegMat = np.eye(self.n) - np.diag(
                    np.sum(tmpAllWeiG, axis=1))
                tmpAllWeiG = tmpAllWeiG + selfDegMat
                tmpWeiG = tmpAllWeiG[self.idx, :]
            else:
                try:
                    raise ValueError("Error: invalid weight-type")
                except ValueError as e:
                    print(e)
        self.WeiG = tmpWeiG

    ########
    # debugging functions
    ########
    def degub_numericalGrad(self):
        return self.layer.numerical_gradient(self.x_train[:3],
                                             self.t_train[:3])

    def debug_backpropGrad(self):
        return self.layer.gradient(self.x_train[:3], self.t_train[:3])

    def debug_consensus(self):
        params = self.layer.params.copy()
        self.weightConsensus()
        self.subvalConsensus()
        if self.idx == 0:
            ano_params = self.layer.params.copy()
            for key in params.keys():
                diff = np.average(np.abs(params[key] - ano_params[key]))
                print(key + ":" + str(diff))
Example #10
for i in range(10000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # training step
    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)
    
    if i % iter_per_epoch == 0:
        #loss_train = network.loss(x_train, t_train)
        #loss_test = network.loss(x_test, t_test)
        #train_loss_list.append(loss_train)
        #test_loss_list.append(loss_test)
        
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        
        epoch_cnt += 1
        print("---{}/{}---".format(epoch_cnt, max_epochs))
        #print("loss_train : " + str(loss_train))
        #print("loss_test  : " + str(loss_test))
        print("acc_train  : " + str(train_acc))
        print("acc_test   : " + str(test_acc))
        if epoch_cnt >= max_epochs:
            break

#plt.plot(train_loss_list, label="train_loss_list")
#plt.plot(test_loss_list, label="test_loss_list")
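The accuracy lists collected above are never plotted in this fragment. A small follow-up sketch, mirroring the commented-out loss plots (it assumes `matplotlib.pyplot` is imported as `plt`, as in the other examples):

x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label="train accuracy")
plt.plot(x, test_acc_list, label="test accuracy")
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.legend(loc="lower right")
plt.show()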
Example #11
class Agent:
    """アルゴリズム構築に必要なAgentの機能
    Function:
        send : 隣接するagentの以下の状態変数を送る
            Args:
                layer.param (np.array) : 各層のパラメータ
        
        receive : 隣接するagentから状態変数を受け取る
            Args:
                layer.param (np.arrar) : 各層のパラメータ
        
        optimizer(SGD) : 確率勾配をバックプロパゲーションとランダムシードを用いて実装
            For example:
                self.optimizer = optimizer(SGD(lr))
                x_batch, t_batch = self.selectData(self.train_size, self.batch_size)
                grads = self.layer.gradient(x_batch, t_batch)
                self.optimizer.update(self.layer.params, grads, k)
    """
    """ 各エージェントの動作を main からこっちに書き写す形で. """

    n = int()
    AdjG_init = np.zeros((n, n))  # every entry must be 0 or 1
    WeiG_init = np.zeros((n, n))
    maxdeg = int()

    train_size = 0
    batch_size = 100

    # weight-graph construction rule =====
    # wtype = "maximum-degree"
    wtype = "local-degree"

    # ===========================

    def __init__(self,
                 idx,
                 x_train,
                 t_train,
                 x_test,
                 t_test,
                 optimizer,
                 weight_decay_lambda=0.0):
        """各Agentの初期状態変数
        Args:
            idx         : Agentのインデックス
            layer       : Agent内のニューラルネットワークの層
            optimizer   : 最適化を行うアルゴリズムの選択
            rec_param   : 隣接するエージェントから受け取るパラメータ
            z_vec       : 左の固有ベクトル
            rec_z       : 隣接するエージェントから受け取る左固有ベクトル
            AdjG        : 隣接行列??
            WeiG        : 重み行列??
        """

        self.idx = idx
        # self.layer = MultiLayerNet(input_size=784, hidden_size_list=[100], output_size=10,
        #                             weight_decay_lambda=weight_decay_lambda)
        self.layer = MultiLayerNetExtend(
            input_size=784,
            hidden_size_list=[
                500, 400, 300, 300, 200, 200, 100, 100, 100, 100, 100, 50, 50
            ],
            output_size=10,
            weight_decay_lambda=weight_decay_lambda,
            use_dropout=True,
            dropout_ration=0.3,
            use_batchnorm=True)
        # dropout_ratio=0.03, use_batchnorm=True, hidden_size_list=[500,400,300,300,200,200,100,100,100,50,50,50], weight_decay=0.01 → 0.9428
        # hidden_size_list=[500,400,300,300,200,200,100,100,100,50,50,50], output_size=10, weight_decay_lambda=weight_decay_lambda, use_dropout=True, dropout_ration=0.05, use_batchnorm=True worked best
        # hidden_size_list=[100,100,100,100,100], weight_decay=0.3, dropout_ration=0.3

        self.optimizer = optimizer
        self.rec_param = np.array([{} for i in range(self.n)])
        self.send_param = np.array([{} for i in range(self.n)])

        #Initialize
        self.rec_param[self.idx] = self.layer.params.copy()
        self.send_param[self.idx] = self.layer.params.copy()

        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test

        self.AdjG = np.zeros(self.n)  # every entry must be 0 or 1
        self.WeiG = np.zeros(self.n)
        if np.all(self.WeiG_init == 0):
            self.makeWeiGraph(self.AdjG_init)
        else:
            self.WeiG = self.WeiG_init[self.idx]
        self.makeAdjGraph()

        self.train_loss = 0
        self.train_acc = 0
        self.test_acc = 0

    # Functions needed for Yamashita's version of the algorithm (kept for reference)
    # def send(self, k, agent):
    #     """sending params to other nodes (return "self.layer.params"): send(agent)"""
    #     return (self.layer.params.copy(), self.z_vec.copy())

    # def receive(self, agent, getparams, getz):
    #     """receiving other node's params: receive(agent, new_params)"""
    #     self.rec_param[agent] = getparams.copy()
    #     self.rec_z[agent] = getz.copy()

    def send(self, k, agent):
        """sending params to other nodes (return "self.layer.params"): send(agent)"""
        self.send_param[self.idx] = self.layer.params.copy()
        return self.layer.params.copy()

    def receive(self, agent, getparams):
        """receiving other node's params: receive(agent, new_params)"""
        for key in getparams.keys():
            self.rec_param[agent][key] = getparams[key].copy()

    def selectData(self, train_size, batch_size):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]
        return x_batch, t_batch

    def consensus(self):
        self.weightConsensus()
        # self.subvalConsensus()

    def weightConsensus(self):
        for key in self.layer.params.keys():
            self.layer.params[key] *= self.WeiG[self.idx]
            for idn in np.nonzero(self.AdjG)[0]:
                self.layer.params[
                    key] += self.WeiG[idn] * self.rec_param[idn][key]

    # def subvalConsensus(self):
    # self.rec_z[self.idx] = self.z_vec
    # self.z_vec = np.dot(self.WeiG, self.rec_z)

    def update(self, k=1):
        x_batch, t_batch = self.selectData(self.train_size, self.batch_size)
        grads = self.layer.gradient(x_batch, t_batch)
        # self.optimizer.update(self.layer.params, grads, self.z_vec[self.idx], k)
        self.send_param[self.idx], self.rec_param[
            self.idx] = self.optimizer.update(self.layer.params, grads,
                                              self.rec_param[self.idx],
                                              self.send_param[self.idx],
                                              self.WeiG[self.idx], k)

    def calcLoss(self):
        self.train_acc = self.layer.accuracy(self.x_train, self.t_train)
        self.test_acc = self.layer.accuracy(self.x_test, self.t_test)
        self.train_loss = self.layer.loss(self.x_train, self.t_train)

    def makeAdjGraph(self):
        """make Adjecency Graph"""
        self.AdjG = self.AdjG_init[self.idx]

    def makeWeiGraph(self, lAdjG):
        """make Weight matrix
            2020/01/28 山下さんは有効グラフを作成している.無向グラフに変更("maximum-degree"の方のみ)
        Args:
            tmpWeiG (np.array)      : 一次的な重み行列


        """

        if self.n == 1:
            tmpWeiG = np.ones([1])
        else:
            if self.wtype == "maximum-degree":
                tmpWeiG = (1 / (self.maxdeg + 1)) * lAdjG[self.idx]
                tmpWeiG[self.idx] = 1 - np.sum(tmpWeiG)
            elif self.wtype == "local-degree":
                ### count degrees ###
                #degMat = np.kron(np.dot(lAdjG,np.ones([self.n,1])), np.ones([1,self.n]))
                degMat = np.kron(
                    np.dot(lAdjG, np.ones([self.n, 1])) + 1,
                    np.ones([1, self.n]))
                ### take the element-wise maximum ###
                degMat = np.maximum(degMat, degMat.T)
                ### divide element-wise ###
                tmpAllWeiG = lAdjG / degMat
                selfDegMat = np.eye(self.n) - np.diag(
                    np.sum(tmpAllWeiG, axis=1))
                tmpAllWeiG = tmpAllWeiG + selfDegMat
                tmpWeiG = tmpAllWeiG[self.idx, :]
            else:
                try:
                    raise ValueError("Error: invalid weight-type")
                except ValueError as e:
                    print(e)
        self.WeiG = tmpWeiG

    ########
    # debugging functions
    ########
    def degub_numericalGrad(self):
        return self.layer.numerical_gradient(self.x_train[:3],
                                             self.t_train[:3])

    def debug_backpropGrad(self):
        return self.layer.gradient(self.x_train[:3], self.t_train[:3])

    def debug_consensus(self):
        params = self.layer.params.copy()
        self.weightConsensus()
        # self.subvalConsensus()  # not defined in this version of the class
        if self.idx == 0:
            ano_params = self.layer.params.copy()
            for key in params.keys():
                diff = np.average(np.abs(params[key] - ano_params[key]))
                print(key + ":" + str(diff))
# epoch = 0

for i in range(iterations):
    # Randomly select a mini-batch (128 random indices out of 0-999)
    mask = np.random.choice(train_size, batch_size)
    x_batch = X_train[mask]
    y_batch = Y_train[mask]

    # Batch normalization comparison
    # Compute gradients for the network without batch normalization
    gradients = neural_net.gradient(x_batch, y_batch)
    # Update the parameters: W and b
    optimizer.update(neural_net.params, gradients)
    # Compute accuracy on the batch with the updated parameters
    accuracy = neural_net.accuracy(x_batch, y_batch)
    # Record the accuracy
    neural_accuracy.append(accuracy)

    # Compute gradients for the network with batch normalization
    bn_gradients = bn_neural_net.gradient(x_batch, y_batch)
    # Update the parameters: W and b
    bn_optimizer.update(bn_neural_net.params, bn_gradients)
    # Compute accuracy on the batch with the updated parameters
    bn_accuracy = bn_neural_net.accuracy(x_batch, y_batch)
    # Record the accuracy
    bn_neural_accuracy.append(bn_accuracy)

    print(
        f'iteration #{i}: without = {neural_accuracy[i]}, with = {bn_neural_accuracy[i]}'
    )
Example #13
batch_size = 128
learning_rate = 0.01
neural_optimizer = Sgd()
bn_neural_optimizer = Sgd()
neural_acc_list = []
bn_neural_acc_list = []

for i in range(iterator):
    mask = np.random.choice(X_train.shape[0], batch_size)
    X_batch = X_train[mask]
    Y_batch = Y_train[mask]
    for network in (neural_net, bn_neural_net):
        gradient = network.gradient(X_batch, Y_batch)
        if network == neural_net:
            neural_optimizer.update(network.params, gradient)
            neural_acc = neural_net.accuracy(X_batch, Y_batch)
            neural_acc_list.append(neural_acc)
        else:
            bn_neural_optimizer.update(network.params, gradient)
            bn_neural_acc = bn_neural_net.accuracy(X_batch, Y_batch)
            bn_neural_acc_list.append(bn_neural_acc)
    print(f'===== training iteration {i} =====')
    print('Without BatchNorm', neural_acc_list[-1])
    print('BatchNorm', bn_neural_acc_list[-1])

    # gradients = neural_net.gradient(X_batch, Y_batch)
    # neural_optimizer.update(neural_net.params, gradients)
    # neural_acc = neural_net.accuracy(X_batch, Y_batch)
    # neural_acc_list.append(neural_acc)
    #
    # bn_gradients = bn_neural_net.gradient(X_batch, Y_batch)
Example #14
    # Randomly pick batch_size samples
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Forward pass, backward pass, obtain the gradients
    for _network in (network, network_weight_decay):
        grads = _network.gradient(x_batch,
                                  t_batch,
                                  use_weight_decay=_network.use_weight_decay)
        optimizer.update(_network.params, grads)

    if i % iter_num_per_epoch == 0:
        epoch_num += 1

        train_acc_network = network.accuracy(x_train, t_train)
        train_acc_list.append(train_acc_network)

        test_acc_network = network.accuracy(x_test, t_test)
        test_acc_list.append(test_acc_network)

        train_acc_network_weight_decay = network_weight_decay.accuracy(
            x_train, t_train)
        train_acc_weight_decay_list.append(train_acc_network_weight_decay)

        test_acc_network_weight_decay = network_weight_decay.accuracy(
            x_test, t_test)
        test_acc_weight_decay_list.append(test_acc_network_weight_decay)

        if epoch_num >= epoch_cnt_max:
            break
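This last fragment is the body of a training loop whose header and setup are not shown. A sketch of the missing scaffolding, assuming the same customized `MultiLayerNetExtend` (one that accepts and stores a `use_weight_decay` flag) used in the earlier weight-decay example; names and values are illustrative.

train_size = x_train.shape[0]
batch_size = 100
epoch_num = 0
epoch_cnt_max = 20
iter_num_per_epoch = max(train_size // batch_size, 1)

train_acc_list, test_acc_list = [], []
train_acc_weight_decay_list, test_acc_weight_decay_list = [], []

optimizer = SGD(lr=0.01)
network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100] * 5,
                              output_size=10, weight_decay_lambda=0.1,
                              use_weight_decay=False)
network_weight_decay = MultiLayerNetExtend(input_size=784, hidden_size_list=[100] * 5,
                                           output_size=10, weight_decay_lambda=0.1,
                                           use_weight_decay=True)

for i in range(iter_num_per_epoch * epoch_cnt_max):
    pass  # the fragment shown above forms the body of this loop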