def make_net2():
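    # Two-class CNN for 64x64 single-channel inputs: Conv-ReLU-Pool,
    # then two Dense layers and a softmax loss.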
    net = MultiLayerNet(is_use_dropout=False)
    net.add_layer(Layer.Conv2D(32, (3, 3), pad=1, input_size=(1, 64, 64),
                               initializer=Initializer.He(),
                               activation=Layer.Relu()))
    net.add_layer(Layer.Pooling(pool_h=2, pool_w=2, stride=2))
    net.add_layer(
        Layer.Dense(128, initializer=Initializer.He(),
                    activation=Layer.Relu()))
    net.add_layer(Layer.Dense(2, initializer=Initializer.He()))
    net.add_layer(Layer.SoftmaxWithLoss())

    return net


def main():
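    # Train a small CNN (two Conv-BN-ReLU-Pool blocks) on MNIST digits.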
    (x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
    print(x_train.shape, t_train.shape)
    print(t_train[0])

    net = MultiLayerNet(is_use_dropout=False)
    net.add_layer(Layer.Conv2D(16, (3, 3), pad=1, input_size=(1, 28, 28),
                               initializer=Initializer.He()))
    net.add_layer(Layer.BatchNormalization())
    net.add_layer(Layer.Relu())
    net.add_layer(Layer.Pooling(pool_h=2, pool_w=2, stride=2))
    net.add_layer(Layer.Conv2D(16, (3, 3), pad=1, initializer=Initializer.He()))
    net.add_layer(Layer.BatchNormalization())
    net.add_layer(Layer.Relu())
    net.add_layer(Layer.Pooling(pool_h=2, pool_w=2, stride=2))
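    # After two 2x2 poolings the feature maps are 16 x 7 x 7.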
    net.add_layer(Layer.Dense(20, initializer=Initializer.He(), activation=Layer.Relu()))
    net.add_layer(Layer.Dropout(0.5))
    net.add_layer(Layer.Dense(10))
    net.add_layer(Layer.SoftmaxWithLoss())


    if gpu_enable:
        net.to_gpu()

    for k, v in net.params.items():
        print(k, v.shape)

    result = net.train(
        x_train, t_train, x_test, t_test, batch_size=200, iters_num=100, print_epoch=1, evaluate_limit=500,
        is_use_progress_bar=True,
        optimizer=Optimizer.Adam(lr=0.001))

    import datetime
    import pickle

    # Save the training history under a timestamped filename.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H%M%S")
    with open(f"train_data_{timestamp}.pickle", "wb") as fw:
        pickle.dump(result, fw)
    # net.save_model()

    print("============================================")


def make_net1():
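    # Three-class CNN for 128x128 single-channel inputs:
    # two Conv-BN-ReLU-Pool blocks, then Dense layers and a softmax loss.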
    net = MultiLayerNet(is_use_dropout=False)
    net.add_layer(Layer.Conv2D(32, (3, 3), pad=1, input_size=(1, 128, 128),
                               initializer=Initializer.He()))
    net.add_layer(Layer.BatchNormalization())
    net.add_layer(Layer.Relu())
    net.add_layer(Layer.Pooling(pool_h=2, pool_w=2, stride=2))
    # net.add_layer(Layer.Conv2D(64, (3, 3), pad=1, initializer=Initializer.He()))
    # net.add_layer(Layer.BatchNormalization())
    # net.add_layer(Layer.Relu())
    # net.add_layer(Layer.Pooling(pool_h=2, pool_w=2, stride=2))
    net.add_layer(Layer.Conv2D(32, (3, 3), pad=1,
                               initializer=Initializer.He()))
    net.add_layer(Layer.BatchNormalization())
    net.add_layer(Layer.Relu())
    net.add_layer(Layer.Pooling(pool_h=2, pool_w=2, stride=2))
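    # Spatial size shrinks 128 -> 64 -> 32 across the two pooling stages.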
    net.add_layer(
        Layer.Dense(30, initializer=Initializer.He(), activation=Layer.Relu()))
    net.add_layer(Layer.Dropout(0.5))
    net.add_layer(Layer.Dense(3))
    net.add_layer(Layer.SoftmaxWithLoss())
    return net


class MultiLayerNet:
    # Only the add_layer method is excerpted here; the constructor and the
    # train/predict methods are defined elsewhere in this module.

    def add_layer(self, layer, **kwargs):
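        """Append `layer` to the network, creating its parameters as needed.

        Layers added internally (e.g. a Dense layer's bundled activation)
        pass is_direct=False so they are not recorded in added_layer_list.
        """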

        is_direct = kwargs.get("is_direct", True)

        if not isinstance(layer, Layer.LayerType):
            raise TypeError("Layer required")

        if isinstance(self.lastLayer, (Layer.SoftmaxWithLoss, Layer.IdentityWithLoss)):
            raise RuntimeError("The loss layer has already been set")

        layer_len = len(self.layers)

        if isinstance(layer, Layer.Dense):

            input_size = layer.input_size

            if input_size is None and len(self.hiddenSizeList) > 0:
                # Infer the input width from the previous layer's flattened output.
                input_size = int(np.prod(self.hiddenSizeList[-1]))
            else:
                self.hiddenSizeList.append(input_size)

            hidden_size = layer.hidden_size

            weight_init_std = 0.01
            initializer = layer.initializer

            pre_node_nums = input_size

            # He init (std = sqrt(2 / fan_in)) suits ReLU; Xavier
            # (std = sqrt(1 / fan_in)) suits sigmoid/tanh activations.
            if isinstance(initializer, Initializer.Std):
                weight_init_std = initializer.std
            elif isinstance(initializer, Initializer.He):
                weight_init_std = np.sqrt(2.0 / pre_node_nums)
            elif isinstance(initializer, Initializer.Xavier):
                weight_init_std = np.sqrt(1.0 / pre_node_nums)

            self.hiddenSizeList.append(hidden_size)
            self.params[f"W{layer_len}"] = weight_init_std * np.random.randn(input_size, hidden_size)
            self.params[f"b{layer_len}"] = np.zeros(hidden_size)

            self.layers[f"Affine{layer_len}"] = Layer.Affine(self.params[f"W{layer_len}"], self.params[f"b{layer_len}"])

            self.prevDenseLayer = layer

            if layer.activation is not None:
                self.add_layer(layer.activation, is_direct=False)

            if self.is_use_dropout:
                layer_len = len(self.layers)
                self.layers['Dropout' + str(layer_len)] = Layer.Dropout(self.dropout_ratio)


        elif isinstance(layer, Layer.Conv2D):

            if self.pre_channel_num is None:
                self.pre_channel_num = layer.input_size[0]

            input_size = layer.input_size

            if input_size is None and len(self.hiddenSizeList) > 0:
                input_size = self.hiddenSizeList[-1]

            filter_num = layer.filter_num

            weight_init_std = 0.01
            initializer = layer.initializer

            # fan_in = input channels x kernel height x kernel width.
            pre_node_nums = self.pre_channel_num * np.prod(layer.filter_size)

            if isinstance(initializer, Initializer.Std):
                weight_init_std = initializer.std
            elif isinstance(initializer, Initializer.He):
                weight_init_std = np.sqrt(2.0 / pre_node_nums)
            elif isinstance(initializer, Initializer.Xavier):
                weight_init_std = np.sqrt(1.0 / pre_node_nums)

            # Spatial output size per dim: (in - kernel + 2*pad) / stride + 1.
            conv_output_size = (
                layer.filter_num, int((input_size[1] - layer.filter_size[0] + 2 * layer.pad) / layer.stride + 1),
                int((input_size[2] - layer.filter_size[1] + 2 * layer.pad) / layer.stride + 1))
            self.hiddenSizeList.append(conv_output_size)

            # print("input", self.hiddenSizeList)

            debug_print(input_size)

            self.params[f"W{layer_len}"] = weight_init_std * np.random.randn(layer.filter_num, self.pre_channel_num,
                                                                             layer.filter_size[0], layer.filter_size[1])

            self.pre_channel_num = layer.filter_num

            # print("??")

            self.params[f"b{layer_len}"] = np.zeros(layer.filter_num)
            debug_print(input_size)
            self.layers[f"Convolution{layer_len}"] = Layer.Convolution(self.params[f"W{layer_len}"],
                                                                       self.params[f"b{layer_len}"], layer.stride,
                                                                       layer.pad)
            debug_print(input_size)
            self.prevConvLayer = layer

            debug_print(input_size)

            if layer.activation is not None:
                self.add_layer(layer.activation, is_direct=False)

            if self.is_use_dropout:
                self.layers['Dropout' + str(layer_len)] = Layer.Dropout(self.dropout_ratio)
            debug_print(input_size)

        elif isinstance(layer, Layer.Pooling):
            prevLayer = self.prevConvLayer
            conv_output_size = self.hiddenSizeList[-1]

            # Assumes the pooling window equals the stride with no padding,
            # so each spatial dimension shrinks by the stride factor.
            pool_output_size = (
                prevLayer.filter_num, int(conv_output_size[1] / layer.stride),
                int(conv_output_size[2] / layer.stride))

            self.hiddenSizeList.append(pool_output_size)

            self.layers[f"Pooling{layer_len}"] = layer


        elif isinstance(layer, Layer.Relu):
            self.layers[f"Relu{layer_len}"] = Layer.Relu()

        elif isinstance(layer, Layer.Sigmoid):
            self.layers[f"Sigmoid{layer_len}"] = Layer.Sigmoid()

        elif isinstance(layer, Layer.Dropout):
            # Register explicitly added dropout layers; without this branch
            # they would be silently ignored.
            self.layers[f"Dropout{layer_len}"] = layer

        elif isinstance(layer, (Layer.SoftmaxWithLoss, Layer.IdentityWithLoss)):
            # The loss layer is kept separate from self.layers.
            self.lastLayer = layer

        elif isinstance(layer, Layer.BatchNormalization):
            # gamma/beta are sized to the flattened output of the previous layer.
            self.layers[f"BatchNormal{layer_len}"] = Layer.BatchNormalization(
                gamma=np.ones(np.prod(self.hiddenSizeList[-1])),
                beta=np.zeros(np.prod(self.hiddenSizeList[-1])),
                running_mean=layer.running_mean,
                running_var=layer.running_var
            )
        if is_direct:
            self.added_layer_list.append(layer)


# Standalone script: train a fully connected MNIST classifier.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

print(x_train.shape, t_train.shape, x_test.shape, t_test.shape)

# Merge the train and test splits into a single 70,000-sample pool.
x_data = np.append(x_train, x_test, axis=0)
t_data = np.append(t_train, t_test, axis=0)

net = MultiLayerNet(is_use_dropout=True, dropout_ratio=0.2)
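# Architecture: 784 -> 30 -> 64 -> 64 -> 64 -> 64 -> 10, with BatchNorm and
# ReLU after each hidden layer; dropout (ratio 0.2) is added by the net itself.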
net.add_layer(Layer.Dense(30, input_size=784, initializer=Initializer.He()))
net.add_layer(Layer.BatchNormalization())
net.add_layer(Layer.Relu())
net.add_layer(Layer.Dense(64, initializer=Initializer.He()))
net.add_layer(Layer.BatchNormalization())
net.add_layer(Layer.Relu())
net.add_layer(Layer.Dense(64, initializer=Initializer.He()))
net.add_layer(Layer.BatchNormalization())
net.add_layer(Layer.Relu())
net.add_layer(Layer.Dense(64, initializer=Initializer.He()))
net.add_layer(Layer.BatchNormalization())
net.add_layer(Layer.Relu())
net.add_layer(Layer.Dense(64, initializer=Initializer.He()))
net.add_layer(Layer.BatchNormalization())
net.add_layer(Layer.Relu())
net.add_layer(
    Layer.Dense(10,
                initializer=Initializer.He(),