Example #1
 def _get_decoder(self):
     output = nn.Dense(self._vocab_size, prefix='decoder0_')
     return output
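# For context: nn.Dense defers weight allocation until the first forward
# call, inferring in_units from the input. A quick sketch (hypothetical sizes):
from mxnet import nd
from mxnet.gluon import nn
decoder = nn.Dense(10, prefix='decoder0_')
decoder.initialize()
out = decoder(nd.ones((2, 16)))  # weight is inferred as shape (10, 16)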
Example #2
from mxnet import nd, autograd
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()

X = nd.random.uniform(shape=(2, 20))
Y = net(X)

# print(X)
print(net[0].params)
print(type(net[0].params))
print(net[0].weight)
print(type(net[0].weight))
# print(net[1].params, type(net[1].params))
# print(net[0].params['dense0_weight'] == net[0].weight)
# print(net[0].weight.data())
# print(net[0].weight.grad())
# print(net[1].bias.data())
# print(net.collect_params())
# print(net.collect_params('.*weight'))

# X.attach_grad()
# with autograd.record():
#     y_hat = net(X)
# y_hat.backward()
# print(net[0].weight.grad())
Example #3
 def __init__(self, rnn_layer, vocab_size, **kwargs):
     super(RNNModel, self).__init__(**kwargs)
     self.rnn = rnn_layer
     self.vocab_size = vocab_size
     self.dense = nn.Dense(vocab_size)
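 # A sketch of the forward pass this container is usually paired with
 # (as in d2l; assumes `from mxnet import nd` and token-index inputs
 # of shape (batch_size, num_steps)):
 def forward(self, inputs, state):
     X = nd.one_hot(inputs.T, self.vocab_size)  # (T, N, vocab_size)
     Y, state = self.rnn(X, state)
     # flatten time and batch, then project to vocabulary logits
     output = self.dense(Y.reshape((-1, Y.shape[-1])))
     return output, state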
Example #4
    def __init__(self,
                 query_units,
                 num_heads,
                 pos_embed_units: Optional[int] = None,
                 max_distance=None,
                 bidirectional=False,
                 num_buckets=None,
                 method='transformer_xl',
                 dropout: float = 0.0,
                 dtype='float32',
                 layout='NTK',
                 use_einsum=False):
        """

        Parameters
        ----------
        query_units
        num_heads
        pos_embed_units
        max_distance
        bidirectional
        num_buckets
        method
        dropout
        attention_dropout
        query_add_bias
            Add additional bias term to the query
        scaled
        dtype
        layout
        """
        super().__init__()
        self._dropout = dropout
        self._method = method
        self._query_units = query_units
        self._num_heads = num_heads
        self._bidirectional = bidirectional
        self._num_buckets = num_buckets
        assert query_units % num_heads == 0, 'The units must be divisible by the number of heads.'
        self._head_query_units = query_units // num_heads
        self._max_distance = max_distance
        self._pos_embed_units = pos_embed_units
        self._dtype = dtype
        self._use_einsum = use_einsum
        self._layout = layout
        if self._layout not in ['NKT', 'NTK', 'TNK']:
            raise ValueError('layout="{}" is not supported'.format(
                self._layout))
        if method == 'transformer_xl':
            if pos_embed_units is None:
                pos_embed_units = self._num_heads * self._head_query_units
            self._rel_pos_embed = SinusoidalPositionalEmbedding(
                units=pos_embed_units, dtype=self._dtype)
            self._rel_proj = nn.Dense(units=query_units,
                                      in_units=pos_embed_units,
                                      flatten=False,
                                      use_bias=False,
                                      dtype=self._dtype)
            self._dropout_layer = nn.Dropout(dropout)
        elif method == 'shaw':
            assert self._max_distance is not None, 'Must set max_distance when method="shaw".'
            if self._bidirectional:
                vocab_size = self._max_distance * 2 + 1
            else:
                vocab_size = self._max_distance + 1
            self._rel_pos_embed = LearnedPositionalEmbedding(
                units=self._num_heads * self._head_query_units,
                max_length=vocab_size,
                weight_initializer=mx.init.Xavier(rnd_type="gaussian",
                                                  factor_type="in",
                                                  magnitude=1),
                mode='wrap' if self._bidirectional else 'raise',
                dtype=self._dtype)
        elif method == 't5':
            if self._num_buckets is None:
                self._num_buckets = 32
            if self._max_distance is None:
                self._max_distance = 128
            self._rel_pos_embed = BucketPositionalEmbedding(
                units=num_heads,
                num_buckets=self._num_buckets,
                max_distance=self._max_distance,
                bidirectional=self._bidirectional,
                dtype=self._dtype)
        else:
            raise NotImplementedError(
                'method="{}" is currently not supported!'.format(method))
Example #5
N = X_tr.shape[0]
p = X_tr.shape[1]

X_train = nd.array(X_tr)
y_train = nd.array(y_tr)

batch_size = 200
train_dataset = ArrayDataset(X_train, y_train)
train_data = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
X_test = nd.array(X_te).as_in_context(ctx)
y_test = nd.array(y_te).as_in_context(ctx)


net = nn.Sequential()
net.add(nn.Dense(1000, activation='relu'), nn.Dense(1, activation=None))


net.initialize(mx.init.Xavier(), ctx=ctx)

# One forward pass to trigger deferred shape inference
for i, (data, label) in enumerate(train_data):
    aa = net(data.as_in_context(ctx))
    break
net_initial = net  # note: this is an alias, not a copy of the parameters


def init_masks_percents(net, p):
    masks = {}
    percents = {}
    for i, layer in enumerate(net):
        masks[i] = nd.ones(layer.weight.data().shape).as_in_context(ctx)
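# A hypothetical companion step (not from the source): applying such binary
# masks to the weights, as done in magnitude-pruning experiments.
def apply_masks(net, masks):
    for i, layer in enumerate(net):
        layer.weight.set_data(layer.weight.data() * masks[i])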
Example #6
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 20 09:46:48 2020

@author: DER
"""

import d2lzh as d2l
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
""" 3.10.1定义模型 """
net = nn.Sequential()
net.add(nn.Dense(256, activation="relu"), nn.Dense(10))
print(net)
net.initialize(init.Normal(sigma=0.01))
""" 3.10.2训练模型 """
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), "sgd", {"learning_rate": 0.5})
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
              None, trainer)
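# A quick sanity check after training (a sketch, not in the original):
for X, y in test_iter:
    test_acc = (net(X).argmax(axis=1) == y.astype('float32')).mean().asscalar()
    break
print('first-batch test acc:', test_acc)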
Example #7
 def __init__(self, action_dim):
     super(Actor, self).__init__()
     self.action_dim = action_dim
     self.dense0 = nn.Dense(400, activation='relu')
     self.dense1 = nn.Dense(300, activation='relu')
     self.dense2 = nn.Dense(self.action_dim)
Example #8
 def __init__(self, **kwargs):
   super(TailNegBlock, self).__init__(**kwargs)
   self.fc1 = nn.Dense(10, flatten=True)
   self.fc2 = nn.Dense(10, flatten=True)
Example #9
 def __init__(self, bert, prefix=None, params=None):
     super(BertForQA, self).__init__(prefix=prefix, params=params)
     self.bert = bert
     with self.name_scope():
         self.span_classifier = nn.Dense(units=2, flatten=False)
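 # The matching forward pass (a sketch following gluonnlp's BertForQA):
 def forward(self, inputs, token_types, valid_length=None):
     # bert_output: (batch_size, seq_length, units)
     bert_output = self.bert(inputs, token_types, valid_length)
     # span logits: (batch_size, seq_length, 2) for start/end positions
     return self.span_classifier(bert_output)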
Example #10
 def __init__(self, **kwargs):
     super(MLP, self).__init__(**kwargs)
     self.hidden = nn.Dense(256, activation='relu')
     self.output = nn.Dense(10)
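 # The forward pass that conventionally goes with this block (as in d2l):
 def forward(self, x):
     return self.output(self.hidden(x))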
Example #11
 def __init__(self, vocab_size=5, hidden_units=4):
     super().__init__()
     self.x2h_map = nn.Embedding(input_dim=vocab_size, output_dim=hidden_units)
     self.h2h_map = nn.Dense(units=hidden_units, flatten=False)
     self.vocab_map = nn.Dense(units=vocab_size, flatten=False)
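 # A sketch of one recurrent step for this cell (assumes `from mxnet import
 # nd`; `x` holds token ids of shape (batch,), `h` is the previous hidden state):
 def forward(self, x, h):
     h = nd.tanh(self.x2h_map(x) + self.h2h_map(h))
     return self.vocab_map(h), h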
Example #12
        loss.backward()
        trainer.step(data.shape[0])


# Your first model should be a sequential network with 3 layers. Your first layer should have 16 hidden units, the second should have 8 hidden units, and the last layer should have the correct number of output units for the classification task at hand. You should add ReLU activations on all hidden layers, but not the output layer. You should define `network` in the cell below.
# 
# **Hint**: You'll find classes in the `mxnet.gluon.nn` subpackage useful for this task.

# In[47]:


# YOUR CODE HERE
from mxnet.gluon import nn
network = nn.Sequential()
network.add(
    nn.Dense(16, activation='relu'),
    nn.Dense(8, activation='relu'),
    nn.Dense(10)
)
#raise NotImplementedError()


# In[48]:


assert isinstance(network, mx.gluon.nn.Sequential)
assert len(network) == 3
assert isinstance(network[0], mx.gluon.nn.Dense)
assert network[0].act.name.endswith('relu')
assert network[0].weight.shape[0] == 16
assert isinstance(network[1], mx.gluon.nn.Dense)
Example #13
    def __init__(self,
                 channels,
                 init_block_channels,
                 final_block_channels,
                 residuals,
                 shortcuts,
                 kernel_sizes,
                 expansions,
                 bn_epsilon=1e-3,
                 bn_use_global_stats=False,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000):
        super(ProxylessNAS, self).__init__()
        self.in_size = in_size
        self.classes = classes

        with self.name_scope():
            self.features = nn.HybridSequential(prefix="")
            self.features.add(
                conv3x3_block(in_channels=in_channels,
                              out_channels=init_block_channels,
                              strides=2,
                              bn_epsilon=bn_epsilon,
                              bn_use_global_stats=bn_use_global_stats,
                              activation="relu6"))
            in_channels = init_block_channels
            for i, channels_per_stage in enumerate(channels):
                stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
                residuals_per_stage = residuals[i]
                shortcuts_per_stage = shortcuts[i]
                kernel_sizes_per_stage = kernel_sizes[i]
                expansions_per_stage = expansions[i]
                with stage.name_scope():
                    for j, out_channels in enumerate(channels_per_stage):
                        residual = (residuals_per_stage[j] == 1)
                        shortcut = (shortcuts_per_stage[j] == 1)
                        kernel_size = kernel_sizes_per_stage[j]
                        expansion = expansions_per_stage[j]
                        strides = 2 if (j == 0) and (i != 0) else 1
                        stage.add(
                            ProxylessUnit(
                                in_channels=in_channels,
                                out_channels=out_channels,
                                kernel_size=kernel_size,
                                strides=strides,
                                bn_epsilon=bn_epsilon,
                                bn_use_global_stats=bn_use_global_stats,
                                expansion=expansion,
                                residual=residual,
                                shortcut=shortcut))
                        in_channels = out_channels
                self.features.add(stage)
            self.features.add(
                conv1x1_block(in_channels=in_channels,
                              out_channels=final_block_channels,
                              bn_epsilon=bn_epsilon,
                              bn_use_global_stats=bn_use_global_stats,
                              activation="relu6"))
            in_channels = final_block_channels
            self.features.add(nn.AvgPool2D(pool_size=7, strides=1))

            self.output = nn.HybridSequential(prefix="")
            self.output.add(nn.Flatten())
            self.output.add(nn.Dense(units=classes, in_units=in_channels))
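    # The forward pass used with this features/output split (a sketch;
    # the same pattern appears throughout this model family):
    def hybrid_forward(self, F, x):
        x = self.features(x)
        x = self.output(x)
        return x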
Example #14
import matplotlib.pyplot as plt
import numpy as np
import mxnet as mx
from mxnet import nd, gluon, init, autograd
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Dense(200, activation='tanh', use_bias=False),
        nn.Dense(200, activation='tanh', use_bias=False),
        nn.Dense(200, activation='tanh', use_bias=False),
        nn.Dense(200, activation='tanh', use_bias=False))

X = nd.random.uniform(-1, 1, (1, 100))
net.initialize(force_reinit=True, init=init.Xavier())
X.attach_grad()
with autograd.record():
    Y = net[:1](X)
Y.backward()
y0 = X.grad
y0 = y0.asnumpy()
y0 = np.round(y0, 1)
y0 = list(y0[0])
#print(y0)
y0 = sorted(y0)
print(y0)
y0set = sorted(set(y0))
print(y0set)
n = 0
#plt.figure()
x = []
Example #15
        # should be dot(X_, W)
        E = self.attn(X_)  # (n, hidden) -> (n, hidden)
        attn_weights = F.softmax(E, axis=1)  # (n, w)
        attn_applied = F.elemwise_mul(attn_weights, X_)  #(n,w)
        output = self.c * (F.elemwise_mul(X_,
                                          attn_weights)) + (1 - self.c) * X_
        output = self.out(output)  #(n,output_size)
        return output


net = nn.Sequential()
with net.name_scope():
    net.add(rnn.GRU(num_hidden, num_layers, layout='NTC')
            )  # N: batch_size, T: sequence_length, C: feature_dimension
    net.add(nn.BatchNorm())
    net.add(nn.Dense(sequence_length))  # converts (N, T, C) to (N, T)
    net.add(Attn(sequence_length,
                 num_hidden))  # final attention layer, in (N, T) out (N, T)

net.collect_params().initialize(mx.init.Normal(sigma=0.02), ctx=ctx)
print(net.collect_params())
#params = net.collect_params()
#params.load('try3.params', ctx=ctx)
square_loss = gluon.loss.L2Loss()
learning_settings = {'learning_rate': 0.005, 'momentum': 0.9}
trainer = gluon.Trainer(net.collect_params(), 'sgd', learning_settings)
#metric = mx.metric.MSE()

epochs = 20
loss_sequence = []
for e in range(epochs):
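    # A sketch of the loop body that was truncated here (assumes autograd
    # was imported and `train_data` yields (data, label) NDArray batches):
    cumulative_loss = 0
    for data, label in train_data:
        with autograd.record():
            output = net(data.as_in_context(ctx))
            loss = square_loss(output, label.as_in_context(ctx))
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.mean(loss).asscalar()
    loss_sequence.append(cumulative_loss)
    print('Epoch %d, loss: %.4f' % (e, cumulative_loss))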
Example #16
    def __init__(self,
                 levels,
                 channels,
                 classes=1000,
                 block=BasicBlock,
                 momentum=0.9,
                 norm_layer=BatchNorm,
                 norm_kwargs=None,
                 residual_root=False,
                 linear_root=False,
                 use_feature=False,
                 **kwargs):
        super(DLA, self).__init__(**kwargs)
        if norm_kwargs is None:
            norm_kwargs = {}
        norm_kwargs['momentum'] = momentum
        self._use_feature = use_feature
        self.channels = channels
        self.base_layer = nn.HybridSequential('base')
        self.base_layer.add(
            nn.Conv2D(in_channels=3,
                      channels=channels[0],
                      kernel_size=7,
                      strides=1,
                      padding=3,
                      use_bias=False))
        self.base_layer.add(norm_layer(in_channels=channels[0], **norm_kwargs))
        self.base_layer.add(nn.Activation('relu'))

        self.level0 = self._make_conv_level(channels[0], channels[0],
                                            levels[0], norm_layer, norm_kwargs)
        self.level1 = self._make_conv_level(channels[0],
                                            channels[1],
                                            levels[1],
                                            norm_layer,
                                            norm_kwargs,
                                            stride=2)
        self.level2 = Tree(levels[2],
                           block,
                           channels[1],
                           channels[2],
                           2,
                           level_root=False,
                           root_residual=residual_root,
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           prefix='level2_')
        self.level3 = Tree(levels[3],
                           block,
                           channels[2],
                           channels[3],
                           2,
                           level_root=True,
                           root_residual=residual_root,
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           prefix='level3_')
        self.level4 = Tree(levels[4],
                           block,
                           channels[3],
                           channels[4],
                           2,
                           level_root=True,
                           root_residual=residual_root,
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           prefix='level4_')
        self.level5 = Tree(levels[5],
                           block,
                           channels[4],
                           channels[5],
                           2,
                           level_root=True,
                           root_residual=residual_root,
                           norm_layer=norm_layer,
                           norm_kwargs=norm_kwargs,
                           prefix='level5_')

        if not self._use_feature:
            self.global_avg_pool = nn.GlobalAvgPool2D()
            self.fc = nn.Dense(units=classes)
Example #17
 def __init__(self, **kwargs):
     super(SimpleModel, self).__init__(**kwargs)
     self.fc1 = nn.Dense(20)
     self.fc2 = nn.Dense(10)
Example #18
    break

mnist_valid = gluon.data.vision.FashionMNIST(train=False)
valid_data = gluon.data.DataLoader(
        mnist_valid.transform_first(transformer),
        batch_size=batch_size, num_workers=4)

# Define model
net = nn.Sequential()
net.add(
    nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
    nn.MaxPool2D(pool_size=2, strides=2),
    nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
    nn.MaxPool2D(pool_size=2, strides=2),
    nn.Flatten(),
    nn.Dense(120, activation='relu'),
    nn.Dense(84, activation='relu'),
    nn.Dense(10))
net.initialize(init=init.Xavier())

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})


def acc(output, label):
    return (output.argmax(axis=1) == label.astype('float32')).mean().asscalar()

for epoch in range(10):
    train_loss, train_acc, valid_acc = 0., 0., 0.
    tic = time.time()
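    # A sketch of the usual loop body that follows (assumes a `train_data`
    # DataLoader built like `valid_data`, and `from mxnet import autograd`):
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(batch_size)
        train_loss += loss.mean().asscalar()
        train_acc += acc(output, label)
    for data, label in valid_data:
        valid_acc += acc(net(data), label)
    print("Epoch %d: loss %.3f, train acc %.3f, valid acc %.3f, %.1fs" % (
        epoch, train_loss / len(train_data), train_acc / len(train_data),
        valid_acc / len(valid_data), time.time() - tic))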
Example #19
 def __init__(self):
     super(Critic, self).__init__()
     self.dense0 = nn.Dense(400, activation='relu')
     self.dense1 = nn.Dense(300, activation='relu')
     self.dense2 = nn.Dense(1)
Example #20
 def __init__(self, in_units=0, **kwargs):
     super(Net, self).__init__(**kwargs)
     with self.name_scope():
         self.dense0 = nn.Dense(5, in_units=in_units)
         self.dense1 = nn.Dense(5, in_units=in_units)
Example #21
def get_net():
    net = nn.HybridSequential()
    with net.name_scope():
        net.add(net_resnet50.features)
        net.add(nn.Dense(1062))
    return net
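# Typical usage (a sketch; assumes `import mxnet as mx` and that
# `net_resnet50` is a pretrained model, e.g. from gluon.model_zoo.vision,
# so only the new head needs initialization):
net = get_net()
net[-1].initialize(mx.init.Xavier())
net.hybridize()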
Example #22
 def __init__(self, **kwargs):
     super(Net, self).__init__(**kwargs)
     with self.name_scope():
         self.dense0 = nn.Dense(10, in_units=5, use_bias=False)
Example #23
    def __init__(self,
                 channels,
                 init_block_channels,
                 cardinality,
                 bottleneck_width,
                 group_widths,
                 refresh_steps,
                 bn_use_global_stats=False,
                 in_channels=3,
                 in_size=(224, 224),
                 classes=1000,
                 **kwargs):
        super(CRUNet, self).__init__(**kwargs)
        self.in_size = in_size
        self.classes = classes

        with self.name_scope():
            self.features = nn.HybridSequential(prefix='')
            self.features.add(
                ResInitBlock(in_channels=in_channels,
                             out_channels=init_block_channels,
                             bn_use_global_stats=bn_use_global_stats))
            in_channels = init_block_channels
            for i, channels_per_stage in enumerate(channels):
                stage = nn.HybridSequential(prefix="stage{}_".format(i + 1))
                group_width = group_widths[i]
                refresh_step = refresh_steps[i]
                with stage.name_scope():
                    for j, out_channels in enumerate(channels_per_stage):
                        strides = 2 if (j == 0) and (i != 0) else 1
                        if group_width != 0:
                            if ((refresh_step == 0) and
                                (j == 0)) or ((refresh_step != 0) and
                                              (j % refresh_step == 0)):
                                conv1_params = None
                                conv2_params = None
                            unit = CRUUnit(
                                in_channels=in_channels,
                                out_channels=out_channels,
                                strides=strides,
                                group_width=group_width,
                                bn_use_global_stats=bn_use_global_stats,
                                conv1_params=conv1_params,
                                conv2_params=conv2_params)
                            if conv1_params is None:
                                conv1_params = unit.body.conv1.conv.params
                                conv2_params = unit.body.conv2.params
                            stage.add(unit)
                        else:
                            stage.add(
                                ResUnit(
                                    in_channels=in_channels,
                                    out_channels=out_channels,
                                    strides=strides,
                                    cardinality=cardinality,
                                    bottleneck_width=bottleneck_width,
                                    bn_use_global_stats=bn_use_global_stats))
                        in_channels = out_channels
                self.features.add(stage)
            self.features.add(
                PreResActivation(in_channels=in_channels,
                                 bn_use_global_stats=bn_use_global_stats))
            self.features.add(nn.AvgPool2D(pool_size=7, strides=1))

            self.output = nn.HybridSequential(prefix='')
            self.output.add(nn.Flatten())
            self.output.add(nn.Dense(units=classes, in_units=in_channels))
Example #24
 def __init__(self, **kwargs):
     super(Model1, self).__init__(**kwargs)
     with self.name_scope():
         self.layers = [nn.Dense(i * 10) for i in range(6)]
Example #25
    def __init__(self,
                 repeat=6,
                 penultimate_filters=4032,
                 stem_filters=96,
                 filters_multiplier=2,
                 classes=1000,
                 use_aux=True):
        super(NASNetALarge, self).__init__()

        filters = penultimate_filters // 24

        self.conv0 = nn.HybridSequential(prefix='')
        self.conv0.add(
            nn.Conv2D(stem_filters, 3, padding=0, strides=2, use_bias=False))
        self.conv0.add(nn.BatchNorm(momentum=0.1, epsilon=0.001))

        self.cell_stem_0 = CellStem0(stem_filters,
                                     num_filters=filters //
                                     (filters_multiplier**2))
        self.cell_stem_1 = CellStem1(num_filters=filters // filters_multiplier)

        self.norm_1 = nn.HybridSequential(prefix='')
        self.norm_1.add(
            FirstCell(out_channels_left=filters // 2,
                      out_channels_right=filters))
        for _ in range(repeat - 1):
            self.norm_1.add(
                NormalCell(out_channels_left=filters,
                           out_channels_right=filters))

        self.reduction_cell_0 = ReductionCell0(out_channels_left=2 * filters,
                                               out_channels_right=2 * filters)

        self.norm_2 = nn.HybridSequential(prefix='')
        self.norm_2.add(
            FirstCell(out_channels_left=filters,
                      out_channels_right=2 * filters))
        for _ in range(repeat - 1):
            self.norm_2.add(
                NormalCell(out_channels_left=2 * filters,
                           out_channels_right=2 * filters))

        if use_aux:
            self.out_aux = nn.HybridSequential(prefix='')
            self.out_aux.add(
                nn.Conv2D(filters // 3, kernel_size=1, use_bias=False))
            self.out_aux.add(nn.BatchNorm(epsilon=0.001))
            self.out_aux.add(nn.Activation('relu'))
            self.out_aux.add(
                nn.Conv2D(2 * filters, kernel_size=5, use_bias=False))
            self.out_aux.add(nn.BatchNorm(epsilon=0.001))
            self.out_aux.add(nn.Activation('relu'))
            self.out_aux.add(nn.Dense(classes))
        else:
            self.out_aux = None

        self.reduction_cell_1 = ReductionCell1(out_channels_left=4 * filters,
                                               out_channels_right=4 * filters)

        self.norm_3 = nn.HybridSequential(prefix='')
        self.norm_3.add(
            FirstCell(out_channels_left=2 * filters,
                      out_channels_right=4 * filters))
        for _ in range(repeat - 1):
            self.norm_3.add(
                NormalCell(out_channels_left=4 * filters,
                           out_channels_right=4 * filters))

        self.out = nn.HybridSequential(prefix='')
        self.out.add(nn.Activation('relu'))
        self.out.add(nn.GlobalAvgPool2D())
        self.out.add(nn.Dropout(0.5))
        self.out.add(nn.Dense(classes))
Example #26
 def __init__(self, **kwargs):
     super(Model2, self).__init__(**kwargs)
     with self.name_scope():
         self.layers = dict()
         self.layers['a'] = [nn.Dense(10), nn.Dense(10)]
Example #27
def get_net():
    net = nn.Sequential()
    net.add(nn.Dense(64, activation="relu"), nn.Dense(32, activation="relu"),
            nn.Dense(1))
    return net
Example #28
 def __init__(self, **kwargs):
     super(Model3, self).__init__(**kwargs)
     with self.name_scope():
         self.layers = nn.Sequential()
         self.layers.add(*[nn.Dense(i * 10) for i in range(6)])
Example #29
class MySequential(nn.Block):
    def __init__(self, **kwargs):
        super(MySequential, self).__init__(**kwargs)

    def add(self, block):
        self._children[block.name] = block

    def forward(self, x):
        for block in self._children.values():
            x = block(x)
        return x


net = MySequential()
net.add(nn.Dense(256, activation='relu'))

net.initialize()
x = nd.random.uniform(shape=(2, 20))  # sample input; shape is assumed
net(x)


class FancyMLP(nn.Block):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        self.rand_weight = self.params.get_constant(
            'rand_weight', nd.random.uniform(shape=(20, 20)))

        self.dense = nn.Dense(20, activation='relu')

    def forward(self, x):
        x = self.dense(x)
Example #30
print("Y_train: " + str(Y_train))
print("Y_test: " + str(Y_test))

## define network
num_classes = 2
num_hidden = 200
learning_rate = .01
epochs = 200
batch_size = 20

model = nn.Sequential()
with model.name_scope():
    # attribute assignment registers the Embedding as the first child,
    # so Sequential applies it before the LSTM in forward
    model.embed = nn.Embedding(voca_size, num_embed)
    model.add(
        rnn.LSTM(num_hidden, layout='NTC', dropout=0.7, bidirectional=False))
    model.add(nn.Dense(num_classes))


def eval_accuracy(x, y, batch_size):
    accuracy = mx.metric.Accuracy()

    for i in range(x.shape[0] // batch_size):
        data = x[i * batch_size:(i * batch_size + batch_size), ]
        target = y[i * batch_size:(i * batch_size + batch_size), ]

        output = model(data)
        predictions = nd.argmax(output, axis=1)
        accuracy.update(preds=predictions, labels=target)

    return accuracy.get()[1]
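# Hypothetical usage of the helper above (X_test/Y_test are assumed to be
# NDArrays shaped like the training data):
print('test accuracy:', eval_accuracy(X_test, Y_test, batch_size))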