Example #1
0
import numpy
from mxnet import nd


def min(tensor, *args, **kwargs):
    """Minimum of `tensor`, dispatching to nd.min for NDArrays and numpy.min otherwise."""
    if isinstance(tensor, nd.NDArray):
        return nd.min(tensor, *args, **kwargs).asscalar()
    else:
        return numpy.min(tensor, *args, **kwargs)
Example #2
0
def test_min():
    # create_vector and LARGE_X are helpers/constants from MXNet's large-tensor tests.
    a = create_vector(size=LARGE_X)
    b = nd.min(a, axis=0)
    assert b[0] == 0
    assert b[-1] == 0
def max(tensor, *args, **kwargs):
    if isinstance(tensor, nd.NDArray):
        return nd.max(tensor, *args, **kwargs).asscalar()
    return numpy.max(tensor, *args, **kwargs)
Example #4
0
import warnings

import numpy as np
from mxnet import nd


def clip_grad_global_norm(parameters, max_norm, check_isfinite=True):
    """Rescales gradients of parameters so that the sum of their 2-norm is smaller than `max_norm`.
    If gradients exist for more than one context for a parameter, user needs to explicitly call
    ``trainer.allreduce_grads`` so that the gradients are summed first before calculating
    the 2-norm.

    .. note::

        This function is only for use when `update_on_kvstore` is set to False in trainer.
        In cases where training happens on multiple contexts, this method should be used in
        conjunction with ``trainer.allreduce_grads()`` and ``trainer.update()``.
        (**not** ``trainer.step()``). A self-contained usage sketch is given
        after the function body below.

    Example::

        trainer = Trainer(net.collect_params(), update_on_kvstore=False, ...)
        for x, label in zip(mx.gluon.utils.split_and_load(X, [mx.gpu(0), mx.gpu(1)]),
                            mx.gluon.utils.split_and_load(Y, [mx.gpu(0), mx.gpu(1)])):
            with mx.autograd.record():
                out = net(x)
                loss = loss_fn(out, label)
            loss.backward()
        trainer.allreduce_grads()
        nlp.utils.clip_grad_global_norm(net.collect_params().values(), max_norm)
        trainer.update(batch_size)
        ...

    Parameters
    ----------
    parameters : list of Parameters
    max_norm : float
    check_isfinite : bool, default True
         If True, check that the total_norm is finite (not nan or inf). This
         requires a blocking .asscalar() call.

    Returns
    -------
    NDArray or float
      Total norm. Return type is NDArray of shape (1,) if check_isfinite is
      False. Otherwise a float is returned.

    """
    def _norm(array):
        # Squared 2-norm of a gradient array (dense or sparse).
        if array.stype == 'default':
            x = array.reshape((-1,))
            return nd.dot(x, x)
        return array.norm().square()

    # Pick one gradient copy per parameter; cycling over the per-context copies
    # spreads the norm computation across devices (the copies are identical
    # after ``allreduce_grads``).
    arrays = []
    i = 0
    for p in parameters:
        if p.grad_req != 'null':
            grad_list = p.list_grad()
            arrays.append(grad_list[i % len(grad_list)])
            i += 1
    assert len(arrays) > 0, \
        'No parameter found available for gradient norm clipping.'
    ctx, dtype = arrays[0].context, arrays[0].dtype
    total_norm = nd.add_n(*[_norm(arr).as_in_context(ctx) for arr in arrays])
    total_norm = nd.sqrt(total_norm)
    if check_isfinite:
        total_norm = total_norm.asscalar()
        if not np.isfinite(total_norm):
            warnings.warn(UserWarning('nan or inf is detected. '
                                      'Clipping results will be undefined.'),
                          stacklevel=2)
    scale = max_norm / (total_norm + 1e-8)
    if check_isfinite:
        scale = nd.array([scale], dtype=dtype, ctx=ctx)
    # Never scale gradients up: cap the rescaling factor at 1.
    scale = nd.min(
        nd.concat(scale, nd.ones((1,), dtype=dtype, ctx=ctx), dim=0))
    for p in parameters:
        if p.grad_req != 'null':
            for arr in p.list_grad():
                arr *= scale.as_in_context(arr.context)
    return total_norm
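
# Illustrative usage sketch (not part of the original snippet): a minimal
# single-context run of the flow described in the docstring, assuming
# `update_on_kvstore=False`. The toy network and data below are made up.
from mxnet import autograd, gluon

net = gluon.nn.Dense(1)
net.initialize()
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.1}, update_on_kvstore=False)

X = nd.random.normal(shape=(8, 4))
Y = nd.random.normal(shape=(8, 1))
with autograd.record():
    loss = ((net(X) - Y) ** 2).mean()
loss.backward()

trainer.allreduce_grads()   # a no-op with one context, but matches the documented flow
total = clip_grad_global_norm(net.collect_params().values(), max_norm=1.0)
trainer.update(batch_size=X.shape[0])
print('gradient norm before clipping:', total)
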
Example #5
0
def test_min():
    a = create_2d_tensor(rows=SMALL_Y, columns=LARGE_X)
    b = nd.min(a, axis=0)
    assert b[0] == 0
    assert b[-1] == 0
    return train_dataloader, validation_dataloader  # tail of get_mnist_data(), defined in an earlier notebook cell


# In[20]:


t, v = get_mnist_data()
assert isinstance(t, gluon.data.DataLoader)
assert isinstance(v, gluon.data.DataLoader)

d, l = next(iter(t))
assert d.shape == (128, 1, 28, 28)  # check channel-first layout and batch size
assert l.shape == (128,)

assert nd.max(d).asscalar() <= 2.9 # check for normalization
assert nd.min(d).asscalar() >= -0.5 # check for normalization


# ---
# 
# ## Question 2
# 
# ### Write the training loop
# 
# * Create the loss function. This should be a loss function suitable for multi-class classification.
# * Create the metric accumulator. This should compute and store the accuracy of the model during training.
# * Create the trainer with the `adam` optimizer and a learning rate of `0.002`.
# * Write the training loop (see the sketch in the cell below).

# In[24]:
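

# A possible sketch of the training loop (not the official assignment solution).
# It reuses the `t` DataLoader created above and assumes a Gluon network `net`
# has been defined in an earlier cell.

from mxnet import autograd, gluon, metric

loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()   # multi-class classification loss
train_acc = metric.Accuracy()                    # accuracy accumulator
trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'learning_rate': 0.002})

for epoch in range(3):
    train_acc.reset()
    for data, label in t:
        with autograd.record():
            output = net(data)
            loss = loss_fn(output, label)
        loss.backward()
        trainer.step(batch_size=data.shape[0])
        train_acc.update(label, output)
    print('epoch %d, training accuracy %.3f' % (epoch, train_acc.get()[1]))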