import numpy
from mxnet import nd


def min(tensor, *args, **kwargs):
    # Dispatch: MXNet NDArrays go through nd.min and are unwrapped to a
    # Python scalar; anything else falls through to numpy.min.
    if isinstance(tensor, nd.NDArray):
        return nd.min(tensor, *args, **kwargs).asscalar()
    else:
        return numpy.min(tensor, *args, **kwargs)
def test_min():
    a = create_vector(size=LARGE_X)
    b = nd.min(a, axis=0)
    assert b[0] == 0
    assert b[-1] == 0
def max(tensor, *args, **kwargs):
    # Mirrors min() above: NDArrays go through nd.max, everything else
    # through numpy.max.
    if isinstance(tensor, nd.NDArray):
        return nd.max(tensor, *args, **kwargs).asscalar()
    return numpy.max(tensor, *args, **kwargs)
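# A quick, hypothetical sanity check of the two wrappers above. It assumes both
# are defined in the current scope (they intentionally shadow the builtins) and
# that mxnet is installed.
import numpy
from mxnet import nd

x = nd.array([1.0, -2.0, 3.0])
assert min(x) == -2.0   # NDArray path: nd.min(...).asscalar()
assert max(x) == 3.0    # NDArray path: nd.max(...).asscalar()

y = numpy.array([1.0, -2.0, 3.0])
assert min(y) == -2.0   # numpy path: numpy.min
assert max(y) == 3.0    # numpy path: numpy.max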
import warnings

import numpy as np
from mxnet import nd


def clip_grad_global_norm(parameters, max_norm, check_isfinite=True):
    """Rescales gradients of parameters so that the global 2-norm of all
    gradients is smaller than `max_norm`. If gradients exist for more than one
    context for a parameter, the user needs to explicitly call
    ``trainer.allreduce_grads`` so that the gradients are summed first before
    calculating the 2-norm.

    .. note::

        This function is only for use when `update_on_kvstore` is set to False
        in trainer. In cases where training happens on multiple contexts, this
        method should be used in conjunction with ``trainer.allreduce_grads()``
        and ``trainer.update()``. (**not** ``trainer.step()``)

    Example::

        trainer = Trainer(net.collect_params(), update_on_kvstore=False, ...)
        for x, y in mx.gluon.utils.split_and_load(X, [mx.gpu(0), mx.gpu(1)]):
            with mx.autograd.record():
                y = net(x)
                loss = loss_fn(y, label)
            loss.backward()
        trainer.allreduce_grads()
        nlp.utils.clip_grad_global_norm(net.collect_params().values(), max_norm)
        trainer.update(batch_size)
        ...

    Parameters
    ----------
    parameters : list of Parameters
    max_norm : float
    check_isfinite : bool, default True
        If True, check that the total_norm is finite (not nan or inf). This
        requires a blocking .asscalar() call.

    Returns
    -------
    NDArray or float
        Total norm. Return type is NDArray of shape (1,) if check_isfinite is
        False. Otherwise a float is returned.
    """
    def _norm(array):
        if array.stype == 'default':
            x = array.reshape((-1,))
            return nd.dot(x, x)
        return array.norm().square()

    arrays = []
    i = 0
    for p in parameters:
        if p.grad_req != 'null':
            grad_list = p.list_grad()
            arrays.append(grad_list[i % len(grad_list)])
            i += 1
    assert len(arrays) > 0, \
        'No parameter found available for gradient norm clipping.'
    ctx, dtype = arrays[0].context, arrays[0].dtype
    total_norm = nd.add_n(*[_norm(arr).as_in_context(ctx) for arr in arrays])
    total_norm = nd.sqrt(total_norm)
    if check_isfinite:
        total_norm = total_norm.asscalar()
        if not np.isfinite(total_norm):
            warnings.warn(UserWarning('nan or inf is detected. '
                                      'Clipping results will be undefined.'),
                          stacklevel=2)
    scale = max_norm / (total_norm + 1e-8)
    if check_isfinite:
        scale = nd.array([scale], dtype=dtype, ctx=ctx)
    scale = nd.min(nd.concat(scale, nd.ones((1,), dtype=dtype, ctx=ctx), dim=0))
    for p in parameters:
        if p.grad_req != 'null':
            for arr in p.list_grad():
                arr *= scale.as_in_context(arr.context)
    return total_norm
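# A small numeric check of the clipping behavior: a minimal sketch assuming a
# single context and the function above. The tiny Dense layer, data shapes and
# tolerances are arbitrary illustration choices, not part of the original API.
import mxnet as mx
from mxnet import autograd, gluon, nd

net = gluon.nn.Dense(4, in_units=8)
net.initialize()

with autograd.record():
    loss = net(nd.random.normal(shape=(2, 8))).sum()
loss.backward()

# Global 2-norm of all gradients before clipping.
params = [p for p in net.collect_params().values() if p.grad_req != 'null']
before = nd.sqrt(sum((p.grad() ** 2).sum() for p in params)).asscalar()

total_norm = clip_grad_global_norm(net.collect_params().values(), max_norm=1.0)

# Gradients are rescaled in place, so recomputing the norm shows the effect.
after = nd.sqrt(sum((p.grad() ** 2).sum() for p in params)).asscalar()
assert abs(total_norm - before) < 1e-4   # returned value is the pre-clip norm
assert after <= 1.0 + 1e-4               # gradients now fit inside max_norm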
def test_min():
    a = create_2d_tensor(rows=SMALL_Y, columns=LARGE_X)
    b = nd.min(a, axis=0)
    assert b[0] == 0
    assert b[-1] == 0
    return train_dataloader, validation_dataloader


# In[20]:


t, v = get_mnist_data()
assert isinstance(t, gluon.data.DataLoader)
assert isinstance(v, gluon.data.DataLoader)

d, l = next(iter(t))
assert d.shape == (128, 1, 28, 28)  # check Channel First and Batch Size
assert l.shape == (128,)

assert nd.max(d).asscalar() <= 2.9   # check for normalization
assert nd.min(d).asscalar() >= -0.5  # check for normalization


# ---
#
# ## Question 2
#
# ### Write the training loop
#
# * Create the loss function. This should be a loss function suitable for multi-class classification.
# * Create the metric accumulator. This should compute and store the accuracy of the model during training.
# * Create the trainer with the `adam` optimizer and learning rate of `0.002`.
# * Write the training loop (a sketch of one possible loop follows in the next cell).

# In[24]:
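# One possible shape for the requested training loop; a minimal sketch, not an
# official solution. It assumes `net` is the model built earlier in the notebook
# and `t` is the training DataLoader from above; `epochs` is a placeholder.
import mxnet as mx
from mxnet import autograd, gluon

loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()   # multi-class classification loss
metric = mx.metric.Accuracy()                    # accuracy accumulator
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 0.002})

epochs = 3  # placeholder value
for epoch in range(epochs):
    metric.reset()
    for data, label in t:
        with autograd.record():
            output = net(data)
            loss = loss_fn(output, label)
        loss.backward()
        trainer.step(data.shape[0])   # normalize the update by batch size
        metric.update(label, output)
    print('epoch %d, training accuracy %.3f' % (epoch, metric.get()[1]))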