def products(A):
    x = np.arange(4)
    y = np.ones(4)
    print("x . y : {}, {}".format(np.dot(x, y), np.sum(x * y)))
    print("A . x : {} has shape {}".format(np.dot(A, x), np.dot(A, x).shape))
    B = np.ones(shape=(4, 3))
    print("A . B : {} has shape {}".format(np.dot(A, B), np.dot(A, B).shape))
    print("{}.{} has shape {}".format(A.shape, B.shape, np.dot(A, B).shape))
def test_dot():
    A = np.ones((1, INT_OVERFLOW), dtype='float32')
    B = np.ones((INT_OVERFLOW, 1), dtype='float32')
    A.attach_grad()
    with mx.autograd.record():
        C = np.dot(A, B)
    assert_almost_equal(C.asnumpy(), [INT_OVERFLOW], rtol=1e-5, atol=1e-5)
    C.backward()
    assert A.grad.shape == (1, INT_OVERFLOW)
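# Note: the tests in this excerpt reference imports and constants that are not
# defined here (mx, np, npx, assert_almost_equal, INT_OVERFLOW, LARGE_X, SMALL_Y).
# A minimal sketch of the assumed scaffolding follows; the exact constant values
# are assumptions modeled on MXNet's nightly large-tensor tests, not taken from
# this excerpt.
import mxnet as mx
from mxnet import np, npx
from mxnet.test_utils import assert_almost_equal

INT_OVERFLOW = 2**31      # assumed: first size that overflows a signed 32-bit index
HALF_INT_OVERFLOW = 2**30
LARGE_X = 100000000       # assumed sizes for the 2-D large-tensor tests
SMALL_Y = 50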
def test_ctc_loss():
    A = np.ones((2, INT_OVERFLOW, 4))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.ctc_loss(A, np.ones((INT_OVERFLOW, 2)))
    assert B.shape == (INT_OVERFLOW, )
    assert type(B).__name__ == 'ndarray'
    B.backward()
    assert A.grad.shape == (2, INT_OVERFLOW, 4)
    assert A.grad[0][0][0] == 0
def test_layer_norm():
    A = np.ones((2, INT_OVERFLOW))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.layer_norm(A, gamma=np.ones((2)), beta=np.zeros((2)), axis=0)
    assert B.shape == (2, INT_OVERFLOW)
    assert B[0][0] == 0
    B.backward()
    assert A.grad.shape == (2, INT_OVERFLOW)
    assert A.grad[0][0] == 0
def test_append():
    A = np.ones((1, INT_OVERFLOW))
    B = np.ones((2, INT_OVERFLOW))
    A.attach_grad()
    with mx.autograd.record():
        C = np.append(A, B, axis=0)
    assert C.shape == (3, INT_OVERFLOW)
    assert C[2][0] == 1
    C.backward()
    assert A.grad.shape == (1, INT_OVERFLOW)
    assert A.grad[0][0] == 1
def test_add():
    INT_OVERFLOW = 2**30
    A = np.ones((INT_OVERFLOW, 2))
    B = np.ones((INT_OVERFLOW, 2))
    A.attach_grad()
    with mx.autograd.record():
        C = np.add(A, B)
    assert C.shape == (INT_OVERFLOW, 2)
    assert C[0][0] == 2
    C.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[0][0] == 1
def test_hypot():
    A = np.ones((INT_OVERFLOW, 2))
    B = np.ones((INT_OVERFLOW, 2))
    A[-1, -1], B[-1, -1] = 3, 4
    A.attach_grad()
    with mx.autograd.record():
        C = np.hypot(A, B)
        C.backward()
    assert C.shape == A.shape
    assert C[-1, -1] == 5
    assert A.grad.shape == A.shape
    assert_almost_equal(A.grad[-1, -1], np.array([0.6]), rtol=1e-5, atol=1e-5)
def test_add():
    A = np.ones((INT_OVERFLOW, 2))
    B = np.ones((INT_OVERFLOW, 2))
    A[-1, -1] = 2
    A.attach_grad()
    with mx.autograd.record():
        C = np.add(A, B)
        C.backward()
    assert C.shape == (INT_OVERFLOW, 2)
    assert C[-1, -1] == 3
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[-1, -1] == 1
def test_fully_connected():
    a = np.ones(shape=(LARGE_X, SMALL_Y))
    b = np.ones(shape=(SMALL_Y, SMALL_Y))
    c = np.ones(shape=(b.shape[0], ))

    # w/o bias
    res = mx.npx.fully_connected(a, b, num_hidden=b.shape[0], no_bias=True)
    assert np.sum(res[-1] == a.shape[1]) == b.shape[0]

    # w/ bias
    res = mx.npx.fully_connected(a, b, c, num_hidden=b.shape[0], no_bias=False)
    assert np.sum(res[-1] == a.shape[1] + 1) == b.shape[0]
def test_lcm():
    inp1 = np.ones((2, INT_OVERFLOW), dtype='int32')
    inp2 = np.ones((2, INT_OVERFLOW), dtype='int32')
    inp1[-1, -1] = 3
    inp2[-1, -1] = 5
    inp1.attach_grad()
    with mx.autograd.record():
        out = np.lcm(inp1, inp2)
        out.backward()
    assert out.shape == inp1.shape
    assert out[-1, -1] == 15
    assert inp1.grad.shape == inp1.shape
    assert inp1.grad[-1, -1] == 0
def test_concatenate():
    def batch_check(x1, x2, axises, shapes):
        for a, s in zip(axises, shapes):
            x1.attach_grad()
            with mx.autograd.record():
                y = np.concatenate((x1, x2), axis=a)
            assert y.shape == s
            y.backward()
            assert x1.grad.shape == (2, INT_OVERFLOW)
            assert x1.grad[0][0] == 1
    A = np.ones((2, INT_OVERFLOW))
    B = np.ones((1, INT_OVERFLOW))
    batch_check(A, B, [0, None],
                [(3, INT_OVERFLOW), (int(INT_OVERFLOW * 3), )])
def test_batch_norm():
    A = np.ones((2, INT_OVERFLOW))
    gamma = np.ones((2))
    beta = np.zeros((2))
    mov_mean = np.ones((2))
    mov_var = np.ones((2))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.batch_norm(A, gamma, beta, mov_mean, mov_var)
    assert B.shape == (2, INT_OVERFLOW)
    assert B[0][0] == 0
    B.backward()
    assert A.grad.shape == (2, INT_OVERFLOW)
    assert A.grad[0][0] == 0
def test_multiply():
    A = np.ones((2, INT_OVERFLOW))
    B = np.ones((2, INT_OVERFLOW))
    A[-1, -1], B[-1, -1] = 2, 3
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.multiply(A, B)
        C.backward()
    assert C.shape == A.shape
    assert C[0, 0] == 1 and C[-1, -1] == 6
    assert A.grad.shape == A.shape
    assert A.grad[-1, -1] == B[-1, -1]
    assert B.grad.shape == B.shape
    assert B.grad[-1, -1] == A[-1, -1]
def test_bitwise_family():
    def batch_check(x1, x2, funcs):
        for f in funcs:
            y = f(x1, x2)
            one = np.ones((1), dtype='int32')
            assert y.shape == (INT_OVERFLOW, 2)
            assert y[0][0] == f(one, one)
    # test on broadcast input
    A = np.ones((INT_OVERFLOW, 1), dtype='int32')
    B = np.ones((INT_OVERFLOW, 2), dtype='int32')
    batch_check(A, B, [np.bitwise_and, np.bitwise_or, np.bitwise_xor])
    C = np.bitwise_not(A)
    assert C.shape == (INT_OVERFLOW, 1)
    assert C[0] == np.bitwise_not(np.ones((1), dtype='int32'))
def test_fmod():
    inp1 = np.ones((INT_OVERFLOW, 2))
    inp2 = np.ones((INT_OVERFLOW, 1))
    inp1[-1, -1], inp2[-1, -1] = 11, 7
    inp1.attach_grad()
    inp2.attach_grad()
    with mx.autograd.record():
        out = np.fmod(inp1, inp2)
        out.backward()
    assert out.shape == inp1.shape
    assert out[-1, -1] == 4
    assert inp1.grad.shape == inp1.shape
    assert inp1.grad[0, 0] == 1
    assert inp2.grad.shape == inp2.shape
    assert inp2.grad[-1] == -1 and inp2.grad[0] == -2
def test_np_einsum():
    print("Path optimization test:")
    # Basic einsum
    a = np.ones(64).reshape(2, 4, 8)
    args = ['ijk,ilm,njm,nlk,abc->', a, a, a, a, a]
    cost = measure_cost(500, np.einsum, *args)
    print("Basic einsum: {} ms".format(cost * 1000))

    # Sub-optimal einsum
    # cost = measure_cost(500, np.einsum, *args, optimize='optimal')
    # print("Optimal einsum: {} ms".format(cost * 1000))

    # Greedy einsum
    cost = measure_cost(500, np.einsum, *args, optimize=True)
    print("Greedy einsum: {} ms".format(cost * 1000))

    print("RNN Use Case:")
    a = np.random.uniform(0, 1, size=(64, 128, 512))
    b = np.random.uniform(0, 1, size=(128, 512, 2, 2))
    args = ['bij, ijkl->bkl', a, b]
    cost = measure_cost(2, np.einsum, *args, optimize=True)
    print('Greedy einsum: {} ms'.format(cost * 1000))
    cost = measure_cost(2, np.einsum, *args)
    print('Basic einsum: {} ms'.format(cost * 1000))

    print('Inner Product:')
    a = np.ones(6000000)
    b = np.ones(6000000)
    args = [a, b]
    cost = measure_cost(50, np.tensordot, *args, axes=([0], [0]))
    print('Tensordot: {} ms'.format(cost * 1000))
    args = ['i, i', a, b]
    cost = measure_cost(50, np.einsum, *args, optimize=True)
    print('Greedy einsum: {} ms'.format(cost * 1000))
    cost = measure_cost(50, np.einsum, *args)
    print('Basic einsum: {} ms'.format(cost * 1000))

    print('Matrix Product:')
    a = np.ones(600000).reshape(200, 3000)
    b = np.ones(600000).reshape(3000, 200)
    args = [a, b]
    cost = measure_cost(50, np.tensordot, *args, axes=([1], [0]))
    print('Tensordot: {} ms'.format(cost * 1000))
    args = ['ij, jk', a, b]
    cost = measure_cost(50, np.einsum, *args, optimize=True)
    print('Greedy einsum: {} ms'.format(cost * 1000))
    cost = measure_cost(50, np.einsum, *args)
    print('Basic einsum: {} ms'.format(cost * 1000))
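# measure_cost is called above but not defined in this excerpt. A minimal sketch,
# assuming it just averages wall-clock time over repeated calls and waits for
# MXNet's asynchronous execution to finish so the timing is meaningful:
import time

def measure_cost(repeat, func_name, *args, **kwargs):
    mx.nd.waitall()                     # drain pending async work before timing
    start = time.time()
    for _ in range(repeat):
        func_name(*args, **kwargs)
    mx.nd.waitall()                     # ensure all timed work has completed
    return (time.time() - start) / repeat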
def test_mx_pt_eq_embedding(vocab_size, num_embed, factor_configs, sparse):
    pytest.importorskip("mxnet")
    import sockeye.encoder
    from mxnet import np
    config = sockeye.encoder.EmbeddingConfig(vocab_size=vocab_size,
                                             num_embed=num_embed,
                                             dropout=0,
                                             factor_configs=factor_configs,
                                             allow_sparse_grad=sparse)
    block_mx = sockeye.encoder.Embedding(config, None, C.DTYPE_FP32)
    block_mx.initialize()
    block_pt = sockeye.encoder_pt.PyTorchEmbedding(config, None)
    block_pt.weights_from_mxnet_block(block_mx)

    batch, seq_len, num_factors = 4, 10, len(factor_configs) + 1 if factor_configs is not None else 1
    # data_mx does not take into account different vocab sizes for factors
    data_mx = np.random.randint(0, vocab_size, (batch, seq_len, num_factors))
    data_pt = pt.as_tensor(data_mx.asnumpy())
    vl_mx = np.ones((1, ))  # not used
    vl_pt = pt.as_tensor(vl_mx.asnumpy())

    r_mx, _ = block_mx(data_mx, vl_mx)
    r_pt = block_pt(data_pt)

    r_mx = r_mx.asnumpy()
    r_pt = r_pt.detach().numpy()
    assert np.allclose(r_mx, r_pt)
def test_copysign():
    A = np.ones((INT_OVERFLOW, 2))
    #A.attach_grad()
    #with mx.autograd.record():
    B = np.copysign(A, -1)
    assert B.shape == (INT_OVERFLOW, 2)
    assert B[0][0] == -1
def test_greedytop1(target_vocab_size):
    pytest.importorskip("mxnet")
    from mxnet import np
    import sockeye.beam_search
    batch_size = 1
    beam_size = 1
    target_vocab_size = 50
    # Random model scores. Shape: (batch_size * beam_size, target_vocab_size)
    scores = np.random.uniform(0, 1, (batch_size * beam_size, target_vocab_size))
    expected_hyp_index, expected_word_index, expected_value = numpy_topk(scores, k=beam_size, offset=None)
    assert expected_hyp_index[0] == 0
    assert expected_value.shape == (1, 1)

    greedy_top1 = sockeye.beam_search.GreedyTop1()
    greedy_top1.initialize()

    best_word_index = greedy_top1(scores, None, None)
    assert best_word_index.shape == (1, 1)
    assert best_word_index[0, 0] == expected_word_index[0]

    target_factors = np.ones((1, 1), dtype='int32')
    best_word_index_with_factors = greedy_top1(scores, None, target_factors)
    assert best_word_index_with_factors.shape == (1, 2)
    assert best_word_index_with_factors[0, 0] == expected_word_index[0]
    assert best_word_index_with_factors[0, 1] == target_factors.item()
def forward(self, logits: np.ndarray, labels: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    pred = npx.log_softmax(logits, axis=-1)
    # (batch, len)
    neg_log_likelihood = - npx.pick(pred,  # pylint: disable=invalid-unary-operand-type
                                    labels, axis=-1, keepdims=False)

    # label smoothing as in
    # https://github.com/dmlc/gluon-nlp/blob/b714eaccc67619d7bdcbd1574d30be87d9c73f0c/src/gluonnlp/loss.py#L4
    if self._alpha > 0:
        all_scores = np.sum(pred, axis=-1)
        neg_log_likelihood = (1 - self._alpha) * neg_log_likelihood - self._alpha / self._num_labels * all_scores

    # (batch, len,)
    valid_mask = labels != self.ignore_label
    # (batch, len)
    loss = neg_log_likelihood * valid_mask
    # (1,)
    num_valid = np.sum(valid_mask)
    # (1,)
    ce = np.sum(loss) * self.weight
    # we need to divide by num_valid here to backpropagate a 'valid' normalized loss value like in SoftmaxOutput.
    return ce / num_valid, np.ones((1,))
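# A small numeric check of the label-smoothing step above, with hypothetical
# values not taken from the source: for alpha = 0.1, num_labels = 4, a per-token
# negative log-likelihood of 0.2, and a summed log-probability (all_scores) of -6.0:
#   smoothed = (1 - 0.1) * 0.2 - (0.1 / 4) * (-6.0) = 0.18 + 0.15 = 0.33
# i.e. smoothing adds a penalty proportional to how much probability mass the
# model concentrates away from the other labels.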
def test_ldexp():
    A = np.ones((2, INT_OVERFLOW))
    B = np.ones((2, INT_OVERFLOW))
    A[-1, -1], B[-1, -1] = 5, 2
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.ldexp(A, B)
        C.backward()
    assert C.shape == A.shape
    assert C[-1, -1] == 20
    assert A.grad.shape == A.shape
    assert A.grad[-1, -1] == 4
    assert B.grad.shape == B.shape
    assert_almost_equal(B.grad[-1, -1], A[-1, -1] * 2**B[-1, -1] * np.log(2),
                        rtol=1e-5, atol=1e-5)
def test_save_load():
    A = np.ones((2, INT_OVERFLOW), dtype='int8')
    A[0][100] = 100
    npx.save('my_tensor', A)
    B = np.array(npx.load('my_tensor'))
    assert B[0].shape == (2, INT_OVERFLOW)
    assert B[0][0][100] == 100
def test_hybrid_block_multiple_outputs():
    @use_np
    class TestAllNumpyOutputs(HybridBlock):
        def hybrid_forward(self, F, x, *args, **kwargs):
            return F.np.add(x, x), F.np.multiply(x, x)

    class TestAllClassicOutputs(HybridBlock):
        def hybrid_forward(self, F, x, *args, **kwargs):
            return x.as_nd_ndarray() + x.as_nd_ndarray(), x.as_nd_ndarray() * x.as_nd_ndarray()

    data_np = np.ones((2, 3))
    for block, expected_out_type in [(TestAllClassicOutputs, mx.nd.NDArray),
                                     (TestAllNumpyOutputs, np.ndarray)]:
        net = block()
        for hybridize in [True, False]:
            if hybridize:
                net.hybridize()
            out1, out2 = net(data_np)
            assert type(out1) is expected_out_type
            assert type(out2) is expected_out_type

    @use_np
    class TestMixedTypeOutputsFailure(HybridBlock):
        def hybrid_forward(self, F, x, *args, **kwargs):
            return x.as_nd_ndarray() + x.as_nd_ndarray(), F.np.multiply(x, x)

    net = TestMixedTypeOutputsFailure()
    assert_exception(net, TypeError, data_np)
    net.hybridize()
    assert_exception(net, TypeError, data_np)
def check_ones_array_creation(shape, dtype):
    np_out = _np.ones(shape=shape, dtype=dtype)
    mx_out = np.ones(shape=shape, dtype=dtype)
    assert same(mx_out.asnumpy(), np_out)
    if dtype is None:
        assert mx_out.dtype == _np.float32
        assert np_out.dtype == _np.float64
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in = np.random.uniform(0, 255, (5, 300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(),
                        np.transpose(data_in.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2)))

    # Invalid Input
    invalid_data_in = np.random.uniform(0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    transformer = transforms.ToTensor()
    assertRaises(MXNetError, transformer, invalid_data_in)

    # Bounds (0->0, 255->1)
    data_in = np.zeros((10, 20, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert same(out_nd.asnumpy(),
                np.transpose(np.zeros(data_in.shape, dtype=np.float32), (2, 0, 1)))

    data_in = np.full((10, 20, 3), 255).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(np.array(data_in, dtype='uint8'))
    assert same(out_nd.asnumpy(),
                np.transpose(np.ones(data_in.shape, dtype=np.float32), (2, 0, 1)))
def test_all():
    A = np.ones((INT_OVERFLOW, 2))
    A.attach_grad()
    with mx.autograd.record():
        B = np.all(A)
    assert B == True
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[0][0] == 0
def test_slice_assign():
    # test _slice_assign
    A = np.zeros((INT_OVERFLOW, 2))
    A[-1] = np.ones((1))
    assert A[-1, 0] == 1 and A[-1, 1] == 1
    # test _slice_assign_scalar
    B = np.zeros((INT_OVERFLOW, 2))
    B[-1] = 2
    assert B[-1, 0] == 2 and B[-1, 1] == 2
def test_softmax():
    input_data = np.ones((SMALL_Y, LARGE_X))
    for axis in [0, 1]:
        true_output = np.full((SMALL_Y, LARGE_X), (1 / input_data.shape[axis]))
        output = npx.softmax(input_data, axis=axis)
        assert_almost_equal(output.asnumpy(), true_output, rtol=1e-5, atol=1e-5)
def test_stop_gradient():
    A = np.ones((INT_OVERFLOW, 2))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.stop_gradient(A * 3)
    assert B.shape == (INT_OVERFLOW, 2)
    assert B[0][0] == 3
    B.backward()
    # should be 3 if not for stop_gradient()
    assert A.grad[0][0] == 0
def test_reshape_like():
    A = np.ones((INT_OVERFLOW, 2))
    A.attach_grad()
    with mx.autograd.record():
        B = npx.reshape_like(A, np.zeros((2, INT_OVERFLOW)))
    assert B.shape == (2, INT_OVERFLOW)
    assert B[0][0] == 1
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[0][0] == 1