def f(x):
    # Assumed from context: a tangent line of 2x - 3 at x = 1 is consistent
    # with f(x) = 3x^2 - 4x, since f(1) = -1 and f'(1) = 2.
    return 3 * x ** 2 - 4 * x


def function_plot():
    x = np.arange(0, 5, 0.1)
    d2l.plot(x, [f(x), 2 * x - 3], 'x', 'f(x)',
             legend=['f(x)', 'Tangent line (x=1)'])
    plt.show()
def plot_normal_distributions():
    """
    Plot normal distributions with different mean (mu) and variance (sigma^2)
    values to demonstrate what effect different means and variances have on
    a normal distribution.
    """
    # Create an evenly spaced vector from -7 to 7 with 0.01 as the spacing.
    x = np.arange(-7, 7, 0.01)
    # Different parameters to be used for mean and variance, respectively.
    parameters = [(0, 1), (0, 2), (3, 1)]
    d2l.plot(
        x,
        [
            compute_normal_distribution(x, mean, variance)
            for mean, variance in parameters
        ],
        xlabel="z",
        ylabel="p(z)",
        figsize=(4.5, 2.5),
        legend=[
            f"mean {mean}, var {variance}" for mean, variance in parameters
        ],
    )
    d2l.plt.savefig("normal_distributions")
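# `compute_normal_distribution` is referenced above but not defined in this
# section. A minimal sketch consistent with the call sites, treating the
# second parameter as the variance sigma^2 of the density
# p(z) = exp(-(z - mu)^2 / (2 * sigma^2)) / sqrt(2 * pi * sigma^2):
def compute_normal_distribution(x, mean, variance):
    return np.exp(-(x - mean) ** 2 / (2 * variance)) / np.sqrt(
        2 * np.pi * variance)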
def _add_workload_broadcast_to():
    OpArgMngr.add_workload('broadcast_to', np.array(0), (0,))
    OpArgMngr.add_workload('broadcast_to', np.array(0), (1,))
    OpArgMngr.add_workload('broadcast_to', np.array(0), (3,))
    OpArgMngr.add_workload('broadcast_to', np.ones(1), (1,))
    OpArgMngr.add_workload('broadcast_to', np.ones(1), (2,))
    OpArgMngr.add_workload('broadcast_to', np.ones(1), (1, 2, 3))
    OpArgMngr.add_workload('broadcast_to', np.arange(3), (3,))
    OpArgMngr.add_workload('broadcast_to', np.arange(3), (1, 3))
    OpArgMngr.add_workload('broadcast_to', np.arange(3), (2, 3))
    OpArgMngr.add_workload('broadcast_to', np.ones(0), 0)
    OpArgMngr.add_workload('broadcast_to', np.ones(1), 1)
    OpArgMngr.add_workload('broadcast_to', np.ones(1), 2)
    OpArgMngr.add_workload('broadcast_to', np.ones(1), (0,))
    OpArgMngr.add_workload('broadcast_to', np.ones((1, 2)), (0, 2))
    OpArgMngr.add_workload('broadcast_to', np.ones((2, 1)), (2, 0))
def test_topk_func(batch_size, beam_size, target_vocab_size):
    pytest.importorskip("mxnet")
    from mxnet import np
    import sockeye.beam_search

    # Random model scores. Shape: (batch_size * beam_size, target_vocab_size)
    scores = np.random.uniform(0, 1, (batch_size * beam_size, target_vocab_size))
    # Offset for batch sizes > 1
    offset = np.repeat(
        np.arange(0, batch_size * beam_size, beam_size, dtype='int32'),
        beam_size)

    np_hyp, np_word, np_values = numpy_topk(scores.asnumpy(), k=beam_size,
                                            offset=offset)

    topk = sockeye.beam_search.TopK(k=beam_size)
    topk.initialize()
    mx_hyp, mx_word, mx_values = topk(scores, offset)
    assert np.allclose(mx_hyp, np_hyp)
    assert np.allclose(mx_word, np_word)
    assert np.allclose(mx_values, np_values)

    topk.hybridize()
    mx_hyp, mx_word, mx_values = topk(scores, offset)
    assert np.allclose(mx_hyp, np_hyp)
    assert np.allclose(mx_word, np_word)
    assert np.allclose(mx_values, np_values)
def _add_workload_power(array_pool):
    OpArgMngr.add_workload('power', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('power', array_pool['4x1'], 2)
    OpArgMngr.add_workload('power', 2, array_pool['4x1'])
    OpArgMngr.add_workload('power', array_pool['4x1'], array_pool['1x1x0'])
    OpArgMngr.add_workload('power', np.array([1, 2, 3], np.int32), 2.00001)
    OpArgMngr.add_workload('power', np.array([15, 15], np.int64),
                           np.array([15, 15], np.int64))
    OpArgMngr.add_workload('power', 0, np.arange(1, 10))
def products(A):
    x = np.arange(4)
    y = np.ones(4)
    print("x . y : {}, {}".format(np.dot(x, y), np.sum(x * y)))
    print("A . x : {} has shape {}".format(np.dot(A, x), np.dot(A, x).shape))
    B = np.ones(shape=(4, 3))
    print("A . B : {} has shape {}".format(np.dot(A, B), np.dot(A, B).shape))
    print("{}.{} has shape {}".format(A.shape, B.shape, np.dot(A, B).shape))
def multibox_prior(data, sizes, ratios):
    # data: (batch, channels, height, width)
    in_height, in_width = data.shape[-2:]
    device, num_sizes, num_ratios = data.ctx, len(sizes), len(ratios)
    boxes_per_pixel = num_sizes + num_ratios - 1
    size_tensor = np.array(sizes, ctx=device)
    ratio_tensor = np.array(ratios, ctx=device)

    # Offsets are required to move the anchor to the center of a pixel.
    # Since a pixel has height=1 and width=1, we offset the centers by 0.5.
    offset_w, offset_h = 0.5, 0.5
    steps_h = 1.0 / in_height  # Scaled steps in y axis
    steps_w = 1.0 / in_width  # Scaled steps in x axis

    # Generate all center points for the anchor boxes
    center_h = (np.arange(in_height, ctx=device) + offset_h) * steps_h
    center_w = (np.arange(in_width, ctx=device) + offset_w) * steps_w
    shift_x, shift_y = np.meshgrid(center_w, center_h)
    shift_x, shift_y = shift_x.reshape(-1), shift_y.reshape(-1)

    # Generate boxes_per_pixel heights and widths that are later used to
    # create anchor box corner coordinates (xmin, xmax, ymin, ymax):
    # concat (various sizes, first ratio) and (first size, various ratios)
    w = np.concatenate((size_tensor * np.sqrt(ratio_tensor[0]),
                        size_tensor[0] * np.sqrt(ratio_tensor[1:]))) \
        * in_height / in_width
    h = np.concatenate((size_tensor / np.sqrt(ratio_tensor[0]),
                        sizes[0] / np.sqrt(ratio_tensor[1:])))
    # Divide by 2 to get the half height and half width
    anchor_manipulations = np.tile(
        np.stack((-w, -h, w, h)).T, (in_height * in_width, 1)) / 2

    # Each center point will have boxes_per_pixel anchor boxes, so generate a
    # grid of all anchor box centers with boxes_per_pixel repeats
    out_grid = np.stack([shift_x, shift_y, shift_x, shift_y],
                        axis=1).repeat(boxes_per_pixel, axis=0)
    output = out_grid + anchor_manipulations
    return np.expand_dims(output, axis=0)
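# A small usage sketch (the image size, sizes, and ratios below are
# illustrative values following the d2l anchor-box example, not part of this
# file): each pixel gets num_sizes + num_ratios - 1 anchor boxes.
img_h, img_w = 561, 728
X = np.random.uniform(size=(1, 3, img_h, img_w))
Y = multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
print(Y.shape)  # (1, img_h * img_w * 5, 4): 3 sizes + 3 ratios - 1 = 5 boxes per pixel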
def get_positional_embeddings(length, depth) -> np.ndarray:
    utils.check_condition(
        depth % 2 == 0,
        "Positional embeddings require an even embedding size; got %d." % depth)
    # (1, depth // 2)
    channels = np.arange(depth // 2).reshape((1, -1))
    # (length, 1)
    positions = np.arange(0, length).reshape((-1, 1))
    scaled_positions = positions / np.power(10000, (2 * channels) / depth)
    # sinusoids:
    sin = np.sin(scaled_positions)
    # cosines:
    cos = np.cos(scaled_positions)
    # stack the sin and cos halves side by side: (length, depth)
    encodings = np.hstack([sin, cos])
    return encodings
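# A quick usage check (illustrative): each of the `length` positions gets a
# `depth`-dimensional embedding, with sines in the first depth // 2 channels
# and cosines in the rest.
enc = get_positional_embeddings(length=10, depth=8)
assert enc.shape == (10, 8)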
def __init__(self, num_hiddens, dropout, max_len=1000):
    super(PositionalEncoding, self).__init__()
    self.dropout = nn.Dropout(dropout)
    # Create a long enough `P`
    self.P = d2l.zeros((1, max_len, num_hiddens))
    X = d2l.arange(max_len).reshape(-1, 1) / np.power(
        10000, np.arange(0, num_hiddens, 2) / num_hiddens)
    self.P[:, :, 0::2] = np.sin(X)
    self.P[:, :, 1::2] = np.cos(X)
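# A minimal `forward` sketch to pair with the constructor above (this mirrors
# the d2l PositionalEncoding the __init__ appears to come from; assumed, not
# part of this file): add the fixed encodings to the input, then apply dropout.
def forward(self, X):
    X = X + self.P[:, :X.shape[1], :].as_in_ctx(X.ctx)
    return self.dropout(X)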
def test_smooth_l1():
    A = np.arange(INT_OVERFLOW)
    A.attach_grad()
    with mx.autograd.record():
        B = npx.smooth_l1(A)
    assert B.shape == (INT_OVERFLOW, )
    assert B[1] == 0.5
    B.backward()
    assert A.grad.shape == (INT_OVERFLOW, )
    assert A.grad[0] == 0
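# For reference, smooth L1 with the default sigma is, elementwise,
#   f(x) = 0.5 * x^2   if |x| < 1
#   f(x) = |x| - 0.5   otherwise,
# so B[1] = f(1) = 0.5 and A.grad[0] = f'(0) = 0, matching the asserts above.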
def n_dim_array():
    x = np.arange(12)
    print(x, type(x))
    x = x.reshape(-1, 3)
    print("x {} of type '{}' with shape '{}'".format(x, type(x), x.shape))
    y = np.zeros((2, 3, 4))
    print("y {} with shape {}".format(y, y.shape))
    z = np.random.normal(10, 1, size=(3, 4))
    print("z {} with shape {}".format(z, z.shape))
    a = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
    print("a {} with shape {}".format(a, a.shape))
def vectors():
    # Scalars are also ndarrays
    x = np.array(3.0)
    y = np.array(2.0)
    print("shape of scalar = {}".format(x.shape))
    print("size of scalar = {}".format(x.size))
    # Vector
    xv = np.arange(4)
    print("shape of vector {} = {}".format(xv, xv.shape))
    print("size of vector = {}".format(xv.size))
    print(x + y, x * y, x / y, x ** y)
def forward(self, X, pred_positions):
    num_pred_positions = pred_positions.shape[1]
    pred_positions = pred_positions.reshape(-1)
    batch_size = X.shape[0]
    batch_idx = np.arange(0, batch_size)
    # Suppose that `batch_size` = 2, `num_pred_positions` = 3, then
    # `batch_idx` is np.array([0, 0, 0, 1, 1, 1])
    batch_idx = np.repeat(batch_idx, num_pred_positions)
    masked_X = X[batch_idx, pred_positions]
    masked_X = masked_X.reshape((batch_size, num_pred_positions, -1))
    mlm_Y_hat = self.mlp(masked_X)
    return mlm_Y_hat
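# A tiny illustration of the fancy-indexing gather above, in plain NumPy
# (the names and shapes here are illustrative): pairing `batch_idx` with the
# flattened positions picks one hidden vector per (batch, position) pair.
import numpy as onp
X_demo = onp.arange(2 * 8 * 4).reshape(2, 8, 4)   # (batch=2, seq=8, hidden=4)
positions = onp.array([[1, 5, 2], [6, 1, 5]])     # (batch, num_pred_positions)
batch_idx_demo = onp.repeat(onp.arange(2), 3)     # [0, 0, 0, 1, 1, 1]
gathered = X_demo[batch_idx_demo, positions.reshape(-1)]  # (6, 4)
print(gathered.reshape(2, 3, -1).shape)           # (2, 3, 4)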
def _add_workload_sum():
    # OpArgMngr.add_workload('sum', np.ones(101, dtype=bool))
    OpArgMngr.add_workload('sum', np.arange(1, 10).reshape((3, 3)),
                           axis=1, keepdims=True)
    OpArgMngr.add_workload('sum', np.ones(500, dtype=np.float32) / 10.)
    OpArgMngr.add_workload('sum', np.ones(500, dtype=np.float64) / 10.)
    for dt in (np.float16, np.float32, np.float64):
        for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127, 128, 1024, 1235):
            d = np.arange(1, v + 1, dtype=dt)
            OpArgMngr.add_workload('sum', d)
        d = np.ones(500, dtype=dt)
        OpArgMngr.add_workload('sum', d[::2])
        OpArgMngr.add_workload('sum', d[1::2])
        OpArgMngr.add_workload('sum', d[::3])
        OpArgMngr.add_workload('sum', d[1::3])
        OpArgMngr.add_workload('sum', d[::-2])
        OpArgMngr.add_workload('sum', d[-1::-2])
        OpArgMngr.add_workload('sum', d[::-3])
        OpArgMngr.add_workload('sum', d[-1::-3])
        d = np.ones((1,), dtype=dt)
        d += d
        OpArgMngr.add_workload('sum', d)
def _add_workload_inner():
    OpArgMngr.add_workload('inner',
                           np.zeros(shape=(1, 80), dtype=np.float64),
                           np.zeros(shape=(1, 80), dtype=np.float64))
    for dt in [np.float32, np.float64]:
        # OpArgMngr.add_workload('inner', np.array(3, dtype=dt)[()], np.array([1, 2], dtype=dt))
        # OpArgMngr.add_workload('inner', np.array([1, 2], dtype=dt), np.array(3, dtype=dt)[()])
        A = np.array([[1, 2], [3, 4]], dtype=dt)
        B = np.array([[1, 3], [2, 4]], dtype=dt)
        C = np.array([1, 1], dtype=dt)
        OpArgMngr.add_workload('inner', A.T, C)
        OpArgMngr.add_workload('inner', C, A.T)
        OpArgMngr.add_workload('inner', B, C)
        OpArgMngr.add_workload('inner', C, B)
        OpArgMngr.add_workload('inner', A, B)
        OpArgMngr.add_workload('inner', A, A)
        OpArgMngr.add_workload('inner', A, A.copy())
        a = np.arange(5).astype(dt)
        b = a[::-1]
        OpArgMngr.add_workload('inner', b, a)
        a = np.arange(24).reshape(2, 3, 4).astype(dt)
        b = np.arange(24, 48).reshape(2, 3, 4).astype(dt)
        OpArgMngr.add_workload('inner', a, b)
        OpArgMngr.add_workload('inner', b, a)
def _add_workload_flip():
    OpArgMngr.add_workload('flip', np.random.normal(size=(4, 4)), 1)
    OpArgMngr.add_workload('flip', np.array([[0, 1, 2], [3, 4, 5]]), 1)
    OpArgMngr.add_workload('flip', np.random.normal(size=(4, 4)), 0)
    OpArgMngr.add_workload('flip', np.array([[0, 1, 2], [3, 4, 5]]), 0)
    OpArgMngr.add_workload('flip', np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]), 0)
    OpArgMngr.add_workload('flip', np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]), 1)
    OpArgMngr.add_workload('flip', np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]), 2)
    for i in range(4):
        OpArgMngr.add_workload('flip',
                               np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5), i)
    OpArgMngr.add_workload('flip', np.array([[1, 2, 3], [4, 5, 6]]))
    OpArgMngr.add_workload('flip', np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]), ())
    OpArgMngr.add_workload('flip', np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]), (0, 2))
    OpArgMngr.add_workload('flip', np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]), (1, 2))
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    net = get_net()
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.plot(np.arange(1, num_epochs + 1), [train_ls], xlabel='epoch',
             ylabel='log rmse', xlim=[1, num_epochs], yscale='log')
    print(f'train log rmse {float(train_ls[-1]):f}')
    # Apply the network to the test set
    preds = net(test_features).asnumpy()
    # Reformat the predictions to export to Kaggle
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('submission.csv', index=False)
def detach_computation():
    """
    Let's say we have a function y that relies on a variable x. We could
    write that as: y = x * x. Now let's also say that we have a function z
    that depends on both y and x. If we wanted to compute the gradient of z
    with respect to x while treating y as a constant, how could we do it?
    The answer: we can compute y and then detach it from the graph, allowing
    it to be used as a constant in another computation.
    """
    x = np.arange(4)
    x.attach_grad()
    with autograd.record():
        # The computation for y is recorded, with the derivative of y
        # being 2x.
        y = x * x
        # Here we convert y to a constant by creating a variable u that is
        # detached from the graph, allowing us to record future computations
        # that use the result of y but not how that result came about.
        u = y.detach()
        # Now we can compute the gradient of z with respect to x while
        # treating u as a constant. (The derivative is simply u, instead of
        # the 3x^2 we would get from z = y * x = x^3 had we used y.)
        z = u * x
    print(z)
    # We now obtain the gradient of z with respect to x. Because we used the
    # detached variable u, the gradient is just u.
    z.backward()
    print(x.grad)
    print("The gradients for the partial derivative of z with respect to x:",
          x.grad)
    print(x.grad == u)
    # Because the operations for y were recorded under autograd, we can still
    # compute the gradient of y with respect to x.
    y.backward()
    print("The derivative of y is 2x, resulting in gradients:", x.grad)
    print(x.grad == 2 * x)
def n_dim_array_operations():
    x = np.array([1, 2, 4, 8])
    y = np.array([2, 2, 2, 2])
    print(x + y, x - y, x * y, x / y, x**y)  # The ** operator is exponentiation
    print("e^x of {} = {}".format(x, np.exp(x)))
    print("sin(x) of {} = {}".format(x, np.sin(x)))
    x = np.arange(12).reshape(3, 4)
    y = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
    axis0 = np.concatenate([x, y], axis=0)
    print("concat axis 0 : {}, shape {}".format(axis0, axis0.shape))
    axis1 = np.concatenate([x, y], axis=1)
    print("concat axis 1 : {}, shape {}".format(axis1, axis1.shape))
    equal = x == y
    greater = x > y
    print("equal x = y: {} == {} = {}".format(x, y, equal))
    print("greater x > y: {} > {} = {}".format(x, y, greater))
def test_samplek_func(batch_size, beam_size, target_vocab_size, top_n):
    pytest.importorskip("mxnet")
    from mxnet import np
    import sockeye.beam_search

    # Arrange scores as increasing values from left to right, so the best
    # item is always index 0, the next-best 1, and so on.
    scores = np.array([
        list(range(1, target_vocab_size + 1))
        for _ in range(batch_size * beam_size)
    ])
    # Normalize
    target_dists = scores / scores.sum(axis=1, keepdims=True)

    samplek = sockeye.beam_search.SampleK(n=top_n)
    samplek.initialize()

    sample_best_hyp_indices = np.arange(0, batch_size * beam_size,
                                        dtype='int32')

    # 0..(batch_size * beam_size)-1
    expected_hyps = np.array(range(batch_size * beam_size), dtype='int32')
    finished = (np.random.uniform(0, 1, (batch_size * beam_size))
                > 0.5).astype('int32')

    for i in [1, 2]:
        if i == 2:
            samplek.hybridize()
        hyps, words, values = samplek(scores, scores, finished,
                                      sample_best_hyp_indices)
        assert hyps.shape[0] == batch_size * beam_size

        # The indices should always be the integers from 0 to batch*beam-1
        assert sum(hyps == expected_hyps).item() == (batch_size * beam_size)
        if top_n != 0:
            # Scores are increasing left-to-right, so the best items are all
            # the lowest word IDs. No word ID greater than the cap (top_n)
            # should be selected.
            assert np.sum(words >= top_n).item() == 0
        # Word index should be zero for all finished hypotheses
        assert np.sum(np.where(finished, words, finished)).item() == 0
def memory():
    x = np.arange(12).reshape(3, 4)
    y = np.array([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
    before = id(x)
    x += y
    equals = id(x) == before
    print("id(x) after {} : id(x) before {} : equals {}".format(
        id(x), before, equals))
    z = np.sin(y)
    print('id(z):', id(z))
    z[:] = x + y
    print('id(z):', id(z))
    a = x.asnumpy()
    b = np.array(a)
    print("type a {}, type b {}, id(a) {}, id(b) {}".format(
        type(a), type(b), id(a), id(b)))
    a = np.array([3.5])
    print(a, a.item(), float(a), int(a))
def forward(self, X, state):
    enc_outputs, enc_valid_len = state[0], state[1]
    # state[2][self.i] contains the past queries for this block
    if state[2][self.i] is None:
        key_values = X
    else:
        key_values = np.concatenate((state[2][self.i], X), axis=1)
    state[2][self.i] = key_values
    if autograd.is_training():
        batch_size, seq_len, _ = X.shape
        # Shape: (batch_size, seq_len), where the values in the j-th column
        # are j + 1
        valid_len = np.tile(np.arange(1, seq_len + 1, ctx=X.ctx),
                            (batch_size, 1))
    else:
        valid_len = None
    X2 = self.attention1(X, key_values, key_values, valid_len)
    Y = self.addnorm1(X, X2)
    Y2 = self.attention2(Y, enc_outputs, enc_outputs, enc_valid_len)
    Z = self.addnorm2(Y, Y2)
    return self.addnorm3(Z, self.ffn(Z)), state
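# To illustrate the causal mask built during training above: with
# batch_size = 2 and seq_len = 4, valid_len is
#   [[1, 2, 3, 4],
#    [1, 2, 3, 4]]
# i.e. the j-th query in every sequence may only attend to the first j + 1
# key-value pairs (itself and everything before it).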
def multibox_detection(cls_probs, offset_preds, anchors, nms_threshold=0.5,
                       pos_threshold=0.00999999978):
    device, batch_size = cls_probs.ctx, cls_probs.shape[0]
    anchors = np.squeeze(anchors, axis=0)
    num_classes, num_anchors = cls_probs.shape[1], cls_probs.shape[2]
    out = []
    for i in range(batch_size):
        cls_prob, offset_pred = cls_probs[i], offset_preds[i].reshape(-1, 4)
        conf, class_id = np.max(cls_prob[1:], 0), np.argmax(cls_prob[1:], 0)
        predicted_bb = offset_inverse(anchors, offset_pred)
        keep = nms(predicted_bb, conf, nms_threshold)

        # Find all non-keep indices and set the class_id to background
        all_idx = np.arange(num_anchors, dtype=np.int32, ctx=device)
        combined = np.concatenate((keep, all_idx))
        unique, counts = np.unique(combined, return_counts=True)
        non_keep = unique[counts == 1]
        all_id_sorted = np.concatenate((keep, non_keep))
        class_id[non_keep] = -1
        class_id = class_id[all_id_sorted].astype('float32')
        conf, predicted_bb = conf[all_id_sorted], predicted_bb[all_id_sorted]

        # Predictions with confidence below pos_threshold are treated as
        # background
        below_min_idx = (conf < pos_threshold)
        class_id[below_min_idx] = -1
        conf[below_min_idx] = 1 - conf[below_min_idx]
        pred_info = np.concatenate(
            (np.expand_dims(class_id, axis=1), np.expand_dims(conf, axis=1),
             predicted_bb), axis=1)
        out.append(pred_info)
    return np.stack(out)
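# `offset_inverse` and `nms` are assumed helpers from d2l's object-detection
# chapter and are not defined in this section. A sketch of `offset_inverse`
# as it is commonly written there (decode predicted offsets back into corner
# boxes; the /10 and /5 scaling constants follow that convention):
def offset_inverse(anchors, offset_preds):
    anc = d2l.box_corner_to_center(anchors)
    pred_bbox_xy = (offset_preds[:, :2] * anc[:, 2:] / 10) + anc[:, :2]
    pred_bbox_wh = np.exp(offset_preds[:, 2:] / 5) * anc[:, 2:]
    pred_bbox = np.concatenate((pred_bbox_xy, pred_bbox_wh), axis=1)
    return d2l.box_center_to_corner(pred_bbox)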
def pool2d(X, pool_size, mode='max'):
    # The function header and outer loop were missing here; reconstructed
    # from the loop body below, following the standard d2l pooling code.
    p_h, p_w = pool_size
    Y = np.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i:i + p_h, j:j + p_w].max()
            elif mode == 'avg':
                Y[i, j] = X[i:i + p_h, j:j + p_w].mean()
    return Y


X = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
print(X)
print(pool2d(X, (2, 2)))
print(pool2d(X, (2, 2), 'avg'))

X = np.arange(16, dtype=np.float32).reshape((1, 1, 4, 4))
print(X)
pool2d = nn.MaxPool2D(3)
# Because there are no model parameters in the pooling layer, we do not need
# to call the parameter initialization function
print(pool2d(X))

pool2d = nn.MaxPool2D((2, 3), padding=(1, 2), strides=(2, 3))
print(pool2d(X))

# Multiple channels
X = np.concatenate((X, X + 1), 1)
print(X)
pool2d_mp = nn.MaxPool2D(3, padding=1, strides=2)
print(pool2d_mp(X))
def _init_sinusoidal_base(units):
    half_units = units // 2
    val = np.log(10000) / (half_units - 1)
    val = np.exp(np.arange(half_units, dtype=np.float32) * -val)
    return val
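# A sketch of how such a base is typically consumed (assumed usage, not part
# of this file): scale positions by the base frequencies, then take sin and
# cos to obtain a sinusoidal embedding of width `units`.
def _sinusoidal_embeddings(positions, units):
    base = _init_sinusoidal_base(units)                    # (units // 2,)
    angles = np.expand_dims(positions.astype(np.float32), -1) * base
    return np.concatenate([np.sin(angles), np.cos(angles)], axis=-1)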
def main():
    A = np.arange(20).reshape(5, 4)
    products(A)
#4.1.2 Activation Functions
from mxnet import autograd, np, npx
from d2l import mxnet as d2l
npx.set_np()

#Plot the ReLU function
x = np.arange(-8.0, 8.0, 0.1)
x.attach_grad()
with autograd.record():
    y = npx.relu(x)
d2l.plot(x, y, 'x', 'relu(x)', figsize = (5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of relu', figsize = (5, 2.5))

#Plot the sigmoid function
with autograd.record():
    y = npx.sigmoid(x)
d2l.plot(x, y, 'x', 'sigmoid(x)', figsize = (5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of sigmoid', figsize = (5, 2.5))

#Plot the tanh function
with autograd.record():
    y = np.tanh(x)  #npx doesn't provide tanh, so use np.tanh
d2l.plot(x, y, 'x', 'tanh(x)', figsize = (5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of tanh', figsize = (5, 2.5))

#Calculate the derivative of the pReLU activation function
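#The pReLU block above was left unfinished. A sketch of one way to complete
#it (alpha is an assumed slope parameter; pReLU(x) = relu(x) - alpha * relu(-x)):
alpha = 0.1
with autograd.record():
    y = npx.relu(x) - alpha * npx.relu(-x)
d2l.plot(x, y, 'x', 'prelu(x)', figsize = (5, 2.5))
y.backward()
d2l.plot(x, x.grad, 'x', 'grad of prelu', figsize = (5, 2.5))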
def _prepare_workloads():
    array_pool = {
        '4x1': np.random.uniform(size=(4, 1)) + 2,
        '1x2': np.random.uniform(size=(1, 2)) + 2,
        '1x1x0': np.array([[[]]])
    }

    dt_int = [np.int8, np.int32, np.int64, np.uint8]
    dt_float = [np.float16, np.float32, np.float64]
    dt = dt_int + dt_float

    # workloads for array function protocol
    OpArgMngr.add_workload('argmax', array_pool['4x1'])
    OpArgMngr.add_workload('broadcast_arrays', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('broadcast_to', array_pool['4x1'], (4, 2))
    OpArgMngr.add_workload('clip', array_pool['4x1'], 0.2, 0.8)
    OpArgMngr.add_workload('concatenate', [array_pool['4x1'], array_pool['4x1']])
    OpArgMngr.add_workload('concatenate', [array_pool['4x1'], array_pool['4x1']], axis=1)
    OpArgMngr.add_workload('copy', array_pool['4x1'])
    for ctype in dt:
        OpArgMngr.add_workload('cumsum', np.array([1, 2, 10, 11, 6, 5, 4], dtype=ctype))
        OpArgMngr.add_workload('cumsum', np.array(
            [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]], dtype=ctype), axis=0)
        OpArgMngr.add_workload('cumsum', np.array(
            [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]], dtype=ctype), axis=1)
    OpArgMngr.add_workload(
        'ravel', np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]))
    OpArgMngr.add_workload('dot', array_pool['4x1'], array_pool['4x1'].T)
    OpArgMngr.add_workload('expand_dims', array_pool['4x1'], -1)
    OpArgMngr.add_workload('fix', array_pool['4x1'])
    OpArgMngr.add_workload('max', array_pool['4x1'])
    OpArgMngr.add_workload('min', array_pool['4x1'])
    OpArgMngr.add_workload('mean', array_pool['4x1'])
    OpArgMngr.add_workload('mean', array_pool['4x1'], axis=0, keepdims=True)
    OpArgMngr.add_workload('mean', np.array([[1, 2, 3], [4, 5, 6]]))
    OpArgMngr.add_workload('mean', np.array([[1, 2, 3], [4, 5, 6]]), axis=0)
    OpArgMngr.add_workload('mean', np.array([[1, 2, 3], [4, 5, 6]]), axis=1)
    OpArgMngr.add_workload('ones_like', array_pool['4x1'])
    OpArgMngr.add_workload('prod', array_pool['4x1'])
    OpArgMngr.add_workload('repeat', array_pool['4x1'], 3)
    OpArgMngr.add_workload('repeat', np.array(_np.arange(12).reshape(4, 3)[:, 2]), 3)
    m = _np.array([1, 2, 3, 4, 5, 6])
    m_rect = m.reshape((2, 3))
    # OpArgMngr.add_workload('repeat', np.array(m), [1, 3, 2, 1, 1, 2])  # Argument "repeats" only supports int
    OpArgMngr.add_workload('repeat', np.array(m), 2)
    B = np.array(m_rect)
    # OpArgMngr.add_workload('repeat', B, [2, 1], axis=0)  # Argument "repeats" only supports int
    # OpArgMngr.add_workload('repeat', B, [1, 3, 2], axis=1)  # Argument "repeats" only supports int
    OpArgMngr.add_workload('repeat', B, 2, axis=0)
    OpArgMngr.add_workload('repeat', B, 2, axis=1)
    # test_repeat_broadcasting
    a = _np.arange(60).reshape(3, 4, 5)
    for axis in itertools.chain(range(-a.ndim, a.ndim), [None]):
        OpArgMngr.add_workload('repeat', np.array(a), 2, axis=axis)
        # OpArgMngr.add_workload('repeat', np.array(a), [2], axis=axis)  # Argument "repeats" only supports int
    arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    OpArgMngr.add_workload('reshape', arr, (2, 6))
    OpArgMngr.add_workload('reshape', arr, (3, 4))
    # OpArgMngr.add_workload('reshape', arr, (3, 4), order='F')  # Items are not equal with order='F'
    OpArgMngr.add_workload('reshape', arr, (3, 4), order='C')
    OpArgMngr.add_workload('reshape', np.array(_np.ones(100)), (100, 1, 1))
    # test_reshape_order
    a = np.array(_np.arange(6))
    # OpArgMngr.add_workload('reshape', a, (2, 3), order='F')  # Items are not equal with order='F'
    a = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    b = a[:, 1]
    # OpArgMngr.add_workload('reshape', b, (2, 2), order='F')  # Items are not equal with order='F'
    a = np.array(_np.ones((0, 2)))
    OpArgMngr.add_workload('reshape', a, -1, 2)
    OpArgMngr.add_workload('rint', np.array(4607998452777363968))
    OpArgMngr.add_workload('rint', array_pool['4x1'])
    # test_roll1d(self)
    OpArgMngr.add_workload('roll', np.array(_np.arange(10)), 2)
    # test_roll2d(self)
    x2 = np.array(_np.reshape(_np.arange(10), (2, 5)))
    OpArgMngr.add_workload('roll', x2, 1)
    OpArgMngr.add_workload('roll', x2, 1, axis=0)
    OpArgMngr.add_workload('roll', x2, 1, axis=1)
    # Roll multiple axes at once.
    OpArgMngr.add_workload('roll', x2, 1, axis=(0, 1))
    OpArgMngr.add_workload('roll', x2, (1, 0), axis=(0, 1))
    OpArgMngr.add_workload('roll', x2, (-1, 0), axis=(0, 1))
    OpArgMngr.add_workload('roll', x2, (0, 1), axis=(0, 1))
    OpArgMngr.add_workload('roll', x2, (0, -1), axis=(0, 1))
    OpArgMngr.add_workload('roll', x2, (1, 1), axis=(0, 1))
    OpArgMngr.add_workload('roll', x2, (-1, -1), axis=(0, 1))
    # Roll the same axis multiple times.
    # OpArgMngr.add_workload('roll', x2, 1, axis=(0, 0))  # Check failed: axes[i - 1] < axes[i] (0 vs. 0) : axes have duplicates [0,0]
    # OpArgMngr.add_workload('roll', x2, 1, axis=(1, 1))  # Check failed: axes[i - 1] < axes[i] (1 vs. 1) : axes have duplicates [1,1]
    # Roll more than one turn in either direction.
    OpArgMngr.add_workload('roll', x2, 6, axis=1)
    OpArgMngr.add_workload('roll', x2, -4, axis=1)
    # test_roll_empty
    OpArgMngr.add_workload('roll', np.array([]), 1)
    OpArgMngr.add_workload('split', array_pool['4x1'], 2)
    OpArgMngr.add_workload('squeeze', array_pool['4x1'])
    OpArgMngr.add_workload('stack', [array_pool['4x1']] * 2)
    OpArgMngr.add_workload('std', array_pool['4x1'])
    OpArgMngr.add_workload('sum', array_pool['4x1'])
    OpArgMngr.add_workload('swapaxes', array_pool['4x1'], 0, 1)
    OpArgMngr.add_workload('tensordot', array_pool['4x1'], array_pool['4x1'])
    OpArgMngr.add_workload('tile', array_pool['4x1'], 2)
    OpArgMngr.add_workload('tile', np.array([[[]]]), (3, 2, 5))
    OpArgMngr.add_workload('transpose', array_pool['4x1'])
    OpArgMngr.add_workload('var', array_pool['4x1'])
    OpArgMngr.add_workload('zeros_like', array_pool['4x1'])
    OpArgMngr.add_workload('outer', np.ones((5)), np.ones((2)))
    OpArgMngr.add_workload('meshgrid', np.array([1, 2, 3]))
    OpArgMngr.add_workload('meshgrid', np.array([1, 2, 3]), np.array([4, 5, 6, 7]))
    OpArgMngr.add_workload('meshgrid', np.array([1, 2, 3]), np.array([4, 5, 6, 7]),
                           indexing='ij')
    _add_workload_einsum()

    # workloads for array ufunc protocol
    OpArgMngr.add_workload('add', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('add', array_pool['4x1'], 2)
    OpArgMngr.add_workload('add', 2, array_pool['4x1'])
    OpArgMngr.add_workload('add', array_pool['4x1'], array_pool['1x1x0'])
    OpArgMngr.add_workload('subtract', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('subtract', array_pool['4x1'], 2)
    OpArgMngr.add_workload('subtract', 2, array_pool['4x1'])
    OpArgMngr.add_workload('subtract', array_pool['4x1'], array_pool['1x1x0'])
    OpArgMngr.add_workload('multiply', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('multiply', array_pool['4x1'], 2)
    OpArgMngr.add_workload('multiply', 2, array_pool['4x1'])
    OpArgMngr.add_workload('multiply', array_pool['4x1'], array_pool['1x1x0'])
    OpArgMngr.add_workload('power', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('power', array_pool['4x1'], 2)
    OpArgMngr.add_workload('power', 2, array_pool['4x1'])
    OpArgMngr.add_workload('power', array_pool['4x1'], array_pool['1x1x0'])
    OpArgMngr.add_workload('power', np.array([1, 2, 3], np.int32), 2.00001)
    OpArgMngr.add_workload('power', np.array([15, 15], np.int64),
                           np.array([15, 15], np.int64))
    OpArgMngr.add_workload('power', 0, np.arange(1, 10))
    OpArgMngr.add_workload('mod', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('mod', array_pool['4x1'], 2)
    OpArgMngr.add_workload('mod', 2, array_pool['4x1'])
    OpArgMngr.add_workload('mod', array_pool['4x1'], array_pool['1x1x0'])
    # test remainder basic
    OpArgMngr.add_workload('remainder', np.array([0, 1, 2, 4, 2], dtype=np.float16),
                           np.array([-2, 5, 1, 4, 3], dtype=np.float16))

    def _signs(dt):
        if dt in [np.uint8]:
            return (+1,)
        else:
            return (+1, -1)

    for ct in dt:
        for sg1, sg2 in itertools.product(_signs(ct), _signs(ct)):
            a = np.array(sg1 * 71, dtype=ct)
            b = np.array(sg2 * 19, dtype=ct)
            OpArgMngr.add_workload('remainder', a, b)

    # test remainder exact
    nlst = list(range(-127, 0))
    plst = list(range(1, 128))
    dividend = nlst + [0] + plst
    divisor = nlst + plst
    arg = list(itertools.product(dividend, divisor))
    tgt = list(divmod(*t) for t in arg)
    a, b = np.array(arg, dtype=int).T
    # convert exact integer results from Python to float so that
    # signed zero can be used, it is checked.
    for dt in [np.float16, np.float32, np.float64]:
        fa = a.astype(dt)
        fb = b.astype(dt)
        OpArgMngr.add_workload('remainder', fa, fb)

    # test_float_remainder_roundoff
    for ct in dt_float:
        for sg1, sg2 in itertools.product((+1, -1), (+1, -1)):
            a = np.array(sg1 * 78 * 6e-8, dtype=ct)
            b = np.array(sg2 * 6e-8, dtype=ct)
            OpArgMngr.add_workload('remainder', a, b)

    # test_float_remainder_corner_cases
    # Check remainder magnitude.
    for ct in dt_float:
        b = _np.array(1.0)
        a = np.array(_np.nextafter(_np.array(0.0), -b), dtype=ct)
        b = np.array(b, dtype=ct)
        OpArgMngr.add_workload('remainder', a, b)
        OpArgMngr.add_workload('remainder', -a, -b)
    # Check nans, inf
    for ct in [np.float16, np.float32, np.float64]:
        fone = np.array(1.0, dtype=ct)
        fzer = np.array(0.0, dtype=ct)
        finf = np.array(np.inf, dtype=ct)
        fnan = np.array(np.nan, dtype=ct)
        # OpArgMngr.add_workload('remainder', fone, fzer)  # failed
        OpArgMngr.add_workload('remainder', fone, fnan)
        OpArgMngr.add_workload('remainder', finf, fone)
    OpArgMngr.add_workload('maximum', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('maximum', array_pool['4x1'], 2)
    OpArgMngr.add_workload('maximum', 2, array_pool['4x1'])
    OpArgMngr.add_workload('maximum', array_pool['4x1'], array_pool['1x1x0'])
    OpArgMngr.add_workload('minimum', array_pool['4x1'], array_pool['1x2'])
    OpArgMngr.add_workload('minimum', array_pool['4x1'], 2)
    OpArgMngr.add_workload('minimum', 2, array_pool['4x1'])
    OpArgMngr.add_workload('minimum', array_pool['4x1'], array_pool['1x1x0'])
    OpArgMngr.add_workload('negative', array_pool['4x1'])
    OpArgMngr.add_workload('absolute', array_pool['4x1'])
    OpArgMngr.add_workload('sign', array_pool['4x1'])
    OpArgMngr.add_workload('sign', np.array([-2, 5, 1, 4, 3], dtype=np.float16))
    OpArgMngr.add_workload('sign', np.array([-.1, 0, .1]))
    # OpArgMngr.add_workload('sign', np.array(_np.array([_np.nan])))  # failed
    OpArgMngr.add_workload('exp', array_pool['4x1'])
    OpArgMngr.add_workload('log', array_pool['4x1'])
    OpArgMngr.add_workload('log2', array_pool['4x1'])
    OpArgMngr.add_workload('log2', np.array(2.**65))
    OpArgMngr.add_workload('log2', np.array(np.inf))
    OpArgMngr.add_workload('log2', np.array(1.))
    OpArgMngr.add_workload('log1p', np.array(-1.))
    OpArgMngr.add_workload('log1p', np.array(np.inf))
    OpArgMngr.add_workload('log1p', np.array(1e-6))
    OpArgMngr.add_workload('log10', array_pool['4x1'])
    OpArgMngr.add_workload('expm1', array_pool['4x1'])
    OpArgMngr.add_workload('sqrt', array_pool['4x1'])
    OpArgMngr.add_workload('square', array_pool['4x1'])
    OpArgMngr.add_workload('cbrt', array_pool['4x1'])
    for ctype in [np.float16, np.float32, np.float64]:
        OpArgMngr.add_workload('reciprocal', np.array([-2, 5, 1, 4, 3], dtype=ctype))
        OpArgMngr.add_workload('reciprocal', np.array([-2, 0, 1, 0, 3], dtype=ctype))
        OpArgMngr.add_workload('reciprocal', np.array([0], dtype=ctype))
    OpArgMngr.add_workload('sin', array_pool['4x1'])
    OpArgMngr.add_workload('cos', array_pool['4x1'])
    OpArgMngr.add_workload('tan', array_pool['4x1'])
    OpArgMngr.add_workload('sinh', array_pool['4x1'])
    OpArgMngr.add_workload('cosh', array_pool['4x1'])
    OpArgMngr.add_workload('tanh', array_pool['4x1'])
    OpArgMngr.add_workload('arcsin', array_pool['4x1'] - 2)
    OpArgMngr.add_workload('arccos', array_pool['4x1'] - 2)
    OpArgMngr.add_workload('arctan', array_pool['4x1'])
    OpArgMngr.add_workload('arcsinh', array_pool['4x1'])
    OpArgMngr.add_workload('arccosh', array_pool['4x1'])
    OpArgMngr.add_workload('arctanh', array_pool['4x1'] - 2)
    OpArgMngr.add_workload('ceil', array_pool['4x1'])
    OpArgMngr.add_workload('trunc', array_pool['4x1'])
    OpArgMngr.add_workload('floor', array_pool['4x1'])
    OpArgMngr.add_workload('logical_not', np.ones(10, dtype=np.int32))
    OpArgMngr.add_workload('logical_not', array_pool['4x1'])
    OpArgMngr.add_workload('logical_not', np.array([True, False, True, False],
                                                   dtype=np.bool))
def test_crop_resize():
    def _test_crop_resize_with_diff_type(dtype):
        # test normal case
        data_in = np.arange(60).reshape((5, 4, 3)).astype(dtype)
        out_nd = transforms.CropResize(0, 0, 3, 2)(data_in)
        out_np = out_nd.asnumpy()
        assert out_np.sum() == 180
        assert (out_np[0:2, 1, 1].flatten() == [4, 16]).all()
        # test 4D input
        data_batch_in = np.arange(180).reshape((2, 6, 5, 3)).astype(dtype)
        out_batch_nd = transforms.CropResize(1, 2, 3, 4)(data_batch_in)
        out_batch_np = out_batch_nd.asnumpy()
        assert out_batch_np.sum() == 7524
        assert (out_batch_np[0:2, 0:4, 1, 1].flatten() == [
            37, 52, 67, 82, 127, 142, 157, 172
        ]).all()
        # test normal case with resize
        data_in = np.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
        out_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 1)(data_in)
        data_expected = transforms.Resize(size=25, interpolation=1)(
            data_in[:50, :100, :3])  # nd.slice(data_in, (0, 0, 0), (50, 100, 3))
        assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
        # test 4D input with resize
        data_batch_in = np.random.uniform(0, 255, (3, 300, 200, 3)).astype(dtype)
        out_batch_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 1)(data_batch_in)
        for i in range(len(out_batch_nd)):
            actual = transforms.Resize(size=25, interpolation=1)(
                data_batch_in[i][:50, :100, :3]
            ).asnumpy()  # (nd.slice(data_batch_in[i], (0, 0, 0), (50, 100, 3))).asnumpy()
            expected = out_batch_nd[i].asnumpy()
            assert_almost_equal(expected, actual)
        # test with resize: height and width should be greater than 0
        transformer = transforms.CropResize(0, 0, 100, 50, (-25, 25), 1)
        assertRaises(MXNetError, transformer, data_in)
        # test that height and width should be greater than 0
        transformer = transforms.CropResize(0, 0, -100, -50)
        assertRaises(MXNetError, transformer, data_in)
        # test that the cropped area is bigger than the input data
        transformer = transforms.CropResize(150, 200, 200, 500)
        assertRaises(MXNetError, transformer, data_in)
        assertRaises(MXNetError, transformer, data_batch_in)

    for dtype in ['uint8', 'float32', 'float64']:
        _test_crop_resize_with_diff_type(dtype)

    # test npx.image.crop backward
    def test_crop_backward(test_nd_arr, TestCase):
        a_np = test_nd_arr.asnumpy()
        b_np = a_np[(slice(TestCase.y, TestCase.y + TestCase.height),
                     slice(TestCase.x, TestCase.x + TestCase.width),
                     slice(0, 3))]
        data = mx.sym.Variable('data')
        crop_sym = mx.sym.image.crop(data, TestCase.x, TestCase.y,
                                     TestCase.width, TestCase.height)
        expected_in_grad = np.zeros_like(np.array(a_np))
        expected_in_grad[(slice(TestCase.y, TestCase.y + TestCase.height),
                          slice(TestCase.x, TestCase.x + TestCase.width),
                          slice(0, 3))] = b_np
        check_symbolic_backward(crop_sym, [a_np], [b_np], [expected_in_grad])

    TestCase = namedtuple('TestCase', ['x', 'y', 'width', 'height'])
    test_list = [
        TestCase(0, 0, 3, 3),
        TestCase(2, 1, 1, 2),
        TestCase(0, 1, 3, 2)
    ]
    for dtype in ['uint8', 'float32', 'float64']:
        data_in = np.arange(60).reshape((5, 4, 3)).astype(dtype)
        for test_case in test_list:
            test_crop_backward(data_in, test_case)