def __init__(self, in_features, parameters=None):
    """
    Init method.
    """
    super(SReLU, self).__init__()
    self.in_features = in_features
    if parameters is None:
        self.tr = mge.Parameter(
            mge.tensor(np.random.randn(in_features).astype(np.float32))
        )
        self.tl = mge.Parameter(
            mge.tensor(np.random.randn(in_features).astype(np.float32))
        )
        self.ar = mge.Parameter(
            mge.tensor(np.random.randn(in_features).astype(np.float32))
        )
        self.al = mge.Parameter(
            mge.tensor(np.random.randn(in_features).astype(np.float32))
        )
        self.tr.requiresGrad = True
        self.tl.requiresGrad = True
        self.ar.requiresGrad = True
        self.al.requiresGrad = True
    else:
        self.tr, self.tl, self.ar, self.al = parameters
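# The __init__ above only creates the four per-feature parameters (tr, tl, ar, al).
# Below is a minimal forward sketch of the standard S-shaped ReLU piecewise definition;
# it is not part of the original snippet and assumes megengine.functional is imported as F.
def forward(self, x):
    # right branch: tr + ar * (x - tr) for x >= tr, identity in between
    out = F.where(x >= self.tr, self.tr + self.ar * (x - self.tr), x)
    # left branch: tl + al * (x - tl) for x <= tl
    out = F.where(x <= self.tl, self.tl + self.al * (x - self.tl), out)
    return out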
def test_regression_1762():
    x = F.ones((10, 10, 3, 3))
    conv = M.Conv2d(10, 10, kernel_size=3, padding=1)
    t_shape = (1, 10, 1, 1)
    weight = mge.Parameter(np.ones(t_shape, dtype=np.float32))
    bias = mge.Parameter(np.zeros(t_shape, dtype=np.float32))
    gm = GradManager()
    gm.attach(list(conv.parameters()) + [weight, bias])
    with gm:
        out1 = conv(x)
        out2 = F.batch_norm(
            out1, None, None, weight, bias, training=True,
        )
        # The weird error only occurs when this op is placed after BN;
        # the op type is not relevant.
        loss = out1 + 1
        gm.backward(loss)
def get_params(l1):
    W1 = mge.Parameter(np.random.randn(l1, 2).astype(np.float32))
    B1 = mge.Parameter(np.random.randn(l1).astype(np.float32))
    W2 = mge.Parameter(np.random.randn(2, l1).astype(np.float32))
    B2 = mge.Parameter(np.random.randn(2).astype(np.float32))
    return W1, B1, W2, B2
def __init__(self, alpha=None):
    """
    Init method.
    """
    super(SoftExponential, self).__init__()

    # initialize alpha
    if alpha is None:
        self.alpha = mge.Parameter(mge.tensor(0.0))
    else:
        self.alpha = mge.Parameter(mge.tensor(alpha))

    self.alpha.requiresGrad = True
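# The __init__ above only creates the learnable alpha. A minimal forward sketch of the
# published soft-exponential definition follows; it is not part of the original snippet
# and assumes megengine.functional is imported as F.
def forward(self, x):
    a = float(self.alpha.numpy())
    if a == 0.0:
        return x
    if a < 0.0:
        # -log(1 - alpha * (x + alpha)) / alpha
        return -F.log(1 - self.alpha * (x + self.alpha)) / self.alpha
    # (exp(alpha * x) - 1) / alpha + alpha
    return (F.exp(self.alpha * x) - 1) / self.alpha + self.alpha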
def test_empty_grad_in_backward():
    x = mge.Parameter(F.full(100, 0.5))
    y = mge.Parameter(F.ones(100))

    gm = GradManager()
    gm.attach([x, y])

    with gm:
        z = F.where(x > 0.7, x, y)
        loss = z.sum()
        gm.backward(loss)
        assert np.all(x.grad.numpy() == 0)
        assert np.all(y.grad.numpy() == 1)
def two_layer_conv(x):
    # (8, 3, 3, 3) means (output channels, input channels, kernel height, kernel width)
    conv_weight = mge.Parameter(np.random.randn(8, 3, 3, 3).astype(np.float32))
    # provide one bias for each of the 8 convolution kernels
    conv_bias = mge.Parameter(np.zeros((1, 8, 1, 1), dtype=np.float32))
    x = F.conv2d(x, conv_weight, conv_bias)
    x = F.relu(x)
    conv_weight = mge.Parameter(np.random.randn(16, 8, 3, 3).astype(np.float32))
    conv_bias = mge.Parameter(np.zeros((1, 16, 1, 1), dtype=np.float32))
    x = F.conv2d(x, conv_weight, conv_bias)
    x = F.relu(x)
    return x
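# A possible usage sketch (not part of the original snippet): feed an NCHW float32 batch.
# With the default padding of 0, the two 3x3 convolutions shrink each spatial dim by 4.
inp = mge.tensor(np.random.randn(2, 3, 32, 32).astype(np.float32))
out = two_layer_conv(inp)
print(out.shape)  # (2, 16, 28, 28)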
def test_no_dependency():
    x = mge.tensor(3)

    w = mge.Parameter(1.0)
    w_no_dep = mge.Parameter(1.0)
    gm = GradManager()
    gm.attach(w)
    gm.attach(w_no_dep)

    with gm:
        out1 = x * w
        out2 = w_no_dep * out1
        gm.backward(out1.sum())

    assert w.grad is not None
    assert w_no_dep.grad is None
def test_grad_twice_method_3():
    # model define
    model = CustomModel3()
    model.train()

    named_param = dict(list(model.named_parameters(requires_grad=True)))
    params = list(named_param.values())
    external_params = [
        meg.Parameter(np.random.normal(size=p.shape), dtype='float32')
        for p in params
    ]

    loss_fn = F.cross_entropy_with_softmax
    optimizer = optim.SGD(external_params, lr=0.003)

    # forward once
    optimizer.zero_grad()
    x1 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y1 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    x2 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y2 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    train_func3(x1, y1, x2, y2, loss_fn=loss_fn, opt=optimizer,
                net=model, params=external_params)
    optimizer.step()
def test_attached_tensors():
    w1 = mge.Parameter(2.0)
    w2 = mge.Parameter(2.0)
    gm = GradManager()

    def check(expected):
        actual = gm.attached_tensors()
        assert len(expected) == len(actual)
        for exp, act in zip(expected, actual):
            assert exp is act

    gm.attach(w1)
    check([w1])
    gm.attach(w2)
    check([w1, w2])
    gm.attach(w1)
    check([w1, w2])
def __init__(self, mode="normal"):
    super().__init__()
    self.data = np.random.random((10, 100)).astype(np.float32)
    self.data1 = np.random.random((10, 10, 10)).astype(np.float32)
    self.linear = M.Linear(100, 200, bias=False)
    self.linear_bias = M.Linear(200, 200, bias=True)
    self.linear_bias.bias = mge.Parameter(
        np.random.random(self.linear_bias.bias.shape).astype(np.float32)
    )
    self.mode = mode
def test_attach_in_with_block():
    a = mge.Parameter([1.0])
    gm = GradManager()
    with gm:
        b = a * 3
        gm.attach(b)
        c = b + 1
        gm.backward(c)
    assert int(b.grad.numpy()) == 1
def __init__(self, cfg, input_shape: List[layers.ShapeSpec]):
    super().__init__()
    self.stride_list = cfg.stride
    in_channels = input_shape[0].channels
    num_classes = cfg.num_classes
    num_convs = 4
    prior_prob = cfg.cls_prior_prob
    num_anchors = [cfg.num_anchors] * len(input_shape)

    assert (
        len(set(num_anchors)) == 1
    ), "not support different number of anchors between levels"
    num_anchors = num_anchors[0]

    cls_subnet = []
    bbox_subnet = []
    for _ in range(num_convs):
        cls_subnet.append(
            M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        )
        cls_subnet.append(GroupNorm(32, in_channels))
        cls_subnet.append(M.ReLU())
        bbox_subnet.append(
            M.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        )
        bbox_subnet.append(GroupNorm(32, in_channels))
        bbox_subnet.append(M.ReLU())

    self.cls_subnet = M.Sequential(*cls_subnet)
    self.bbox_subnet = M.Sequential(*bbox_subnet)
    self.cls_score = M.Conv2d(
        in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
    )
    self.bbox_pred = M.Conv2d(
        in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1
    )
    self.ctrness = M.Conv2d(
        in_channels, num_anchors * 1, kernel_size=3, stride=1, padding=1
    )

    # Initialization
    for modules in [
        self.cls_subnet, self.bbox_subnet, self.cls_score,
        self.bbox_pred, self.ctrness
    ]:
        for layer in modules.modules():
            if isinstance(layer, M.Conv2d):
                M.init.normal_(layer.weight, mean=0, std=0.01)
                M.init.fill_(layer.bias, 0)

    # Use prior in model initialization to improve stability
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    M.init.fill_(self.cls_score.bias, bias_value)

    self.scale_list = mge.Parameter(np.ones(len(self.stride_list), dtype=np.float32))
def __init__(self, ndim, num_features, eps=1e-6, learnable_eps=False):
    """
    Input Variables:
    ----------------
        ndim: An integer indicating the number of dimensions of the expected input tensor.
        num_features: An integer indicating the number of input feature dimensions.
        eps: A scalar constant or learnable variable.
        learnable_eps: A bool value indicating whether the eps is learnable.
    """
    assert ndim in [4, ], \
        'FilterResponseNorm only supports 4d inputs.'
    super(FilterResponseNormNd, self).__init__()
    shape = (1, num_features) + (1, ) * (ndim - 2)
    self.eps = mge.tensor(np.ones(shape, dtype=np.float32) * eps)
    # if not learnable_eps:
    #     self.eps.requires_grad = False
    self.gamma = mge.Parameter(np.ones(shape, dtype=np.float32))
    self.beta = mge.Parameter(np.zeros(shape, dtype=np.float32))
    self.tau = mge.Parameter(np.zeros(shape, dtype=np.float32))
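# The __init__ above only defines eps, gamma, beta and tau. A minimal forward sketch of
# the Filter Response Normalization + TLU formulation follows; it is not part of the
# original snippet and assumes megengine.functional is imported as F.
def forward(self, x):
    # nu^2: mean of squared activations over the spatial dimensions
    nu2 = F.mean(x ** 2, axis=tuple(range(2, x.ndim)), keepdims=True)
    x = x / F.sqrt(nu2 + F.abs(self.eps))
    # thresholded linear unit: max(gamma * x + beta, tau)
    return F.maximum(self.gamma * x + self.beta, self.tau)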
def test_mge_81():
    np.random.seed(0)
    N, D = 3, 4
    x = mge.Parameter(value=np.random.normal(size=(N, D)).astype(np.float32))
    y = mge.Parameter(value=np.random.normal(size=(N, D)).astype(np.float32))
    z = mge.Parameter(value=np.random.normal(size=(N, D)).astype(np.float32))
    a = x * y
    b = a + z
    c = F.sum(b)
    grad_x = F.grad(c, x, use_virtual_grad=False)
    grad_y = F.grad(c, y, use_virtual_grad=False)
    grad_z = F.grad(c, z, use_virtual_grad=False)
    print(grad_x.numpy())
    print(grad_y.numpy())
    print(grad_z.numpy())
    m = M.BatchNorm2d(4)
    input = tensor(np.zeros((64, 4, 32, 32), dtype=np.float32))
    _ = m(input)
    m = M.BatchNorm2d(4, affine=False)
    _ = m(input)
def run_conv_bias(inp, w, b):
    b = b if has_bias else mge.Parameter(np.zeros_like(b.numpy()))
    return F.quantized.conv_bias_activation(
        inp,
        w,
        b,
        stride=(SH, SW),
        padding=(PH, PW),
        dtype=out_dtype,
        nonlinear_mode=nonlinear_mode,
    )
def __init__(self, mode):
    super().__init__()
    self.mode = mode
    self.data = np.random.random((1, 3, 224, 224)).astype(np.float32)
    self.normal_conv = M.Conv2d(
        3, 30, 3, stride=(2, 3), dilation=(2, 2), padding=(3, 1))
    self.group_conv = M.Conv2d(
        3, 30, 3, stride=(2, 3), dilation=(2, 2), padding=(3, 1), groups=3)

    self.valid_pad_conv = M.Conv2d(3, 30, 4, padding=(1, 1))
    self.valid_pad_1_conv = M.Conv2d(3, 30, 3, stride=2, padding=(1, 1))
    self.same_pad_conv = M.Conv2d(3, 30, 3, padding=(1, 1))
    self.same_pad_1_conv = M.Conv2d(3, 30, 4, stride=2, padding=(1, 1))
    self.same_pad_2_conv = M.Conv2d(3, 30, 2, dilation=3, stride=2, padding=(1, 1))

    self.normal_conv.bias = mge.Parameter(
        np.random.random(self.normal_conv.bias.shape).astype(np.float32))
    self.group_conv.bias = mge.Parameter(
        np.random.random(self.group_conv.bias.shape).astype(np.float32))

    self.transpose_conv = M.Sequential(
        M.ConvTranspose2d(3, 5, (3, 4), dilation=(2, 2), stride=(3, 2),
                          padding=(2, 3), groups=1),
        M.ConvTranspose2d(5, 3, (3, 3)),
    )
    self.transpose_conv[0].bias = mge.Parameter(
        np.random.random(self.transpose_conv[0].bias.shape).astype(np.float32))
    self.transpose_conv[1].bias = mge.Parameter(
        np.random.random(self.transpose_conv[1].bias.shape).astype(np.float32))

    self.tflite_transpose_conv = M.Sequential(
        M.ConvTranspose2d(3, 5, (3, 4), stride=(3, 2), groups=1),
        M.ConvTranspose2d(5, 3, (3, 3)),
    )
    self.tflite_transpose_conv[0].bias = mge.Parameter(
        np.random.random(self.transpose_conv[0].bias.shape).astype(np.float32))
    self.tflite_transpose_conv[1].bias = mge.Parameter(
        np.random.random(self.transpose_conv[1].bias.shape).astype(np.float32))
def test_tensor_set_dtype():
    def check_dtype_value(tensor, dtype_scale, value):
        if mgb.dtype.is_quantize(tensor.dtype):
            if np.abs(mgb.dtype.get_scale(tensor.dtype) - dtype_scale) > 1e-5:
                raise AssertionError(
                    "compare scale failed expect {} got {}".format(
                        dtype_scale, mgb.dtype.get_scale(tensor.dtype)))
            if np.abs(tensor.numpy()[0][0] - value) > 1e-5:
                raise AssertionError(
                    "compare value failed expect {} got {}".format(
                        value, tensor.numpy()[0][0]))

    t = mge.Parameter(np.ones((3, 4), dtype="float32"))
    t.set_dtype(mgb.dtype.qint8(0.1))
    check_dtype_value(t, 0.1, 10)

    t = mge.Parameter(np.ones((3, 4), dtype=mgb.dtype.qint8(1)))
    t.set_dtype(mgb.dtype.qint8(0.3))
    check_dtype_value(t, 0.3, 3)

    t = mge.Buffer(np.ones((3, 4), dtype="float32"))
    t.set_dtype(mgb.dtype.qint8(0.1))
    check_dtype_value(t, 0.1, 10)

    t = mge.Buffer(np.ones((3, 4), dtype=mgb.dtype.qint8(1)))
    t.set_dtype(mgb.dtype.qint8(0.3))
    check_dtype_value(t, 0.3, 3)

    t = mge.Buffer(np.ones((3, 4), dtype="float32"))
    s = t + 1
    s.set_dtype(mgb.dtype.qint8(0.2))
    check_dtype_value(s, 0.2, 10)
    t.set_dtype(mgb.dtype.qint8(0.3))
    s = t + 1
    s.set_dtype(mgb.dtype.qint8(0.1))
    check_dtype_value(s, 0.1, 18)
    s.set_dtype("float32")
    check_dtype_value(s, 0, 1.8)
def __init__(self):
    super().__init__()
    self.normal_conv = M.Conv2d(
        3, 30, 3, stride=(2, 3), padding=(3, 1), dilation=(2, 2),
    )
    self.normal_conv.bias = mge.Parameter(
        np.random.random(self.normal_conv.bias.shape).astype(np.float32))
def run_conv_bias(inp, w, b, format="NCHW"):
    b = b if has_bias else mge.Parameter(np.zeros_like(b.numpy()))
    if format == "NCHW4":
        inp = convert_to_nchw4(inp)
        w = convert_to_nchw4(w)
        b = convert_to_nchw4(b)
    return F.quantized.conv_bias_activation(
        inp,
        w,
        b,
        stride=(SH, SW),
        padding=(PH, PW),
        dtype=out_dtype,
        nonlinear_mode=nonlinear_mode,
    )
def test_tensor_name():
    p = mge.Parameter(np.ones((3, 4), dtype="float32"))
    assert "shared" in p.name
    with pytest.raises(ValueError):
        p.name = "Parameter0"

    b = mge.Buffer(np.ones((3, 4), dtype="float32"))
    assert "shared" in b.name
    with pytest.raises(ValueError):
        b.name = "Buffer0"

    s = b + 1
    assert "ADD" in s.name
    s.name = "WeightAdd1"
    assert s.name == "WeightAdd1"
def __init__(self, eswish=False, swish=False, beta=1.735, flatten=False):
    """
    Init method.
    """
    super(Swish, self).__init__()
    self.swish = swish
    self.eswish = eswish
    self.flatten = flatten
    self.beta = None
    self.param = None
    if eswish is not False:
        self.beta = beta
    if swish is not False:
        self.param = mge.Parameter(mge.tensor(np.random.randn(1)))
        self.param.requires_grad = True
    if eswish is not False and swish is not False and flatten is not False:
        raise RuntimeError(
            "Advisable to run either Swish or E-Swish or Flatten T-Swish"
        )
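# The __init__ above only selects the variant and creates its parameters. A minimal
# forward sketch of the three variants follows; it is not part of the original snippet,
# assumes megengine.functional is imported as F, and takes the Flatten T-Swish
# threshold T to be 0.
def forward(self, x):
    if self.eswish:
        # E-Swish: beta * x * sigmoid(x)
        return self.beta * x * F.sigmoid(x)
    if self.swish:
        # Swish with a learnable slope: x * sigmoid(param * x)
        return x * F.sigmoid(self.param * x)
    if self.flatten:
        # Flatten T-Swish (T = 0): x * sigmoid(x) for x >= 0, 0 otherwise
        return F.where(x >= 0, x * F.sigmoid(x), F.zeros_like(x))
    # default: plain Swish-1, x * sigmoid(x)
    return x * F.sigmoid(x)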
def test_attach_temporary():
    w = mge.Parameter(2.0)
    gm = GradManager()
    gm.attach(w)

    def cb(x, g):
        assert x is ref()
        cb.called = True

    for i in range(3):
        with gm:
            cb.called = False
            x = mge.Tensor(i, dtype="float32")
            gm.attach(x, callbacks=cb)
            ref = weakref.ref(x)
            y = x * w
            gm.backward(y)
            assert cb.called
        del x
        assert ref() is None
def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW,
    has_bias=True, nonlinear_mode="IDENTITY",
):
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))
    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_int8 = mge.tensor(inpv, dtype=inp_dtype)
    w_int8 = mge.Parameter(wv, dtype=w_dtype)
    b_int32 = mge.Parameter(bv, dtype=b_dtype)

    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def convert_to_nchw4(var):
        var = F.reshape(
            var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3]))
        var = F.transpose(var, (0, 1, 3, 4, 2))
        return var

    def run_conv2d(inp, w, b):
        O = F.conv2d(
            inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
        )
        if nonlinear_mode == "RELU":
            return F.relu(O)
        else:
            return O

    def run_conv_bias(inp, w, b, format="NCHW"):
        b = b if has_bias else mge.Parameter(np.zeros_like(b.numpy()))
        if format == "NCHW4":
            inp = convert_to_nchw4(inp)
            w = convert_to_nchw4(w)
            b = convert_to_nchw4(b)
        return F.quantized.conv_bias_activation(
            inp,
            w,
            b,
            stride=(SH, SW),
            padding=(PH, PW),
            dtype=out_dtype,
            nonlinear_mode=nonlinear_mode,
        )

    format = "NCHW4" if mge.is_cuda_available() else "NCHW"

    expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
    expected = expected.astype(out_dtype).astype("float32")
    result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype("float32")
    if format == "NCHW4":
        result = F.transpose(result, (0, 1, 4, 2, 3))
    expected = F.flatten(expected)
    result = F.flatten(result)
    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
from megengine.autodiff import GradManager

w = mge.tensor([3.])
x = mge.tensor([2.])
b = mge.tensor(-1.)

gm = GradManager().attach([w, b])   # create a grad manager and attach the variables that need gradients; the instance is conventionally named gm
with gm:                            # start recording the computation graph
    p = F.mul(w, x)
    y = p + b
    gm.backward(y)                  # compute the derivatives of y w.r.t. the attached variables, applying the chain rule along the way

print(w.grad)   # the result is x
print(b.grad)   # the result is 1

'''
Optimizer
'''

w = mge.Parameter([3.])
x = mge.Tensor([2.])
b = mge.Parameter(-1.)
print(type(w))
print(type(b))

gm = GradManager().attach([w, b])   # this time attach() receives Parameters rather than Tensors
with gm:
    p = F.mul(w, x)
    y = p + b
    gm.backward(y)

print(type(w.grad))   # the computed gradient is still a Tensor

import megengine.optimizer as optim   # optimizer is conventionally abbreviated as optim
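# A minimal sketch (not part of the original snippet) of how the imported optimizer is
# typically used together with GradManager; the SGD learning rate is illustrative.
optimizer = optim.SGD([w, b], lr=0.01)
optimizer.clear_grad()   # reset any previously accumulated gradients
with gm:
    p = F.mul(w, x)
    y = p + b
    gm.backward(y)
optimizer.step()         # update the attached Parameters using their .grad
print(w, b)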
def test_wrong_dtype():
    with pytest.raises(TypeError):
        mge.tensor(np.zeros((5, 5), dtype=np.float64))

    with pytest.raises(TypeError):
        mge.Parameter(np.zeros((5, 5), dtype=np.int64))
def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW,
    has_bias=True, nonlinear_mode="identity",
):
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))
    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    inpv = dtype.convert_to_quint4(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint4(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_uint4 = mge.Tensor(inpv, dtype=inp_dtype)
    w_int4 = mge.Parameter(wv, dtype=w_dtype)
    b_int32 = mge.Parameter(bv, dtype=b_dtype)

    inp_fp32 = inp_uint4.astype("float32")
    w_fp32 = w_int4.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def run_conv2d(inp, w, b):
        O = F.conv2d(
            inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
        )
        if nonlinear_mode == "relu":
            return F.relu(O)
        else:
            return O

    def run_conv_bias(inp, w, b):
        b = b if has_bias else mge.Parameter(np.zeros_like(b.numpy()))
        return F.quantized.conv_bias_activation(
            inp,
            w,
            b,
            stride=(SH, SW),
            padding=(PH, PW),
            dtype=out_dtype,
            nonlinear_mode=nonlinear_mode,
        )

    expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
    expected = expected.astype(out_dtype).astype("float32")
    result = run_conv_bias(inp_uint4, w_int4, b_int32).astype("float32")
    expected = F.flatten(expected)
    result = F.flatten(result)
    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
def test_func(
    N, IC, IH, IW, OC, KH, KW, SH, SW, PH, PW, DH, DW,
    groups=1,
    has_bias=True,
    conv_mode: str = "cross_correlation",
    compute_mode: str = "default",
):
    inp_scale = np.float32(rng.uniform(low=0.04, high=0.06))
    weight_scale = np.float32(rng.uniform(low=0.04, high=0.06))
    bias_scale = inp_scale * weight_scale
    out_scale = np.float32(rng.uniform(low=0.04, high=0.06))

    inp_dtype = dtype.qint8(inp_scale)
    weight_dtype = dtype.qint8(weight_scale)
    bias_dtype = dtype.qint32(bias_scale)
    out_dtype = dtype.qint8(out_scale)

    inp_fp32 = rng.uniform(low=-1, high=1, size=(N, IC, IH, IW)).astype(np.float32)
    weight_fp32 = rng.uniform(low=-1, high=1, size=(IC, OC, KH, KW)).astype(np.float32)
    bias_fp32 = rng.uniform(low=-1, high=1, size=(1, OC, 1, 1)).astype(np.float32)

    inp_int8 = dtype.convert_to_qint8(inp_fp32, inp_dtype)
    weight_int8 = dtype.convert_to_qint8(weight_fp32, weight_dtype)
    bias_int32 = dtype.convert_to_qint32(bias_fp32, bias_dtype)

    inp_int8 = mge.tensor(inp_int8, dtype=inp_dtype)
    weight_int8 = mge.Parameter(weight_int8, dtype=weight_dtype)
    bias_int32 = mge.Parameter(bias_int32, dtype=bias_dtype)

    inp_fp32 = inp_int8.astype("float32")
    weight_fp32 = weight_int8.astype("float32")
    bias_fp32 = bias_int32.astype("float32")

    expected = F.conv_transpose2d(
        inp_fp32,
        weight_fp32,
        bias_fp32 if has_bias else None,
        stride=(SH, SW),
        padding=(PH, PW),
        dilation=(DH, DW),
        groups=groups,
        conv_mode=conv_mode,
        compute_mode=compute_mode,
    )
    expected = dtype.convert_to_qint8(expected.numpy(), out_dtype)
    expected = dtype.convert_from_qint8(expected)

    conv_transpose2d = ConvTranspose2d(
        in_channels=IC,
        out_channels=OC,
        kernel_size=(KH, KW),
        stride=(SH, SW),
        padding=(PH, PW),
        dilation=(DH, DW),
        groups=groups,
        bias=has_bias,
        conv_mode=conv_mode,
        compute_mode=compute_mode,
        dtype=out_dtype,
    )
    conv_transpose2d.weight = mge.Parameter(weight_int8)
    if has_bias:
        conv_transpose2d.bias = mge.Parameter(bias_int32)

    result = conv_transpose2d.forward(inp_int8).numpy()
    result = dtype.convert_from_qint8(result)
    np.testing.assert_allclose(result, expected, atol=out_scale)