Example #1
def test_sync_min_max_observer():
    world_size = get_device_count_by_fork("gpu")
    x = np.random.rand(3 * world_size, 3, 3, 3).astype("float32")
    np_min, np_max = x.min(), x.max()

    @dist.launcher
    def worker():
        rank = dist.get_rank()
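        # each rank observes only its own slice of x; the sync observer is
        # expected to reduce min/max across all ranks to the global range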
        m = SyncMinMaxObserver()
        y = mge.tensor(x[rank * 3:(rank + 1) * 3])
        m(y)
        assert m.min_val == np_min and m.max_val == np_max

    worker()
Example #2
def test_sync_exponential_moving_average_observer():
    world_size = get_device_count_by_fork("gpu")
    t = np.random.rand()
    x1 = np.random.rand(3 * world_size, 3, 3, 3).astype("float32")
    x2 = np.random.rand(3 * world_size, 3, 3, 3).astype("float32")
    expected_min = x1.min() * t + x2.min() * (1 - t)
    expected_max = x1.max() * t + x2.max() * (1 - t)

    @dist.launcher
    def worker():
        rank = dist.get_rank()
        m = SyncExponentialMovingAverageObserver(momentum=t)
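        # the EMA observer updates its statistics as
        # stat = momentum * old_stat + (1 - momentum) * new_stat,
        # which is exactly what expected_min/expected_max encode above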
        y1 = mge.tensor(x1[rank * 3:(rank + 1) * 3])
        y2 = mge.tensor(x2[rank * 3:(rank + 1) * 3])
        m(y1)
        m(y2)
        np.testing.assert_allclose(m.min_val.numpy(), expected_min, atol=1e-6)
        np.testing.assert_allclose(m.max_val.numpy(), expected_max, atol=1e-6)

    worker()
Example #3
def as_tensor(x):
    return mge.Tensor(x)


def save_to(self, name="grad"):
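    # return a callback that stores the incoming gradient on `self`
    # under the given attribute name (used together with Grad().wrt below)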
    def callback(grad):
        setattr(self, name, grad)

    return callback


@pytest.mark.skipif(platform.system() == "Darwin",
                    reason="do not imp GPU mode at macos now")
@pytest.mark.skipif(platform.system() == "Windows",
                    reason="windows disable MGB_ENABLE_OPR_MM")
@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2,
                    reason="need more gpu device")
@pytest.mark.isolated_distributed
def test_dist_grad():
    world_size = 2
    x_np = np.random.rand(10).astype("float32")

    @dist.launcher
    def worker():
        rank = dist.get_rank()
        if rank == 0:
            grad = Grad()

            x = as_tensor(x_np)
            grad.wrt(x, callback=save_to(x))
            # need a placeholder to trace operator
Example #4
import os
import platform
import sys

import pytest

import megengine.functional
import megengine.module
from megengine import Parameter
from megengine.core._imperative_rt.core2 import sync
from megengine.distributed.helper import get_device_count_by_fork
from megengine.jit import trace as _trace
from megengine.module import Linear, Module

sys.path.append(os.path.join(os.path.dirname(__file__), "helpers"))

_ngpu = get_device_count_by_fork("gpu")


@pytest.fixture(autouse=True)
def skip_by_ngpu(request):
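    # auto-applied fixture: skip tests marked with @pytest.mark.require_ngpu(n)
    # whenever fewer than n GPUs are available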
    if request.node.get_closest_marker("require_ngpu"):
        require_ngpu = int(
            request.node.get_closest_marker("require_ngpu").args[0])
        if require_ngpu > _ngpu:
            pytest.skip(
                "skipped for ngpu unsatisfied: {}".format(require_ngpu))


@pytest.fixture(autouse=True)
def skip_distributed(request):
    if request.node.get_closest_marker("distributed_isolated"):
Example #5
            no_trans,
            part_size,
            pooled_h,
            pooled_w,
            sample_per_part,
            spatial_scale,
            trans_std,
        )
        return y

    result = fwd(inp, rois, trans)
    check_pygraph_dump(fwd, [inp, rois, trans], [result])


@pytest.mark.skipif(
    get_device_count_by_fork("gpu") > 0,
    reason="does not support int8 when gpu compute capability less than 6.1",
)
def test_convbias():
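    # trace the quantized forward symbolically, capturing parameters as
    # constants, so the resulting graph can be dumped and inspected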
    @trace(symbolic=True, capture_as_const=True)
    def fwd(inp, weight, bias):
        return F.quantized.conv_bias_activation(inp,
                                                weight,
                                                bias,
                                                dtype=dtype.qint8(scale=1.0),
                                                nonlinear_mode="relu")

    inp = Tensor(np.random.random((1, 3, 64, 64)),
                 dtype=dtype.qint8(scale=1.0))
    weight = Tensor(np.random.random((32, 3, 3, 3)),
                    dtype=dtype.qint8(scale=1.0))
Example #6
def test_min_max_observer():
    x = np.random.rand(3, 3, 3, 3).astype("float32")
    np_min, np_max = x.min(), x.max()
    x = mge.tensor(x)
    m = ob.MinMaxObserver()
    m(x)
    assert m.min_val == np_min and m.max_val == np_max


@pytest.mark.skipif(
    platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
    platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
@pytest.mark.isolated_distributed
def test_sync_min_max_observer():
    x = np.random.rand(6, 3, 3, 3).astype("float32")
    np_min, np_max = x.min(), x.max()
    world_size = 2
    port = dist.get_free_ports(1)[0]
    server = dist.Server(port)
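    # each worker joins the process group through this server, then
    # feeds only its own slice of x to the sync observer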

    def worker(rank, slc):
        dist.init_process_group("localhost", port, world_size, rank, rank)
        m = ob.SyncMinMaxObserver()
        y = mge.tensor(x[slc])
        m(y)
        assert m.min_val == np_min and m.max_val == np_max
Example #7
import numpy as np
import pytest

from megengine import is_cuda_available, tensor
from megengine.core._imperative_rt import CompNode
from megengine.core._imperative_rt.core2 import apply
from megengine.core._imperative_rt.ops import (
    delete_rng_handle,
    get_global_rng_seed,
    new_rng_handle,
)
from megengine.core.ops.builtin import GaussianRNG, UniformRNG
from megengine.distributed.helper import get_device_count_by_fork
from megengine.random import RNG
from megengine.random.rng import _normal, _uniform


@pytest.mark.skipif(
    get_device_count_by_fork("xpu") <= 2,
    reason="xpu counts need > 2",
)
def test_gaussian_op():
    shape = (8, 9, 11, 12)
    shape = tensor(shape, dtype="int32")
    op = GaussianRNG(seed=get_global_rng_seed(), mean=1.0, std=3.0)
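    # applying the raw RNG op should yield samples from N(mean=1.0, std=3.0)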
    (output, ) = apply(op, shape)
    assert np.fabs(output.numpy().mean() - 1.0) < 1e-1
    assert np.fabs(np.sqrt(output.numpy().var()) - 3.0) < 1e-1
    assert str(output.device) == str(CompNode("xpux"))
Example #8
        net.train()
        qat_net = quantize_qat(net, inplace=False)
        disable_fake_quant(qat_net)
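        # with fake quant disabled, the QAT network should reproduce the
        # float network's outputs exactly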
        normal_outputs = net(inputs)
        qat_outputs = qat_net(inputs)
        np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy())

        net.eval()
        normal_outputs = net(inputs)
        qat_net.eval()
        qat_outputs = qat_net(inputs)
        np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy())


@pytest.mark.skipif(
    get_device_count_by_fork("gpu") > 0, reason="no int8 algorithm on cuda"
)
def test_qat_batchmatmul_activation():
    batch = 4
    in_features = 8
    out_features = 4

    class TestNet(Module):
        def __init__(self, bias):
            super().__init__()
            self.quant = QuantStub()
            self.dequant = DequantStub()
            self.batch_mm = BatchMatMulActivation(
                batch, in_features, out_features, bias=bias
            )