Example #1
def test_explicit_init():
    import nums
    import nums.core.application_manager as am

    nums.init()
    assert am.is_initialized()
    am.destroy()
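Initialization is lazy in these examples: application_manager.instance() creates the application on first use, and nums.init() simply forces that up front. A minimal sketch of the same lifecycle using only the calls shown above:

import nums.core.application_manager as am

assert not am.is_initialized()  # nothing has started yet
app = am.instance()             # first call initializes the backend
assert am.is_initialized()
am.destroy()                    # tear down so later tests can reconfigure
assert not am.is_initialized()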
Example #2
def nps_app_inst(request):
    # This triggers initialization; it's not to be mixed with the app_inst fixture.
    # A crash (core dumped) was observed after updating this fixture to run
    # functions with the "serial" backend.
    # Last time this happened, it was due to poor control over the
    # scope and duration of ray resources.
    # pylint: disable = import-outside-toplevel
    from nums.core import settings
    from nums.core import application_manager
    settings.system_name = request.param
    yield application_manager.instance()
    application_manager.destroy()
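nps_app_inst is a parametrized pytest fixture: request.param supplies the backend name. A sketch of how such a fixture might be registered and consumed (the params list and the test body are illustrative assumptions, not from the source):

import pytest

@pytest.fixture(params=["serial", "ray"])  # values feed request.param above
def nps_app_inst(request):
    from nums.core import settings
    from nums.core import application_manager
    settings.system_name = request.param
    yield application_manager.instance()
    application_manager.destroy()

def test_arange(nps_app_inst):
    import numpy as np
    # The fixture yields an ArrayApplication instance.
    app = nps_app_inst
    assert np.allclose(
        np.arange(10),
        app.arange(0, shape=(10,), block_shape=(10,)).get())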
Example #3
def test_app_manager():
    for compute_name in ["numpy"]:
        for system_name in ["serial", "ray-cyclic", "ray-task"]:
            settings.compute_name = compute_name
            settings.system_name = system_name
            app: ArrayApplication = application_manager.instance()
            assert np.allclose(
                np.arange(10),
                app.arange(0, shape=(10, ), block_shape=(10, )).get())
            application_manager.destroy()
            assert not application_manager.is_initialized()
            time.sleep(1)
Example #4
def nps_app_inst(request):
    # This triggers initialization; it's not to be mixed with the app_inst fixture.
    # A crash (core dumped) was observed after updating this fixture to run
    # functions with the "serial" backend.
    # Last time this happened, it was due to poor control over the
    # scope and duration of ray resources.
    # pylint: disable = import-outside-toplevel
    from nums.core import settings
    from nums.core import application_manager
    import nums.numpy as nps
    settings.system_name, settings.device_grid_name = request.param

    # Need to reset numpy random state.
    # It's the only stateful numpy API object.
    nps.random.reset()
    yield application_manager.instance()
    application_manager.destroy()
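Because this variant unpacks request.param into a (system_name, device_grid_name) pair, the fixture's params must be 2-tuples. A sketch of a matching parametrization (the concrete pairs are assumptions based on values used elsewhere in these examples):

import pytest

# Each param is a (system_name, device_grid_name) pair, matching the
# tuple unpacking in the fixture body above.
@pytest.fixture(params=[("serial", "cyclic"), ("ray", "cyclic")])
def nps_app_inst(request):
    ...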
Example #5
def test_app_manager(compute_name, system_name, device_grid_name):
    settings.use_head = True
    settings.compute_name = compute_name
    settings.system_name = system_name
    settings.device_grid_name = device_grid_name

    app: ArrayApplication = application_manager.instance()
    app_arange = app.arange(0, shape=(10, ), block_shape=(10, ))
    assert np.allclose(np.arange(10), app_arange.get())
    application_manager.destroy()
    assert not application_manager.is_initialized()
    time.sleep(1)

    # Revert for other tests.
    settings.compute_name = "numpy"
    settings.system_name = "ray"
    settings.device_grid_name = "cyclic"
Example #6
def test_app_manager(compute_name, system_name, device_grid_name, num_cpus):
    settings.use_head = True
    settings.compute_name = compute_name
    settings.system_name = system_name
    settings.device_grid_name = device_grid_name
    settings.num_cpus = num_cpus

    app: ArrayApplication = application_manager.instance()
    print(settings.num_cpus, num_cpus, app.cm.num_cores_total())
    app_arange = app.arange(0, shape=(10, ), block_shape=(10, ))
    assert np.allclose(np.arange(10), app_arange.get())
    if num_cpus is None:
        assert app.cm.num_cores_total() == get_num_cores()
    else:
        assert app.cm.num_cores_total() == num_cpus
    application_manager.destroy()
    assert not application_manager.is_initialized()
    time.sleep(1)

    # Revert for other tests.
    settings.compute_name = "numpy"
    settings.system_name = "ray"
    settings.device_grid_name = "cyclic"
    settings.num_cpus = None
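Examples #5 and #6 receive their settings as test arguments, which fits pytest.mark.parametrize. A sketch of driving Example #6 (the parameter rows are assumptions; the first mirrors the defaults the test reverts to):

import pytest

@pytest.mark.parametrize(
    "compute_name,system_name,device_grid_name,num_cpus",
    [
        ("numpy", "ray", "cyclic", None),  # None => all cores, per get_num_cores()
        ("numpy", "ray", "cyclic", 2),
    ],
)
def test_app_manager(compute_name, system_name, device_grid_name, num_cpus):
    ...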
Example #7
def benchmark_mlp(num_gpus,
                  N_list,
                  system_class_list,
                  d=1000,
                  optimizer=True,
                  dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    print(format_string %
          ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    global app

    for N in N_list:
        N = int(N)

        for system_class in system_class_list:
            try:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp

                    arr_lib = cp if system_class == "Cupy" else np
                    app = arr_lib

                    X, y = np_sample(np, sample_size=N, feature=d, dtype=dtype)
                    W_in_1, W_1_2, W_2_out = np_init_weights(np,
                                                             X,
                                                             y,
                                                             dtype=dtype)

                    X = cp.asarray(X)
                    y = cp.asarray(y)
                    W_in_1 = cp.asarray(W_in_1)
                    W_1_2 = cp.asarray(W_1_2)
                    W_2_out = cp.asarray(W_2_out)

                    cp.cuda.Device(0).synchronize()

                    # Benchmark one step mlp
                    def func():
                        tic = time.time()
                        toc_end = one_step_fit_np(arr_lib, X, y, W_in_1, W_1_2,
                                                  W_2_out)
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, toc_end - tic, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    del (X, y, W_in_1, W_1_2, W_2_out)
                else:
                    # Init system
                    name = system_class.__name__
                    app = am.instance(num_gpus, optimizer)

                    # Make dataset
                    nps.random.seed(0)
                    X, y = sample(app,
                                  sample_size=N,
                                  feature=d,
                                  num_gpus=num_gpus,
                                  dtype=dtype)
                    W_in_1, W_1_2, W_2_out = data_init_weights(app,
                                                               X,
                                                               y,
                                                               verbose=False)

                    # Benchmark one step MLP
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = one_step_fit_opt(app,
                                                                 X,
                                                                 y,
                                                                 W_in_1,
                                                                 W_1_2,
                                                                 W_2_out,
                                                                 num_gpus,
                                                                 verbose=False)
                        else:
                            toc_init = tic
                            toc_opt = one_step_fit(app, X, y, W_in_1, W_1_2,
                                                   W_2_out)

                        toc = time.time()
                        return toc - tic, toc_opt - tic, toc_init - tic, None

                    costs, costs_opt, costs_init = benchmark_func(func)

                    del (X, y, W_in_1, W_1_2, W_2_out)
                    am.destroy()
            except Exception:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]

            log_str = format_string % (
                name,
                "%d" % N,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )

            print(log_str)
            with open("result_mlp_data.csv", "a") as f:
                f.write(log_str + "\n")
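benchmark_func is not defined in these examples. From the call sites, it must run func repeatedly and return three lists of per-trial costs, where func itself returns (total, opt, init, extra). A minimal sketch consistent with that contract (the warmup and repeat counts are assumptions):

def benchmark_func(func, warmup=1, repeat=5):
    # Hypothetical helper matching the call sites above:
    # func() -> (cost, cost_opt, cost_init, extra).
    for _ in range(warmup):
        func()
    costs, costs_opt, costs_init = [], [], []
    for _ in range(repeat):
        cost, cost_opt, cost_init, _ = func()
        costs.append(cost)
        costs_opt.append(cost_opt)
        costs_init.append(cost_init)
    return costs, costs_opt, costs_init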
Example #8
import random

import numpy as np

from nums.core.application_manager import instance as _instance, destroy
from nums.core.array.blockarray import BlockArray
# SparseBlockArray ships on the sparse development branch; the import path
# below is an assumption -- adjust it to match your checkout.
from nums.core.array.sparseblockarray import SparseBlockArray

# Setup assumed by this fragment: w, h, sparsity, and arr are not defined
# in the original snippet; the values below are illustrative.
w, h, sparsity = 100, 100, 50
arr = np.zeros((w, h), dtype=int)

# Scatter `sparsity` random integers across the array.
ind = random.sample(range(w * h), sparsity)
ind = [(i % w, i // w) for i in ind]

for i in ind:
    arr[i] = np.random.randint(0, 100)

dtype = np.__getattribute__(str(arr.dtype))
shape = arr.shape
app = _instance()
block_shape = app.compute_block_shape(shape, dtype)

sparse_result = SparseBlockArray.from_np(arr,
                                         block_shape=block_shape,
                                         copy=False,
                                         system=app.system)
dense_result = BlockArray.from_np(arr,
                                  block_shape=block_shape,
                                  copy=False,
                                  system=app.system)

funcs = [
    lambda x: x @ x,
    lambda x: x + x,
    lambda x: x - x,
    # lambda x: x ** x,
]
for f in funcs:
    assert (f(sparse_result).get() == f(dense_result).get()).all()

destroy()
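The exact == comparison works here because the array holds integers; for floating-point dtypes, an approximate check avoids spurious failures when the sparse and dense paths accumulate in different orders. A small variant:

for f in funcs:
    # np.allclose tolerates floating-point rounding differences.
    assert np.allclose(f(sparse_result).get(), f(dense_result).get())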
Example #9
def benchmark_bop(num_gpus, N_list, system_class_list, d=400000, optimizer=True, dtype=np.float32):
    format_string = "%20s,%10s,%10s,%10s,%10s,%10s"
    print(format_string % ("Library", "N", "Cost", "CostOpt", "CostInit", "CV"))
    # global app

    for N in N_list:
        N = int(N)
        d1 = N
        d2 = d
        for system_class in system_class_list:
            try:
                if system_class in ["Cupy", "Numpy"]:
                    name = system_class
                    import cupy as cp

                    arr_lib = cp if system_class == "Cupy" else np
                    arr_lib.inv = arr_lib.linalg.inv
                    app = arr_lib

                    # X = arr_lib.ones((N, d), dtype=dtype)

                    W = arr_lib.ones(shape=(d1, d2), dtype=dtype)
                    D = arr_lib.ones(shape=(d2, N), dtype=dtype)
                    # Prevent the Singular matrix Error in np.linalg.inv
                    # arange = arr_lib.arange(N)
                    # X[arange, arange % d] = 1
                    cp.cuda.Device(0).synchronize()

                    # Benchmark bop
                    def func():
                        tic = time.time()
                        Z = W @ D
                        # Z = X.T @ X
                        cp.cuda.Device(0).synchronize()
                        toc = time.time()
                        return toc - tic, 0, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)
                    # del (X, app)
                    del (W, D, app)
                else:
                    # Init system
                    name = system_class.__name__
                    app = am.instance(num_gpus, optimizer)

                    W = app.ones(shape=(d1, d2), block_shape=(d1, d2 // num_gpus), dtype=dtype)
                    D = app.ones(shape=(d2, N), block_shape=(d2 // num_gpus, N), dtype=dtype)
                    # X = app.ones((N, d), block_shape=(N // num_gpus, d), dtype=dtype)

                    # Benchmark bop
                    def func():
                        tic = time.time()
                        if optimizer:
                            toc_init, toc_opt = matmul_opt(app, W, D, num_gpus)
                            # toc_init, toc_opt = matmul_opt(app, X, num_gpus)
                        else:
                            Z = (W @ D).touch()
                            # Z = (X.T @ X).touch()
                        toc = time.time()
                        return toc - tic, 0, 0, None

                    costs, costs_opt, costs_init = benchmark_func(func)

                    del (W, D)
                    am.destroy()
            except Exception:
                costs = [-1]
                costs_opt = [-1]
                costs_init = [-1]

            log_str = format_string % (
                name,
                "%d" % N,
                "%.4f" % np.mean(costs),
                "%.4f" % np.mean(costs_opt),
                "%.4f" % np.mean(costs_init),
                "%.2f" % (np.std(costs) / np.mean(costs)),
            )
            print(log_str)
            with open("result_bop.csv", "a") as f:
                f.write(log_str + "\n")
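A sketch of invoking the driver above (arguments are illustrative; note that the "Numpy" path still imports CuPy and synchronizes the GPU, so CuPy must be installed either way):

if __name__ == "__main__":
    # "Cupy"/"Numpy" strings take the array-library branch; system classes
    # (whatever the surrounding script defines) take the NumS branch.
    benchmark_bop(num_gpus=1, N_list=[1000, 2000], system_class_list=["Numpy"])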