def test_bsr_sparse_conv2d_nhwc():
    """Compare a BSR-converted 1x1 NHWC conv2d against its dense counterpart.

    Builds ``relu -> conv2d -> relu``, evaluates it with a randomly generated
    block-sparse weight, then evaluates the function returned by
    ``bsr_conv2d.convert`` on the same input and requires both outputs to
    agree within 1e-5 (absolute and relative tolerance).
    """
    dtype = "float32"
    input_shape = (1, 32, 32, 64)
    kernel_shape = (1, 1, 64, 128)

    # Graph under test: relu -> 1x1 conv2d (NHWC data, HWIO kernel) -> relu.
    inp = relay.var("data", shape=input_shape, dtype=dtype)
    activated = relay.nn.relu(inp)
    weight_var = relay.var("weight", shape=kernel_shape, dtype=dtype)
    conv = relay.nn.conv2d(
        activated,
        weight_var,
        channels=128,
        kernel_size=1,
        data_layout="NHWC",
        kernel_layout="HWIO",
    )
    out = relay.nn.relu(conv)
    func = relay.Function(relay.analysis.free_vars(out), out)

    # Densify the random BSR matrix, transpose it and reshape it to the
    # kernel shape declared for the "weight" variable.
    bsr_weight = random_bsr_matrix(128, 64, 8, 1, 0.1, dtype)
    hwio_weight = np.array(bsr_weight.todense()).T.reshape(kernel_shape)
    dense_params = {"weight": tvm.nd.array(hwio_weight)}

    feed = np.random.randn(*input_shape).astype(dtype)

    # Reference result from the unconverted function.
    expected = run_func(func, dense_params, feed)

    # Result from the converted function and the parameters it returns.
    converted_func, converted_params = relay.data_dep_optimization.bsr_conv2d.convert(
        func, dense_params, (8, 1), 0.2, "NHWC"
    )
    actual = run_func(converted_func, converted_params, feed)

    np.testing.assert_allclose(actual, expected, atol=1e-5, rtol=1e-5)
def test_bsr_sparse_conv2d_3x3_nchw():
    """Compare a BSR-converted 3x3 NCHW conv2d against its dense counterpart.

    Builds ``relu -> conv2d -> relu`` with a 3x3 kernel and padding 1,
    evaluates it with a randomly generated block-sparse weight, then binds the
    weight into the function, converts it with ``bsr_conv2d.convert2`` and
    requires both outputs to agree within 1e-5 (absolute and relative
    tolerance).
    """
    dtype = "float32"
    input_shape = (1, 64, 32, 32)
    kernel_shape = (128, 64, 3, 3)

    # Graph under test: relu -> 3x3 conv2d (NCHW data, OIHW kernel) -> relu.
    inp = relay.var("data", shape=input_shape, dtype=dtype)
    activated = relay.nn.relu(inp)
    weight_var = relay.var("weight", shape=kernel_shape, dtype=dtype)
    conv = relay.nn.conv2d(
        activated,
        weight_var,
        channels=128,
        kernel_size=3,
        padding=1,
        data_layout="NCHW",
        kernel_layout="OIHW",
    )
    out = relay.nn.relu(conv)
    func = relay.Function(relay.analysis.free_vars(out), out)

    # Densify the random BSR matrix and reshape it to the kernel shape
    # declared for the "weight" variable.
    bsr_weight = random_bsr_matrix(128, 64 * 9, 16, 1, 0.1, dtype)
    oihw_weight = np.array(bsr_weight.todense()).reshape(kernel_shape)
    dense_params = {"weight": tvm.nd.array(oihw_weight)}

    feed = np.random.randn(*input_shape).astype(dtype)

    # Reference result from the unconverted function.
    expected = run_func(func, dense_params, feed)

    # convert2 is called with an empty params dict, so the weight is bound
    # into the function beforehand.
    bound_func = bind_params_by_name(func, dense_params)
    converted_func, converted_params = relay.data_dep_optimization.bsr_conv2d.convert2(
        bound_func, {}, (16, 1), 0.2, "NCHW", 3
    )
    actual = run_func(converted_func, converted_params, feed)

    np.testing.assert_allclose(actual, expected, atol=1e-5, rtol=1e-5)
# To solve this problem, we register these as special buffers, and load them when process program # measuring. # See the `tvm.auto_scheduler.measure.py` for more details. # Define the basic shapes of this sparse computation M = 128 K = 256 N = 512 BS_R = 16 BS_C = 1 density = 0.6 # Generate the test data with numpy X_np = np.random.randn(M, K).astype("float32") X_np = np.maximum(np.zeros((M, K), dtype="float32"), X_np) # Relu W_sp_np = random_bsr_matrix(N, K, BS_R, BS_C, density=density, dtype="float32") W_np = W_sp_np.todense() Y_np = X_np @ W_np.T # Process the matrix multiplication B_np = np.random.randn(M, N).astype("float32") Y_np = Y_np + B_np # Bias add Y_np = np.maximum(np.zeros((M, N), dtype="float32"), Y_np) # Relu ###################################################################### # Create the search task # ^^^^^^^^^^^^^^^^^^^^^^ # We then create a search task with M=N=K=512 and dtype="float32" # If your machine supports avx instructions, you can # # - replace "llvm" below with "llvm -mcpu=core-avx2" to enable AVX2 # - replace "llvm" below with "llvm -mcpu=skylake-avx512" to enable AVX-512