def test_cffi_assembly(): mesh = UnitSquareMesh(MPI.COMM_WORLD, 13, 13) V = FunctionSpace(mesh, ("Lagrange", 1)) if mesh.mpi_comm().rank == 0: from cffi import FFI ffibuilder = FFI() ffibuilder.set_source( "_cffi_kernelA", r""" #include <math.h> #include <stdalign.h> void tabulate_tensor_poissonA(double* restrict A, const double* w, const double* c, const double* restrict coordinate_dofs, const int* entity_local_index, const int* cell_orientation) { // Precomputed values of basis functions and precomputations // FE* dimensions: [entities][points][dofs] // PI* dimensions: [entities][dofs][dofs] or [entities][dofs] // PM* dimensions: [entities][dofs][dofs] alignas(32) static const double FE3_C0_D01_Q1[1][1][2] = { { { -1.0, 1.0 } } }; // Unstructured piecewise computations const double J_c0 = coordinate_dofs[0] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[2] * FE3_C0_D01_Q1[0][0][1]; const double J_c3 = coordinate_dofs[1] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[5] * FE3_C0_D01_Q1[0][0][1]; const double J_c1 = coordinate_dofs[0] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[4] * FE3_C0_D01_Q1[0][0][1]; const double J_c2 = coordinate_dofs[1] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[3] * FE3_C0_D01_Q1[0][0][1]; alignas(32) double sp[20]; sp[0] = J_c0 * J_c3; sp[1] = J_c1 * J_c2; sp[2] = sp[0] + -1 * sp[1]; sp[3] = J_c0 / sp[2]; sp[4] = -1 * J_c1 / sp[2]; sp[5] = sp[3] * sp[3]; sp[6] = sp[3] * sp[4]; sp[7] = sp[4] * sp[4]; sp[8] = J_c3 / sp[2]; sp[9] = -1 * J_c2 / sp[2]; sp[10] = sp[9] * sp[9]; sp[11] = sp[8] * sp[9]; sp[12] = sp[8] * sp[8]; sp[13] = sp[5] + sp[10]; sp[14] = sp[6] + sp[11]; sp[15] = sp[12] + sp[7]; sp[16] = fabs(sp[2]); sp[17] = sp[13] * sp[16]; sp[18] = sp[14] * sp[16]; sp[19] = sp[15] * sp[16]; // UFLACS block mode: preintegrated A[0] = 0.5 * sp[19] + 0.5 * sp[18] + 0.5 * sp[18] + 0.5 * sp[17]; A[1] = -0.5 * sp[19] + -0.5 * sp[18]; A[2] = -0.5 * sp[18] + -0.5 * sp[17]; A[3] = -0.5 * sp[19] + -0.5 * sp[18]; A[4] = 0.5 * sp[19]; A[5] = 0.5 * sp[18]; A[6] = -0.5 * sp[18] + -0.5 * sp[17]; A[7] = 0.5 * sp[18]; A[8] = 0.5 * sp[17]; } void tabulate_tensor_poissonL(double* restrict A, const double* w, const double* c, const double* restrict coordinate_dofs, const int* entity_local_index, const int* cell_orientation) { // Precomputed values of basis functions and precomputations // FE* dimensions: [entities][points][dofs] // PI* dimensions: [entities][dofs][dofs] or [entities][dofs] // PM* dimensions: [entities][dofs][dofs] alignas(32) static const double FE4_C0_D01_Q1[1][1][2] = { { { -1.0, 1.0 } } }; // Unstructured piecewise computations const double J_c0 = coordinate_dofs[0] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[2] * FE4_C0_D01_Q1[0][0][1]; const double J_c3 = coordinate_dofs[1] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[5] * FE4_C0_D01_Q1[0][0][1]; const double J_c1 = coordinate_dofs[0] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[4] * FE4_C0_D01_Q1[0][0][1]; const double J_c2 = coordinate_dofs[1] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[3] * FE4_C0_D01_Q1[0][0][1]; alignas(32) double sp[4]; sp[0] = J_c0 * J_c3; sp[1] = J_c1 * J_c2; sp[2] = sp[0] + -1 * sp[1]; sp[3] = fabs(sp[2]); // UFLACS block mode: preintegrated A[0] = 0.1666666666666667 * sp[3]; A[1] = 0.1666666666666667 * sp[3]; A[2] = 0.1666666666666667 * sp[3]; } """) ffibuilder.cdef(""" void tabulate_tensor_poissonA(double* restrict A, const double* w, const double* c, const double* restrict coordinate_dofs, const int* entity_local_index, const int* cell_orientation); void tabulate_tensor_poissonL(double* restrict A, const double* w, const double* c, const double* restrict coordinate_dofs, const int* entity_local_index, const int* cell_orientation); """) ffibuilder.compile(verbose=True) mesh.mpi_comm().Barrier() from _cffi_kernelA import ffi, lib ptrA = ffi.cast("intptr_t", ffi.addressof(lib, "tabulate_tensor_poissonA")) integrals = {IntegralType.cell: ([(-1, ptrA)], None)} a = cpp.fem.Form([V._cpp_object, V._cpp_object], integrals, [], [], False) ptrL = ffi.cast("intptr_t", ffi.addressof(lib, "tabulate_tensor_poissonL")) integrals = {IntegralType.cell: ([(-1, ptrL)], None)} L = cpp.fem.Form([V._cpp_object], integrals, [], [], False) A = dolfinx.fem.assemble_matrix(a) A.assemble() b = dolfinx.fem.assemble_vector(L) b.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE) Anorm = A.norm(PETSc.NormType.FROBENIUS) bnorm = b.norm(PETSc.NormType.N2) assert (np.isclose(Anorm, 56.124860801609124)) assert (np.isclose(bnorm, 0.0739710713711999)) list_timings(MPI.COMM_WORLD, [TimingType.wall])
def test_cffi_assembly(): mesh = UnitSquareMesh(MPI.comm_world, 13, 13) V = FunctionSpace(mesh, ("Lagrange", 1)) if MPI.rank(mesh.mpi_comm()) == 0: from cffi import FFI ffibuilder = FFI() ffibuilder.set_source( "_cffi_kernelA", r""" #include <math.h> #include <stdalign.h> void tabulate_tensor_poissonA(double* restrict A, const double* w, const double* restrict coordinate_dofs, int cell_orientation) { // Precomputed values of basis functions and precomputations // FE* dimensions: [entities][points][dofs] // PI* dimensions: [entities][dofs][dofs] or [entities][dofs] // PM* dimensions: [entities][dofs][dofs] alignas(32) static const double FE3_C0_D01_Q1[1][1][2] = { { { -1.0, 1.0 } } }; // Unstructured piecewise computations const double J_c0 = coordinate_dofs[0] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[2] * FE3_C0_D01_Q1[0][0][1]; const double J_c3 = coordinate_dofs[1] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[5] * FE3_C0_D01_Q1[0][0][1]; const double J_c1 = coordinate_dofs[0] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[4] * FE3_C0_D01_Q1[0][0][1]; const double J_c2 = coordinate_dofs[1] * FE3_C0_D01_Q1[0][0][0] + coordinate_dofs[3] * FE3_C0_D01_Q1[0][0][1]; alignas(32) double sp[20]; sp[0] = J_c0 * J_c3; sp[1] = J_c1 * J_c2; sp[2] = sp[0] + -1 * sp[1]; sp[3] = J_c0 / sp[2]; sp[4] = -1 * J_c1 / sp[2]; sp[5] = sp[3] * sp[3]; sp[6] = sp[3] * sp[4]; sp[7] = sp[4] * sp[4]; sp[8] = J_c3 / sp[2]; sp[9] = -1 * J_c2 / sp[2]; sp[10] = sp[9] * sp[9]; sp[11] = sp[8] * sp[9]; sp[12] = sp[8] * sp[8]; sp[13] = sp[5] + sp[10]; sp[14] = sp[6] + sp[11]; sp[15] = sp[12] + sp[7]; sp[16] = fabs(sp[2]); sp[17] = sp[13] * sp[16]; sp[18] = sp[14] * sp[16]; sp[19] = sp[15] * sp[16]; // UFLACS block mode: preintegrated A[0] = 0.5 * sp[19] + 0.5 * sp[18] + 0.5 * sp[18] + 0.5 * sp[17]; A[1] = -0.5 * sp[19] + -0.5 * sp[18]; A[2] = -0.5 * sp[18] + -0.5 * sp[17]; A[3] = -0.5 * sp[19] + -0.5 * sp[18]; A[4] = 0.5 * sp[19]; A[5] = 0.5 * sp[18]; A[6] = -0.5 * sp[18] + -0.5 * sp[17]; A[7] = 0.5 * sp[18]; A[8] = 0.5 * sp[17]; } void tabulate_tensor_poissonL(double* restrict A, const double* w, const double* restrict coordinate_dofs, int cell_orientation) { // Precomputed values of basis functions and precomputations // FE* dimensions: [entities][points][dofs] // PI* dimensions: [entities][dofs][dofs] or [entities][dofs] // PM* dimensions: [entities][dofs][dofs] alignas(32) static const double FE4_C0_D01_Q1[1][1][2] = { { { -1.0, 1.0 } } }; // Unstructured piecewise computations const double J_c0 = coordinate_dofs[0] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[2] * FE4_C0_D01_Q1[0][0][1]; const double J_c3 = coordinate_dofs[1] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[5] * FE4_C0_D01_Q1[0][0][1]; const double J_c1 = coordinate_dofs[0] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[4] * FE4_C0_D01_Q1[0][0][1]; const double J_c2 = coordinate_dofs[1] * FE4_C0_D01_Q1[0][0][0] + coordinate_dofs[3] * FE4_C0_D01_Q1[0][0][1]; alignas(32) double sp[4]; sp[0] = J_c0 * J_c3; sp[1] = J_c1 * J_c2; sp[2] = sp[0] + -1 * sp[1]; sp[3] = fabs(sp[2]); // UFLACS block mode: preintegrated A[0] = 0.1666666666666667 * sp[3]; A[1] = 0.1666666666666667 * sp[3]; A[2] = 0.1666666666666667 * sp[3]; } """) ffibuilder.cdef(""" void tabulate_tensor_poissonA(double* restrict A, const double* w, const double* restrict coordinate_dofs, int cell_orientation); void tabulate_tensor_poissonL(double* restrict A, const double* w, const double* restrict coordinate_dofs, int cell_orientation); """) ffibuilder.compile(verbose=True) MPI.barrier(mesh.mpi_comm()) from _cffi_kernelA import ffi, lib a = cpp.fem.Form([V._cpp_object, V._cpp_object]) ptrA = ffi.cast("intptr_t", ffi.addressof(lib, "tabulate_tensor_poissonA")) a.set_tabulate_cell(0, ptrA) L = cpp.fem.Form([V._cpp_object]) ptrL = ffi.cast("intptr_t", ffi.addressof(lib, "tabulate_tensor_poissonL")) L.set_tabulate_cell(0, ptrL) A = dolfin.fem.assemble(a) b = dolfin.fem.assemble(L) Anorm = A.norm(PETSc.NormType.FROBENIUS) bnorm = b.norm(PETSc.NormType.N2) assert (np.isclose(Anorm, 56.124860801609124)) assert (np.isclose(bnorm, 0.0739710713711999)) list_timings([TimingType.wall])
for i in range(num_cells): kernel(ffi.from_buffer(Aarr), ffi.from_buffer(warr), ffi.from_buffer(constants), ffi.from_buffer(geometry)) run_pure(cexpr.module.tabulate_expression) pure_ffcx_times = [] for i in range(repeats): t0 = time.time() run_pure(cexpr.module.tabulate_expression) tok = time.time() - t0 print(f"Pure loop FFCx kernel {tok}") pure_ffcx_times.append(tok) hand_kernel = ffi.cast("void(*)(double *, double *, double *, double *)", ffi.addressof(lib, "tabulate_expression")) pure_hand_times = [] for i in range(repeats): t0 = time.time() run_pure(hand_kernel) tok = time.time() - t0 print(f"Pure loop hand kernel {tok}") pure_hand_times.append(tok) cexpr.module.tabulate_expression = hand_kernel hand_times = [] for i in range(repeats): t0 = time.time() dolfiny.interpolation.interpolate_cached(cexpr, lam_hand)