def test_numpy_inline():
    """Check an Instant-compiled C reduction against a NumPy expression.

    The C function ``sum_of_some_func`` accumulates ``some_func(x)`` over
    a 1-D array; ``some_func`` itself is declared in the local header
    ``./some_func.h`` (not visible here -- the reference expression below
    suggests it computes ``sin(x) + cos(x)``; confirm against the header).
    Both evaluations are timed and printed, and the two totals must agree
    to within 1e-12.
    """
    c_source = """
double sum_of_some_func(int n1, double* array1){
  double tmp = 0.0;
  for (int i=0; i<n1; i++) {
    tmp += some_func(array1[i]);
  }
  return tmp;
}
"""
    compiled_sum = inline_with_numpy(
        c_source,
        arrays=[['n1', 'array1']],
        local_headers=["./some_func.h"],
        libraries=["m"],
    )

    values = numpy.sin(numpy.arange(10000000))

    # Timed run of the compiled extension.
    started = time.time()
    instant_total = compiled_sum(values)
    finished = time.time()
    print('With instant:', finished - started, 'seconds')

    # Timed reference evaluation; the builtin sum adds the elements in
    # index order, exactly like the C loop above.
    started = time.time()
    reference_total = sum(numpy.sin(values) + numpy.cos(values))
    finished = time.time()
    print('With numpy :', finished - started, 'seconds')

    assert abs(reference_total - instant_total) < 1.0e-12
def ext_gridloop1_instant(self, fstr):
    """Compile a C ``gridloop1`` with ``fstr`` inlined and store it on self.

    ``fstr`` is pasted verbatim into the innermost loop as the right-hand
    side of ``index(a, i, j) = ...`` (so it must be a C expression in
    ``x`` and ``y`` that ends the statement itself, e.g. ``"sin(x*y);"``).
    No C function ``f(x, y)`` is called; the expression is evaluated
    inline.

    On success ``self.gridloop1_instant`` is the compiled callable.  If
    Instant is missing or the build fails, the attribute is set to
    ``None`` so callers can fall back to another implementation.

    Raises:
        TypeError: if ``fstr`` is not a string.
    """
    if not isinstance(fstr, str):
        # Python 3 compatible raise; the offending type is part of the
        # message instead of being passed as a stray extra argument.
        raise TypeError(
            'fstr must be string expression, not %s' % type(fstr))

    # Generate C source for gridloop1.  The preprocessor directive has to
    # stay on a line of its own.  y is taken from ycoor[j]: the second
    # coordinate follows the inner (j) loop, not the outer one.
    source = """
void gridloop1(double *a, int nx, int ny,
               double *xcoor, double *ycoor)
{
# define index(a, i, j) a[i*ny + j]
  int i, j; double x, y;
  for (i=0; i<nx; i++) {
    for (j=0; j<ny; j++) {
      x = xcoor[i];  y = ycoor[j];
      index(a, i, j) = %s
    }
  }
}
""" % fstr
    try:
        from instant import inline_with_numpy
        # NOTE(review): these specs list the sizes before each array
        # name, while the C signature above puts ``a`` before nx/ny --
        # confirm that Instant matches them as intended.
        arrays = [['nx', 'ny', 'a'], ['nx', 'xcoor'], ['ny', 'ycoor']]
        self.gridloop1_instant = inline_with_numpy(source, arrays=arrays)
    except Exception:
        # Best effort: any import/compile problem disables this variant,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        self.gridloop1_instant = None
def solver_instant(u, f, n=50, m=100, t0=0, t1=1000, dt=.1, nu=1):
    """Solve the heat equation with the time loop compiled to C by Instant.

    Parameters
    ----------
    u : numpy array (M*N)
        Initial distribution.  Its interior is overwritten in place.
    f : numpy array (M*N)
        Heat source function.
    n, m :
        Not used by this function; kept so the signature stays
        compatible with existing callers.
    t0 : Start time
    t1 : End time
    dt : Time step
    nu : Thermal diffusivity

    Returns
    -------
    u : the same array object that was passed in, updated.
    """
    # Changes relative to the previous C source:
    #  * the stencil loops stop at x1-1 / y1-1.  They used to run to
    #    x1 / y1, which read u[(i+1)*y1 + j] past the end of the array on
    #    the last row; those boundary values were never copied back, so
    #    results are unchanged.
    #  * the function is declared void; it was declared double but had
    #    no return statement.
    # NOTE(review): u_new is a variable-length array sized x1*y1 doubles;
    # confirm this is acceptable for the grid sizes used.
    c_code = """
void calculate (int x1, int y1, double* u,
                int x2, int y2, double* f,
                int x3, double* args){
  double t0=args[0];
  double t1=args[1];
  double dt=args[2];
  double nu=args[3];
  double u_new[x1*y1];
  double counterLoop=t0;
  for (int i=0; i<x1; i++)
    for (int j=0; j<y1; j++)
      u_new[i*y1 + j]=u[i*y1 + j];
  while(counterLoop<=t1) {
    for(int i=1;i<x1-1;i++) {
      for(int j=1;j<y1-1;j++)
        u_new[i*y1+j]=u[i*y1+j] + dt * (nu*u[(i-1)*y1+j] + nu*u[i*y1+j-1] - 4*nu*u[i*y1+j] + nu*u[i*y1+j+1] + nu*u[(i+1)*y1+j] + f[i*y1+j]);
    }
    for (int i=1; i<x1-1; i++)
      for (int j=1; j<y1-1; j++)
        u[i*y1 + j]=u_new[i*y1 + j];
    counterLoop+=dt;
  }
}
"""
    call_func = inline_with_numpy(
        c_code,
        arrays=[['x1', 'y1', 'u'], ['x2', 'y2', 'f'], ['x3', 'args']],
        cache_dir="_cache")

    # The scalar settings travel to C packed in one length-4 array, in
    # the order the C code unpacks them: t0, t1, dt, nu.
    params = ones(4)
    params[0] = t0
    params[1] = t1
    params[2] = dt
    params[3] = nu

    call_func(u, f, params)
    return u
def build_cfuntions(self):
    """Compile the C helper ``isnotzero`` and store it as ``self.c_isnotzero``.

    The C function returns true as soon as it finds an entry of ``row``
    whose value or negated value exceeds ``zTOL``, and false otherwise.
    """
    isnotzero_source = """
bool isnotzero(int row_n, double *row, double zTOL) {
  for ( int i=0; i<row_n; i++ ) {if (row[i] > zTOL || -row[i] > zTOL) return true;}
  return false;
}
"""
    # One NumPy array argument: its length maps to row_n, its data to row.
    self.c_isnotzero = inline_with_numpy(
        isnotzero_source, arrays=[["row_n", "row"]])
def load_c_code():
    """
    Read ``dmdt.c`` from the directory of this module and compile it
    with Instant.

    The compiled function is later called to compute the right-hand side
    of the LLG equation.  Arrays M, H and P are declared as inputs and
    dMdt as output.
    """
    module_dir = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    source_path = os.path.join(module_dir, 'dmdt.c')

    with open(source_path, "r") as source_file:
        c_code = source_file.read()

    # Each entry is [size name, array name, direction].
    array_specs = [
        ["Mn", "M", "in"],
        ["Hn", "H", "in"],
        ["dMdtn", "dMdt", "out"],
        ["Pn", "P", "in"],
    ]
    return instant.inline_with_numpy(c_code, arrays=array_specs)
def SolverInstant(f, nu=1, dt=0.1, n=50, m=100, t0=0, t_end=1000, u0=None):
    """Solver for the heat equation; the whole time loop runs in C (Instant).

    Only interior points are updated, so the boundary rows and columns of
    ``u`` keep their initial values (zero when ``u0`` is not given).

    Parameters:
        f:     2-D source-term array.
        nu:    diffusion coefficient.
        dt:    time step.
        n, m:  grid shape used when ``u0`` is None.
        t0:    accepted for interface compatibility; the C loop always
               counts steps from zero.
        t_end: end time (a small epsilon is added before use).
        u0:    optional initial state.  When given it is used directly as
               the work array and is therefore modified in place.

    Returns:
        The solution array ``u`` after the last time step.
    """
    t_end = t_end + 1E-8

    # ``is None`` rather than ``== None``: comparing an ndarray with ==
    # is elementwise and cannot be used as a single truth value.
    u = np.zeros((n, m)) if u0 is None else u0
    un = np.zeros((n, m)) if u0 is None else u0.copy()

    # The numeric constants are pasted into the C source as text.
    # %.17g keeps full double precision; the former %f rounded to six
    # decimals, which dropped the epsilon on t_end and turned a small dt
    # into 0.000000 (making the C loop condition permanently true).
    c_code = """
void heateq(int ux, int uy, double* u,
            int unx, int uny, double* un,
            int fx, int fy, double* f){
    double dt = %.17g;
    double nu = %.17g;
    double t_end = %.17g;
    int t,i,j;
    for (t=0; t*dt<t_end+dt; t++){
        for (i=1; i<ux-1; i++){
            for (j=1; j<uy-1; j++){
                un[i*uy + j] = u[i*uy +j]
                    + dt*nu*(u[(i-1)*uy +j] + u[i*uy +j-1] - 4*u[i*uy +j]
                    + u[i*uy +j+1] + u[(i+1)*uy +j]) + dt*nu*f[i*uy +j];
            }
        }
        for (i=1; i<ux-1; i++) {
            for (j=1; j<uy-1; j++) {
                u[i*uy + j] = un[i*uy + j];
            }
        }
    }
}
""" % (dt, nu, t_end)

    heateq_func = inline_with_numpy(
        c_code,
        arrays=[['ux', 'uy', 'u'],
                ['unx', 'uny', 'un'],
                ['fx', 'fy', 'f']])
    heateq_func(u, un, f)  # Run through all time steps in C
    return u
def compile(self):
    """Build the wrapper for this kernel with Instant and cache it.

    The compiled callable is stored in ``self._fun``; later calls return
    the cached object without rebuilding.  While the build runs the
    ``CC`` environment variable is forced to ``mpicc`` (required by
    PETSc); it is put back afterwards even if the build raises.

    Returns:
        The compiled callable produced by ``inline_with_numpy``.
    """
    if hasattr(self, '_fun'):
        return self._fun
    from instant import inline_with_numpy

    def strip(code):
        # Drop blank lines and lines that consist of a lone ';'.
        return '\n'.join([l for l in code.splitlines()
                          if l.strip() and l.strip() != ';'])

    if any(arg._is_soa for arg in self._args):
        kernel_code = """
        #define OP2_STRIDE(a, idx) a[idx]
        %(code)s
        #undef OP2_STRIDE
        """ % {'code': self._kernel.code}
    else:
        kernel_code = """
        %(code)s
        """ % {'code': self._kernel.code}

    code_to_compile = strip(dedent(self._wrapper) % self.generate_code())
    if configuration["debug"]:
        self._wrapper_code = code_to_compile

    _const_decs = '\n'.join([const._format_declaration()
                             for const in Const._definitions()]) + '\n'

    self._dump_generated_code(code_to_compile)

    # We need to build with mpicc since that's required by PETSc
    cc = os.environ.get('CC')
    os.environ['CC'] = 'mpicc'
    try:
        self._fun = inline_with_numpy(
            code_to_compile,
            additional_declarations=kernel_code,
            additional_definitions=_const_decs + kernel_code,
            cppargs=self._cppargs + (['-O0', '-g'] if configuration["debug"] else []),
            include_dirs=[d + '/include' for d in get_petsc_dir()],
            source_directory=os.path.dirname(os.path.abspath(__file__)),
            wrap_headers=["mat_utils.h"],
            system_headers=self._system_headers,
            library_dirs=[d + '/lib' for d in get_petsc_dir()],
            libraries=['petsc'] + self._libraries,
            sources=["mat_utils.cxx"],
            modulename=self._kernel.name if configuration["debug"] else None)
    finally:
        # Restore the caller's compiler setting whether or not the build
        # succeeded, so a failed build does not leave CC=mpicc behind.
        if cc:
            os.environ['CC'] = cc
        else:
            os.environ.pop('CC', None)
    return self._fun
def SolverInstant(f, nu=1, dt=0.1, n=50, m=100, t0 = 0, t_end=1000, u0=None):
    """Solver for the heat equation; the whole time loop runs in C (Instant).

    Only interior points are updated, so the boundary rows and columns of
    ``u`` keep their initial values (zero when ``u0`` is not given).

    Parameters:
        f:     2-D source-term array.
        nu:    diffusion coefficient.
        dt:    time step.
        n, m:  grid shape used when ``u0`` is None.
        t0:    accepted for interface compatibility; the C loop always
               counts steps from zero.
        t_end: end time (a small epsilon is added before use).
        u0:    optional initial state.  When given it is used directly as
               the work array and is therefore modified in place.

    Returns:
        The solution array ``u`` after the last time step.
    """
    t_end = t_end + 1E-8

    # Identity test instead of ``== None``: an ndarray compared with ==
    # yields an elementwise result, which has no single truth value.
    if u0 is None:
        u = np.zeros((n, m))
        un = np.zeros((n, m))
    else:
        u = u0
        un = u0.copy()

    # Constants are inserted into the C code "as text".  %.17g preserves
    # full double precision; %f kept only six decimals, losing the
    # epsilon on t_end and rounding a small dt to 0.000000 (which made
    # the C loop condition permanently true).
    c_code = """
void heateq(int ux, int uy, double* u,
            int unx, int uny, double* un,
            int fx, int fy, double* f){
    double dt = %.17g;
    double nu = %.17g;
    double t_end = %.17g;
    int t,i,j;
    for (t=0; t*dt<t_end+dt; t++){
        for (i=1; i<ux-1; i++){
            for (j=1; j<uy-1; j++){
                un[i*uy + j] = u[i*uy +j]
                    + dt*nu*(u[(i-1)*uy +j] + u[i*uy +j-1] - 4*u[i*uy +j]
                    + u[i*uy +j+1] + u[(i+1)*uy +j]) + dt*nu*f[i*uy +j];
            }
        }
        for (i=1; i<ux-1; i++) {
            for (j=1; j<uy-1; j++) {
                u[i*uy + j] = un[i*uy + j];
            }
        }
    }
}
""" % (dt, nu, t_end)

    heateq_func = inline_with_numpy(
        c_code,
        arrays=[['ux', 'uy', 'u'],
                ['unx', 'uny', 'un'],
                ['fx', 'fy', 'f']])
    heateq_func(u, un, f)  # Run through all time steps in C
    return u
def test_grid_loop():
    """Fill a 2-D grid with sin(x_i + y_j) in C and compare with NumPy.

    The C loop and the vectorised NumPy expression are both timed and
    printed; the largest absolute difference between the two grids must
    stay below 1e-12.
    """
    func_str = "sin"
    # Row-major indexing uses the row length of ``a`` (y1) as the stride,
    # so the kernel stays correct if the grid is not square.
    c_code = """
void gridloop(int x1, int y1, double* a,
              int n, double* x,
              int m, double* y) {
  for (int i=0; i<n; i++) {
    for (int j=0; j<m; j++) {
      a[i*y1 + j] = %s(x[i] + y[j]);
    }
  }
}
""" % func_str

    n = 5000
    a = N.zeros([n, n])
    x = N.arange(0.0, n, 1.0)
    y = N.arange(0.0, n, 1.0)
    arrays = [['x1', 'y1', 'a'], ['n', 'x'], ['m', 'y']]
    grid_func = inline_with_numpy(c_code, arrays=arrays)

    t1 = time.time()
    grid_func(a, x, y)
    t2 = time.time()
    print('With instant:', t2 - t1, 'seconds')

    xv = x[:, N.newaxis]
    yv = y[N.newaxis, :]
    a2 = N.zeros([n, n])
    t1 = time.time()
    a2[:, :] = N.sin(xv + yv)
    t2 = time.time()
    print('With numpy:', t2 - t1, 'seconds')

    # Largest deviation in magnitude.  abs(max(d)) would overlook large
    # negative differences, and the builtin max walks the array element
    # by element in Python.
    d = a - a2
    assert N.abs(d).max() < 1.0e-12
def test_compare_numpy():
    """Time an Instant-compiled sum against ``numpy.sum`` and compare them.

    The C source comes from the module-level ``c_code``.  Both results
    are computed on the same array of sine values and must agree to
    within 1e-12.
    """
    compiled_sum = inline_with_numpy(
        c_code,
        arrays=[['n1', 'array1']],
        cache_dir="test_cache",
    )

    samples = numpy.sin(numpy.arange(10000000))

    # Compiled extension.
    begin = time.time()
    instant_result = compiled_sum(samples)
    end = time.time()
    print('With instant:', end - begin, 'seconds')

    # NumPy reference.
    begin = time.time()
    numpy_result = numpy.sum(samples)
    end = time.time()
    print('With numpy: ', end - begin, 'seconds')

    assert abs(instant_result - numpy_result) < 1.0e-12
def compile(self):
    """Build the wrapper for this kernel with Instant and cache it.

    The compiled callable is stored in ``self._fun``; later calls return
    the cached object without rebuilding.  While the build runs the
    ``CC`` environment variable is forced to ``mpicc`` (required by
    PETSc); it is put back afterwards even if the build raises.

    Returns:
        The compiled callable produced by ``inline_with_numpy``.
    """
    if hasattr(self, '_fun'):
        return self._fun
    from instant import inline_with_numpy

    if any(arg._is_soa for arg in self._args):
        kernel_code = """
        #define OP2_STRIDE(a, idx) a[idx]
        inline %(code)s
        #undef OP2_STRIDE
        """ % {'code': self._kernel.code}
    else:
        kernel_code = """
        inline %(code)s
        """ % {'code': self._kernel.code}

    code_to_compile = dedent(self.wrapper) % self.generate_code()

    _const_decs = '\n'.join([const._format_declaration()
                             for const in Const._definitions()]) + '\n'

    # We need to build with mpicc since that's required by PETSc
    cc = os.environ.get('CC')
    os.environ['CC'] = 'mpicc'
    try:
        self._fun = inline_with_numpy(
            code_to_compile,
            additional_declarations=kernel_code,
            additional_definitions=_const_decs + kernel_code,
            # Parenthesised on purpose: without the parentheses the
            # conditional applied to the whole sum, so self._cppargs was
            # discarded whenever debug mode was off.
            cppargs=self._cppargs + (['-O0', '-g'] if cfg.debug else []),
            include_dirs=[OP2_INC, get_petsc_dir() + '/include'],
            source_directory=os.path.dirname(os.path.abspath(__file__)),
            wrap_headers=["mat_utils.h"],
            system_headers=self._system_headers,
            library_dirs=[OP2_LIB, get_petsc_dir() + '/lib'],
            libraries=['op2_seq', 'petsc'] + self._libraries,
            sources=["mat_utils.cxx"])
    finally:
        # Restore the caller's compiler setting whether or not the build
        # succeeded, so a failed build does not leave CC=mpicc behind.
        if cc:
            os.environ['CC'] = cc
        else:
            os.environ.pop('CC', None)
    return self._fun
if ( n != m ) { printf("n and m should be equal"); return; } for (int i=0; i<m; i++) { y[i] = sin(x[i]); } } """ N = 8000000 compute_func = inline_with_numpy(c_code, arrays=[['n', 'x'], ['m', 'y']], cppargs=['-fopenmp'], lddargs=['-lgomp'], system_headers=["omp.h"]) x = arange(0, 1, 1.0 / N) y = arange(0, 1, 1.0 / N) t1 = time.time() t3 = time.clock() compute_func(x, y) t2 = time.time() t4 = time.clock() print('With instant and OpenMP', t4 - t3, 'seconds process time') print('With instant and OpenMP', t2 - t1, 'seconds wall time') compute_func_scalar = inline_with_numpy(c_code_scalar, arrays=[['n', 'x'], ['m', 'y']])
id = omp_get_thread_num(); printf("Thread %d\n", id); p[0] = p0; #pragma omp for for (int i=1; i<n; i++) { p[i] = p[i-1] + dt*(B*Q[i] - A*p[i-1]); } } } ''' N = 20000000 time_loop = inline_with_numpy(c_code, arrays=[['n', 'p'], ['m', 'Q']], cppargs=['-fopenmp'], lddargs=['-lgomp'], system_headers=['omp.h']) p = zeros(N) T = 20.0 Q = sin(arange(0, T, T / N)) + 1 t1 = time.time() t3 = time.clock() time_loop(p, Q, 1.0, 1.0, 1.0 / N, N, 1.0) t2 = time.time() t4 = time.clock() print('With instant and OpenMP:', t4 - t3, 'seconds process time') print('With instant and OpenMP:', t2 - t1, 'seconds process time')
import numpy
from instant import inline_with_numpy

# Example 2: two 2-D arrays of equal shape, both passed in/out.
# The dimensions have to be spelled out for each array separately.
# Despite its name, the C function exchanges the contents of the two
# arrays and returns the last value it moved out of array1.
c_code = """
double sum (int x1, int y1, double* array1, int x2, int y2, double* array2){
  double tmp = 0.0;
  for (int i=0; i<x1; i++)
    for (int j=0; j<y1; j++){
      tmp = array1[i*y1 + j];
      array1[i*y1 + j] = array2[i*y1 + j];
      array2[i*y1 + j] = tmp;
    }
  return tmp;
}
"""

sum_func = inline_with_numpy(
    c_code,
    arrays=[['x1', 'y1', 'array1'],
            ['x2', 'y2', 'array2']],
    cache_dir="test_ex2_cache",
)

# a is a 2x2 array of twos, b a 2x2 array of ones.
b = numpy.ones((2, 2))
a = 2 * numpy.ones((2, 2))

print(sum_func(a, b))
def return_bele_magpar():
    """Compile the module-level ``C_CODE`` with Instant and return the callable.

    ``bvert`` and the three ``facv*`` arrays are declared as inputs;
    ``matele`` is listed without a direction flag.
    """
    array_specs = [
        ["n_bvert", "bvert", "in"],
        ["facv1_n", "facv1", "in"],
        ["facv2_n", "facv2", "in"],
        ["facv3_n", "facv3", "in"],
        ["matele_n", "matele"],
    ]
    compiled = instant.inline_with_numpy(C_CODE, arrays=array_specs)
    return compiled
import numpy
from instant import inline_with_numpy

# Example 2: two 2-D arrays of equal shape, both passed in/out.
# The dimensions have to be spelled out for each array separately.
# Despite its name, the C function exchanges the contents of the two
# arrays and returns the last value it moved out of array1.
c_code = """
double sum (int x1, int y1, double* array1, int x2, int y2, double* array2){
  double tmp = 0.0;
  for (int i=0; i<x1; i++)
    for (int j=0; j<y1; j++){
      tmp = array1[i*y1 + j];
      array1[i*y1 + j] = array2[i*y1 + j];
      array2[i*y1 + j] = tmp;
    }
  return tmp;
}
"""

sum_func = inline_with_numpy(
    c_code,
    arrays=[['x1', 'y1', 'array1'],
            ['x2', 'y2', 'array2']],
    cache_dir="test_ex2_cache",
)

# a is a 2x2 array of twos, b a 2x2 array of ones.
b = numpy.ones((2, 2))
a = 2 * numpy.ones((2, 2))
S[0] = S0; I[0] = I0; Z[0] = Z0; R[0] = R0; for (int i=0; i<N-1; i++) { S[i+1] = S[i] + dt*(PI - beta*S[i]*Z[i] - delta*S[i]); I[i+1] = I[i] + dt*(beta*S[i]*Z[i] - (rho + delta)*I[i]); Z[i+1] = Z[i] + dt*(rho*I[i] + zeta*R[i] - alpha*S[i]*Z[i]); R[i+1] = R[i] + dt*(delta*S[i] + delta*I[i] + alpha*S[i]*Z[i] - zeta*R[i]); } } """ N = 100 time_loop = inline_with_numpy(c_code, arrays=[['nS', 'S'], ['nI', 'I'], ['nZ', 'Z'], ['nR', 'R']], cache_dir="zombie_cache") S = zeros(N) I = zeros(N) Z = zeros(N) R = zeros(N) T = 100 dt = T / (N - 1) PI = 0.0 alpha = 0.05 beta = 0.095 zeta = 0.01 delta = 0.01 rho = 0.001
} else{ const int D = Dims_lhs[0]; for(int d=0; d != D; ++d){ result[d]+=lhs[d]; for(int k=0; k!=d; ++k){ result[d] -= result[k]*rhs[d-k]; } result[d] /= rhs[0]; } } } """ adouble__add__ = instant.inline_with_numpy( c_code_adouble__add__, arrays=[['Ndim_lhs', 'Dims_lhs', 'lhs'], ['Ndim_rhs', 'Dims_rhs', 'rhs'], ['Ndim_result', 'Dims_result', 'result']]) adouble__sub__ = instant.inline_with_numpy( c_code_adouble__sub__, arrays=[['Ndim_lhs', 'Dims_lhs', 'lhs'], ['Ndim_rhs', 'Dims_rhs', 'rhs'], ['Ndim_result', 'Dims_result', 'result']]) adouble__mul__ = instant.inline_with_numpy( c_code_adouble__mul__, arrays=[['Ndim_lhs', 'Dims_lhs', 'lhs'], ['Ndim_rhs', 'Dims_rhs', 'rhs'], ['Ndim_result', 'Dims_result', 'result']]) adouble__div__ = instant.inline_with_numpy( c_code_adouble__div__, arrays=[['Ndim_lhs', 'Dims_lhs', 'lhs'], ['Ndim_rhs', 'Dims_rhs', 'rhs'], ['Ndim_result', 'Dims_result', 'result']]) adouble__imul__ = instant.inline_with_numpy( c_code_adouble__imul__,
for(int k=0; k!=d; ++k){ for(int n=0; n != Ndir; ++n){ result[d*Ndir + n] -= result[k*Ndir + n]*rhs[(d-k)*Ndir + n]; } } for(int n=0; n != Ndir; ++n){ result[d*Ndir + n] /= rhs[0+n]; } } } else{ const int D = Dims_lhs[0]; for(int d=0; d != D; ++d){ result[d]+=lhs[d]; for(int k=0; k!=d; ++k){ result[d] -= result[k]*rhs[d-k]; } result[d] /= rhs[0]; } } } """ adouble__add__ = instant.inline_with_numpy(c_code_adouble__add__, arrays=[['Ndim_lhs', 'Dims_lhs', 'lhs'], ['Ndim_rhs', 'Dims_rhs', 'rhs'], ['Ndim_result', 'Dims_result', 'result']] ) adouble__sub__ = instant.inline_with_numpy(c_code_adouble__sub__, arrays=[['Ndim_lhs', 'Dims_lhs', 'lhs'], ['Ndim_rhs', 'Dims_rhs', 'rhs'], ['Ndim_result', 'Dims_result', 'result']] ) adouble__mul__ = instant.inline_with_numpy(c_code_adouble__mul__, arrays=[['Ndim_lhs', 'Dims_lhs', 'lhs'], ['Ndim_rhs', 'Dims_rhs', 'rhs'], ['Ndim_result', 'Dims_result', 'result']] ) adouble__div__ = instant.inline_with_numpy(c_code_adouble__div__, arrays=[['Ndim_lhs', 'Dims_lhs', 'lhs'], ['Ndim_rhs', 'Dims_rhs', 'rhs'], ['Ndim_result', 'Dims_result', 'result']] ) adouble__imul__ = instant.inline_with_numpy(c_code_adouble__imul__, arrays=[['tmp_lhs','lhs'],['Ndim_lhs', 'Dims_lhs', 'lhs_tc'], ['Ndim_rhs', 'Dims_rhs', 'rhs_tc']] )
return; } if ( n != N ) { printf("n and N should be equal"); return; } p[0] = p0; for (int i=1; i<n; i++) { p[i] = p[i-1] + dt*(B*Q[i] - A*p[i-1]); } } """ N = 100000 time_loop = inline_with_numpy(c_code, arrays = [['n', 'p'], ['m', 'Q']], cppargs='-g', cache_dir="test_cache") p = zeros(N) T = 20.0 Q = sin(arange(0, T, T/N))+1 t1 = time.time() time_loop(p, Q, 1.0, 1.0, 1.0/N, N, 1.0) t2 = time.time() print('With instant:', t2-t1, 'seconds') p2 = zeros(N) t1 = time.time() time_loop2(p2, Q, 1.0, 1.0, 1.0/N, N, 1.0) t2 = time.time() print('With Python :', t2-t1, 'seconds')
def return_csa_magpar():
    """Compile the module-level ``C_CODE`` with Instant and return the callable.

    All four arrays (``x``, ``v1``, ``v2``, ``v3``) are declared as
    inputs; each size argument is the array name followed by ``n``.
    """
    array_specs = [[name + "n", name, "in"]
                   for name in ("x", "v1", "v2", "v3")]
    return instant.inline_with_numpy(C_CODE, arrays=array_specs)
import numpy
from instant import inline_with_numpy

# Plain C reduction: add up the entries of a 1-D array of doubles.
c_code = """
double sum (int n1, double* array1){
  double tmp = 0.0;
  for (int i=0; i<n1; i++) {
    tmp += array1[i];
  }
  return tmp;
}
"""

sum_func = inline_with_numpy(c_code, arrays=[['n1', 'array1']])

# Input data: sine of the integers 0 .. 9999999.
a = numpy.sin(numpy.arange(10000000))

# The result of the call is not stored.
sum_func(a)