def recent_moving_average(x, axis = 0):
    """
    Fast computation of recent moving average, where

        frac = 1/sqrt(t)
        a[t] = (1-frac)*a[t-1] + frac*x[t]

    with t counted from 1 along the first axis, so the first output row is
    simply the first input row.

    :param x: Array whose first axis is the time axis.  Inputs that are not
        2-D are flattened to (x.shape[0], -1), averaged and reshaped back.
    :param axis: Must be 0 for 2-D input; other axes are not implemented.
    :return: Float array with the same shape as x.
    """
    if x.size == 0:
        # Nothing to average; also avoids dividing by x.shape[0] == 0 below
        # and writing the first row of an empty buffer in the C code.
        return np.zeros(x.shape)
    if x.ndim != 2:
        y = recent_moving_average(x.reshape(x.shape[0], x.size // x.shape[0]), axis=0)
        return y.reshape(x.shape)
    assert x.ndim == 2 and axis == 0, 'Only implemented for a special case!'
    result = np.zeros(x.shape)

    try:
        import weave  # ONLY WORKS IN PYTHON 2.X !!!
    except ImportError:
        weave = None

    if weave is None:
        # Pure numpy fallback: same recurrence, one row (time step) at a time.
        result[0] = x[0]
        for t in range(1, x.shape[0]):
            frac = 1. / np.sqrt(t + 1)
            result[t] = (1 - frac) * result[t - 1] + frac * x[t]
        return result

    # The C code walks the buffer with a flat index, so it needs C order.
    x = np.ascontiguousarray(x)
    code = """
    int n_samples = Nx[0];
    int n_dim = Nx[1];
    for (int i=0; i<n_dim; i++)
        result[i] = x[i];
    int ix=n_dim;
    for (int t=1; t<n_samples; t++){
        double frac = 1./sqrt(t+1.);
        for (int i=0; i<n_dim; i++){
            result[ix] = (1-frac)*result[ix-n_dim] + frac*x[ix];
            // Advance once per element so every column of row t is written.
            ix += 1;
        }
    }
    """
    weave.inline(code, ['x', 'result'], compiler = 'gcc')
    return result
def pure_inline(arr):
    """Print a 3D array from C++ by indexing the raw numpy buffer.

    No blitz converters are used.  The embedded snippet illustrates:
      1. '\\n' to escape generating a newline in the C++ code.
      2. rows, cols = Narr[0], Narr[1].
      3. Array access using arr[(i*cols + j)*depth + k].
    """
    printing_loop = """
    int rows = Narr[0];
    int cols = Narr[1];
    int depth = Narr[2];
    for (int i=0; i < rows; i++)
    {
        for (int j=0; j < cols; j++)
        {
            printf("img[%3d][%3d]=", i, j);
            for (int k=0; k< depth; ++k)
            {
                printf(" %3d", arr[(i*cols + j)*depth + k]);
            }
            printf("\\n");
        }
    }
    """
    weave.inline(printing_loop, ['arr'])
def solve(n,m,t0,t1,dt,nu,f,verbose):
    """
    Integrate the heat diffusion equation on an (m, n) grid from t0 to t1.

    :param n: Number of columns of the grid.
    :param m: Number of rows of the grid.
    :param t0: Start time.
    :param t1: End time; stepping continues while t < t1.
    :param dt: Time step.
    :param nu: Diffusion coefficient.
    :param f: Source term, either an (m, n) array or a scalar that is
        broadcast to the whole grid.
    :param verbose: If true, print progress messages.
    :return: (m, n) array holding the last computed step.  Only interior
        points are updated; the boundary rows and columns stay at zero.
    """
    if verbose:
        print("Calculation from {0} to {1} with dt={2}".format(t0,t1,dt))
    if not isinstance(f,np.ndarray):
        # Scalar source term: expand it to a full grid.
        f1=f
        f=np.zeros((m,n))
        f.fill(f1)
    u=np.zeros((m,n))
    u_new=np.zeros((m,n))
    t=float(t0)
    # The stencil reads the four neighbours of (i, j), so the loops cover the
    # interior only and use the real grid size instead of fixed constants.
    expr = """
    int i;
    int j;
    while(t<t1){
        for(i=1;i<m-1;i++){
            for(j=1;j<n-1;j++){
                u_new(i,j)=u(i,j) + dt*(nu*u(i-1,j) + nu*u(i, j-1) - 4*nu*u(i, j) + nu*u(i,j+1) + nu*u(i+1, j) + f(i, j));
            }
        }
        u=u_new;
        t=t+dt;
    }"""
    if verbose:
        print("Uses weaves inline function to run the program in c for performance")
    weave.inline(expr, ['n','m','u','u_new','nu','dt','t','t1','f'],
                 type_converters=weave.converters.blitz, compiler='gcc')
    if verbose:
        print("Returns the numpy array with the calculations")
    return u_new
def cspline_weave(dist, dim, h):
    """
    Evaluate the cubic spline kernel on a 2-D array of distances.

    With Q = dist / h the kernel shape is
        1 - 1.5*Q^2 + 0.75*Q^3   for Q <= 1
        0.25*(2 - Q)^3           for 1 < Q <= 2
        0                        for Q > 2
    scaled by the normalisation k (2/3, 10/(7*pi), 1/pi for dim 1, 2, 3)
    and divided by h**dim.  Any other ``dim`` leaves k at 0, so the result
    is all zeros.

    :param dist: 2-D array of distances.
    :param dim: Spatial dimension (1, 2 or 3).
    :param h: Smoothing length.
    :return: Array of kernel values with the shape of ``dist``.

    ``pi`` is not a local; it is resolved from the enclosing module.
    """
    code = '''
    double k = 0;
    if( dim == 1 ){ k = 2./3; }
    if( dim == 2 ){ k = 10./(7*pi); }
    if( dim == 3 ){ k = 1./pi; }
    for( int i=0; i < shape0; ++i){
        for( int j=0; j < shape1; ++j){
            int index = i + j*shape0;
            double Q = q[index];
            if( Q > 2 ){
                ret[index] = 0;
            } else {
                if( Q > 1 ){
                    ret[index] = k*.25*(2-Q)*(2-Q)*(2-Q);
                }
                else{
                    ret[index] = k*(1 - 1.5*Q*Q + .75*Q*Q*Q);
                }
            }
        }
    }
    '''
    q = dist / h
    ret = np.zeros_like(q)
    shape0 = ret.shape[0]
    shape1 = ret.shape[1]
    weave.inline(code, ['q', 'dim', 'ret', 'pi', 'shape0', 'shape1'], verbose=1)
    return ret.reshape((shape0, shape1)) / h**dim
def liq_prime_unormalized_weave(q):
    """
    Evaluate the piecewise kernel derivative coded below on a 2-D array.

    With u = q/2 each output element is
        0                                      for u > 1
        2*A*u^3 + 1.5*B*u^2 + C*u + D/2        for 0.3 < u <= 1
        -0.5                                   otherwise
    using the constants hard-coded in the C snippet.

    :param q: 2-D array of inputs.
    :return: Array of the same shape as ``q``.
    """
    kernel_src = '''
    double A = -1.458;
    double B = 3.790;
    double C = -2.624;
    double D = -0.2915;
    double E = 0.5831;
    double F = 0.6500;
    for( int i=0; i < shape0; ++i ){
        for( int j=0; j < shape1; ++j ){
            int index = i + j*shape0;
            double u = q[index]/2;
            if( u > 1 ){
                ret[index] = 0;
            } else {
                if( u > 0.3 ){
                    ret[index] = 2*A*pow(u,3) + 1.5*B*pow(u,2) + C*u + D/2;
                } else {
                    ret[index] = - .5;
                }
            }
        }
    }
    '''
    # ret, shape0 and shape1 are referenced by name from the C snippet.
    ret = np.zeros_like(q)
    shape0 = ret.shape[0]
    shape1 = ret.shape[1]
    weave.inline(
        kernel_src,
        ['ret', 'q', 'shape0', 'shape1'],
        compiler='gcc',
        extra_compile_args=['-O3'],
        verbose=1,
    )
    return ret.reshape((shape0, shape1))
def blitz_inline(arr):
    """Print a 3D array from C++ using the blitz type converters.

    The blitz converters give numpy-like indexing on the C++ side.  The
    embedded snippet illustrates:
      1. '\\n' to escape generating a newline in the C++ code.
      2. rows, cols = Narr[0], Narr[1].
      3. Array access using arr(i, j, k).
    """
    printing_loop = """
    int rows = Narr[0];
    int cols = Narr[1];
    int depth = Narr[2];
    for (int i=0; i < rows; i++)
    {
        for (int j=0; j < cols; j++)
        {
            printf("img[%3d][%3d]=", i, j);
            for (int k=0; k< depth; ++k)
            {
                printf(" %3d", arr(i, j, k));
            }
            printf("\\n");
        }
    }
    """
    weave.inline(printing_loop, ['arr'], type_converters=converters.blitz)
def mult_iqu(self,v):
    r"""
    Performs the product of a sparse matrix :math:`Av`, with ``v`` a
    :mod:`numpy` array containing the three Stokes parameters [IQU],
    stored contiguously per pixel.

    .. note::
        Compared to the operation ``mult`` this routine returns a
        :math:`n_t`-size vector defined as:

        .. math::
            d_t= I_p + Q_p \cos(2\phi_t)+ U_p \sin(2\phi_t).

        with :math:`p` the pixel observed at time :math:`t` with
        polarization angle :math:`\phi_t`.

    Samples whose pixel index in ``self.pairs`` is -1 are skipped and stay 0.
    """
    # Every name below is referenced from the C snippet.
    Nrows=self.nrows
    pixs=self.pairs
    cos=self.cos
    sin=self.sin
    x=np.zeros(Nrows)
    projection = r"""
    int i ;
    for ( i=0;i<Nrows;++i){
        if (pixs(i) == -1) continue;
        x(i) += v(3*pixs(i)) + v(3*pixs(i)+1) *cos(i) + v(3*pixs(i)+2) *sin(i);
    }
    """
    headers = r"""
    #include <stdio.h>
    #include <omp.h>
    #include <math.h>"""
    inline(projection,['pixs','v','x','Nrows','cos','sin'],verbose=1,
           extra_compile_args=[' -O3  -fopenmp ' ],
           support_code = headers,
           libraries=['gomp'],type_converters=weave.converters.blitz)
    return x
def _thinningIteration(im, iter):
    """
    Run one thinning sub-iteration on the binary image ``im``.

    For every interior pixel the eight neighbours p2..p9 are inspected; a
    pixel is marked for removal when exactly one 0->1 transition occurs
    around it (A == 1), it has between 2 and 6 set neighbours (B), and the
    two neighbour products selected by ``iter`` (0 or anything else) are 0.

    :param im: 2-D image holding 0/1 values.
    :param iter: Selects which pair of neighbour products is tested.
    :return: ``im`` with the marked pixels cleared (``im & ~marker``).
    """
    # I, M and iter are the names the C snippet refers to.
    I = im
    M = np.zeros(im.shape, np.uint8)
    sweep = """
    for (int i = 1; i < NI[0]-1; i++) {
        for (int j = 1; j < NI[1]-1; j++) {
            int p2 = I2(i-1, j);
            int p3 = I2(i-1, j+1);
            int p4 = I2(i, j+1);
            int p5 = I2(i+1, j+1);
            int p6 = I2(i+1, j);
            int p7 = I2(i+1, j-1);
            int p8 = I2(i, j-1);
            int p9 = I2(i-1, j-1);
            int A  = (p2 == 0 && p3 == 1) + (p3 == 0 && p4 == 1) +
                     (p4 == 0 && p5 == 1) + (p5 == 0 && p6 == 1) +
                     (p6 == 0 && p7 == 1) + (p7 == 0 && p8 == 1) +
                     (p8 == 0 && p9 == 1) + (p9 == 0 && p2 == 1);
            int B  = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9;
            int m1 = iter == 0 ? (p2 * p4 * p6) : (p2 * p4 * p8);
            int m2 = iter == 0 ? (p4 * p6 * p8) : (p2 * p6 * p8);
            if (A == 1 && B >= 2 && B <= 6 && m1 == 0 && m2 == 0) {
                M2(i,j) = 1;
            }
        }
    }
    """
    weave.inline(sweep, ["I", "iter", "M"])
    return (I & ~M)
def _fast_clear_array2d(self, arr):
    """
    Set every element of the 2-D array ``arr`` to zero, in place.

    The C loop addresses elements through the array strides and casts each
    address to the C type named by ``self.type_string``.  The loop is
    wrapped in Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS.
    """
    template = """
    Py_BEGIN_ALLOW_THREADS
    #define __TYPE  %s
    int rows = Narr[0];
    int cols = Narr[1];
    for(int r=0; r < rows; r++){
        for(int c=0; c < cols; c++){
            __TYPE *arr_ptr = (__TYPE *)((char *)arr_array->data + r*arr_array->strides[0] + c*arr_array->strides[1]);
            *arr_ptr = 0.0;
        }
    }
    Py_END_ALLOW_THREADS
    """
    inline(template % self.type_string, ['arr'])
    return
def logistic_map(x0, r, T):
    """
    Returns a time series of length T using the logistic map
    x_(n+1) = r*x_n(1-x_n) at parameter r and using the initial
    condition x0.

    INPUT: x0 - Initial condition, 0 <= x0 <= 1
           r - Bifurcation parameter, 0 <= r <= 4
           T - length of the desired time series

    An empty array is returned for T == 0.
    """
    #  Initialize the time series array
    timeSeries = np.empty(T)

    if T < 1:
        # The C code unconditionally writes timeSeries(0); with an empty
        # buffer that would be an out-of-bounds write.
        return timeSeries

    r = float(r)

    code = r"""
    int i;
    double xn;

    // Set initial condition
    timeSeries(0) = x0;

    for (i = 1; i < T; i++) {
        xn = timeSeries(i-1);
        timeSeries(i) = r * xn * (1 - xn);
    }
    """
    args = ['x0', 'r', 'T', 'timeSeries']
    weave.inline(code, arg_names=args,
                 type_converters=weave.converters.blitz, compiler='gcc',
                 extra_compile_args=['-O3'])

    return timeSeries
def shift_sum(v1, shifts, bins):
    """
    Run the point-chisq inline kernel over ``v1`` for a set of shifts.

    :param v1: Object exposing ``.data``; its precision selects the real
        dtype and the single/double precision kernel.
    :param shifts: Sequence of shifts, converted to the matching real dtype.
    :param bins: Sequence of bin edges, converted to uint32.
    :return: Real array with one value per shift, filled by the kernel.
    """
    real_type = real_same_precision_as(v1)
    shifts = numpy.array(shifts, dtype=real_type)
    bins = numpy.array(bins, dtype=numpy.uint32)
    v1 = numpy.array(v1.data, copy=False)

    # Scalars and the output buffer named in the inline() argument list.
    blen = len(bins) - 1
    slen = len(v1)
    n = int(len(shifts))
    chisq = numpy.zeros(n, dtype=real_type)

    is_single = v1.dtype.name == 'complex64'
    code = point_chisq_code_single if is_single else point_chisq_code_double

    inline(code, ['v1', 'n', 'chisq', 'slen', 'shifts', 'bins', 'blen'],
           extra_compile_args=[WEAVE_FLAGS] + omp_flags,
           libraries=omp_libs)
    return chisq
def __call__(self, P):
    """
    Advance the state of group ``P`` by running the generated C code.

    On the first call the arrays registered in ``self._arrays_to_check``
    are validated against ``len(P)``.  The clock values, the neuron count
    and the state matrix are then published in ``self.namespace`` and the
    C code is executed through weave.

    If the weave call fails, a warning is logged, this object is converted
    into a ``PythonStateUpdater`` and the update is re-run in Python.

    :raises ValueError: if a registered array does not have ``len(P)`` items.
    """
    if self._arrays_to_check is not None:
        N = len(P)
        for name, X in self._arrays_to_check:
            if len(X) != N:
                raise ValueError('Array ' + name + ' has wrong size (' + str(len(X)) + ' instead of ' + str(N) + ')')
        # The check only needs to happen once.
        self._arrays_to_check = None
    self.namespace['dt'] = P.clock._dt
    self.namespace['t'] = P.clock._t
    self.namespace['num_neurons'] = len(P)
    self.namespace['_S'] = P._S
    try:
        weave.inline(self.code_c, self.namespace.keys(),
                     local_dict=self.namespace,
                     support_code=c_support_code,
                     compiler=self._weave_compiler,
                     extra_compile_args=self._extra_compile_args,
                     extra_link_args=self._extra_link_args)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit must propagate instead of silently switching backend.
        log_warn('brian.experimental.codegen.stateupdaters',
                 'C compilation failed, falling back on Python.')
        self.__class__ = PythonStateUpdater
        self.__init__(self.eqs, self.scheme, self.clock, self.freeze)
        self.__call__(P)
def recent_moving_average(x, axis=0):
    """
    Fast computation of recent moving average, where

        frac = 1/sqrt(t)
        a[t] = (1-frac)*a[t-1] + frac*x[t]

    with t counted from 1 along the first axis, so the first output row is
    simply the first input row.

    :param x: Array whose first axis is the time axis.  Inputs that are not
        2-D are flattened to (x.shape[0], -1), averaged and reshaped back.
    :param axis: Must be 0 for 2-D input; other axes are not implemented.
    :return: Float array with the same shape as x.
    """
    if x.size == 0:
        # Nothing to average; also avoids dividing by x.shape[0] == 0 below
        # and writing the first row of an empty buffer in the C code.
        return np.zeros(x.shape)
    if x.ndim != 2:
        y = recent_moving_average(x.reshape(x.shape[0], x.size // x.shape[0]), axis=0)
        return y.reshape(x.shape)
    assert x.ndim == 2 and axis == 0, 'Only implemented for a special case!'
    result = np.zeros(x.shape)

    try:
        import weave  # ONLY WORKS IN PYTHON 2.X !!!
    except ImportError:
        weave = None

    if weave is None:
        # Pure numpy fallback: same recurrence, one row (time step) at a time.
        result[0] = x[0]
        for t in range(1, x.shape[0]):
            frac = 1. / np.sqrt(t + 1)
            result[t] = (1 - frac) * result[t - 1] + frac * x[t]
        return result

    # The C code walks the buffer with a flat index, so it needs C order.
    x = np.ascontiguousarray(x)
    code = """
    int n_samples = Nx[0];
    int n_dim = Nx[1];
    for (int i=0; i<n_dim; i++)
        result[i] = x[i];
    int ix=n_dim;
    for (int t=1; t<n_samples; t++){
        double frac = 1./sqrt(t+1.);
        for (int i=0; i<n_dim; i++){
            result[ix] = (1-frac)*result[ix-n_dim] + frac*x[ix];
            // Advance once per element so every column of row t is written.
            ix += 1;
        }
    }
    """
    weave.inline(code, ['x', 'result'], compiler='gcc')
    return result
def __call__(self, _spikes):
    """
    Run the generated code for the given spikes.

    The object is prepared on first use.  Nothing else happens when
    ``_spikes`` is empty.  Otherwise the spikes (converted to an integer
    array if needed) and their count are stored in ``self.vars``, the
    optional compiled Python code is executed in ``self.pyvars``, and the
    C code in ``self.codestr`` is run through weave.
    """
    if not self.prepared:
        self.prepare()
    if not len(_spikes):
        return
    if not isinstance(_spikes, numpy.ndarray):
        _spikes = array(_spikes, dtype=int)
    namespace = self.vars
    namespace['_spikes'] = _spikes
    namespace['_spikes_len'] = len(_spikes)
    if self.compiled_pycode is not None:
        # Tuple form of exec: equivalent to "exec code in namespace".
        exec(self.compiled_pycode, self.pyvars)
    weave.inline(self.codestr, self.vars_list,
                 local_dict=self.vars,
                 support_code=c_support_code,
                 compiler=self._weave_compiler,
                 extra_compile_args=self._extra_compile_args)
def blitz_inline(arr):
    """Increment and print a 3D array from C++ using the blitz converters.

    Every element of ``arr`` is incremented by one in place before being
    printed.  The embedded snippet illustrates:
      1. '\\n' to escape generating a newline in the C++ code.
      2. rows, cols = Narr[0], Narr[1].
      3. Array access using arr(i, j, k).
    """
    printing_loop = """
    int rows = Narr[0];
    int cols = Narr[1];
    int depth = Narr[2];
    for (int i=0; i < rows; i++)
    {
        for (int j=0; j < cols; j++)
        {
            printf("img[%3d][%3d]=", i, j);
            for (int k=0; k< depth; ++k)
            {
                arr(i, j, k) += 1;
                printf(" %3d", arr(i, j, k));
            }
            printf("\\n");
        }
    }
    """
    weave.inline(printing_loop, ['arr'], type_converters=converters.blitz)
def insert_C(self, delay, target):
    '''
    Insertion of events using weave.

    ``delay``
        Delays in timesteps (array).

    ``target``
        Target synaptic indexes (array).

    Event k is stored in row ``(currenttime + delay[k]) % len(self.n)`` at
    the next free column of that row, and the row counter in ``self.n`` is
    incremented.
    '''
    nevents = len(target)
    # Crude capacity check: assume every event could land in the fullest row.
    required = max(self.n) + nevents
    if required > self.X.shape[1]:
        self.resize(required)

    # Read the storage only after the possible resize.
    n = self.n
    Xflat = self.X_flat
    ncols = self.X.shape[1]
    ndelays = len(self.n)
    currentt = self.currenttime

    insertion_loop = '''
    for(int k=0;k<nevents;k++) {
        const int d = (currentt+delay[k]) % ndelays;
        Xflat[d*ncols+n[d]] = target[k];
        n[d]++;
    }
    '''
    weave.inline(insertion_loop,
                 ['nevents','n','delay','Xflat','target','ncols','currentt','ndelays'],
                 compiler=self._cpp_compiler,
                 extra_compile_args=self._extra_compile_args)
def test_fast(self, X):
    """
    Evaluate the compact tree on every row of ``X``.

    Layout of ``self.compact_tree`` as used by the C loop:
      * non-leaf node: [0] one child index, [1] the other child index
        (taken when the tested feature is below the threshold),
        [2] feature dimension to test, [3] threshold;
      * leaf node: [0] is -1, [1] the node id, [2:] one value per class.

    :param X: 2-D array of examples (rows) by features (columns).
    :return: Array of shape (X.shape[0], num_classes) with the leaf values.
    """
    n_examples = X.shape[0]
    op = np.zeros((n_examples, self.tree_params.num_classes))
    tree = self.compact_tree  # local alias so the C snippet can name it

    traversal = """
    int ex_id, node_loc, c_it;
    for (ex_id=0; ex_id<NX[0]; ex_id++) {
        node_loc = 0;
        while (tree[node_loc] != -1) {
            if (X2(ex_id, int(tree[node_loc+2]))  <  tree[node_loc+3]) {
                node_loc = tree[node_loc+1];  // right node
            }
            else {
                node_loc = tree[node_loc];  // left node
            }
        }
        for (c_it=0; c_it<Nop[1]; c_it++) {
            OP2(ex_id, c_it) = tree[node_loc + 2 + c_it];
        }
    }
    """
    weave.inline(traversal, ['X', 'op', 'tree'])
    return op
def step(self, iter):
    """
    Run one thinning sub-iteration on ``self.image``.

    For every interior pixel the eight neighbours p2..p9 are inspected; the
    pixel is masked out when exactly one 0->1 transition occurs around it
    (A == 1), it has between 2 and 6 set neighbours (B), and the two
    neighbour products selected by ``iter`` (1 or anything else) are 0.
    Border pixels are never masked because their neighbourhood is
    incomplete.

    :param iter: Selects which pair of neighbour products is tested.
    :return: ``self.image`` multiplied element-wise by the 0/1 keep mask.
    """
    I = self.image
    M = np.ones(self.image.shape)
    a, b = self.image.shape
    # The stencil touches (i +/- 1, j +/- 1), so both loops stay strictly
    # inside the image to avoid reading outside the buffer.
    thin = """
    for (int i = 1; i < a-1; i++){
        for (int j = 1; j < b-1; j++){
            int p2 = I2(i-1, j);
            int p3 = I2(i-1, j+1);
            int p4 = I2(i, j+1);
            int p5 = I2(i+1, j+1);
            int p6 = I2(i+1, j);
            int p7 = I2(i+1, j-1);
            int p8 = I2(i, j-1);
            int p9 = I2(i-1, j-1);
            int A  = (p2 == 0 && p3 == 1) + (p3 == 0 && p4 == 1) +
                     (p4 == 0 && p5 == 1) + (p5 == 0 && p6 == 1) +
                     (p6 == 0 && p7 == 1) + (p7 == 0 && p8 == 1) +
                     (p8 == 0 && p9 == 1) + (p9 == 0 && p2 == 1);
            int B  = p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9;
            int m1 = iter == 1 ? (p2 * p4 * p6) : (p2 * p4 * p8);
            int m2 = iter == 1 ? (p4 * p6 * p8) : (p2 * p6 * p8);
            if (A == 1 && B >= 2 && B <= 6 && m1 == 0 && m2 == 0) {
                M2(i,j) = 0;
            }
        }
    }
    """
    weave.inline(thin, ["I", "iter", "M", "a", "b"])
    return np.multiply(I, M)
def grad_dist2(ls, x1, x2=None):
    """
    Gradient of the length-scaled squared distance with respect to ``x1``.

    After rescaling both inputs by ``ls`` the result is

        gX[i, j, d] = (2 / ls[d]) * (x1[i, d] - x2[j, d])

    :param ls: Length scales, one per input dimension.
    :param x1: Array of shape (N, D).
    :param x2: Array of shape (M, D); defaults to ``x1``.
    :return: Array of shape (N, M, D).

    The weave kernel is tried first; if it cannot be used for any ordinary
    reason the same values are computed with numpy broadcasting.
    """
    if x2 is None:
        x2 = x1

    # Rescale.
    x1 = x1 / ls
    x2 = x2 / ls

    N = x1.shape[0]
    M = x2.shape[0]
    D = x1.shape[1]
    gX = np.zeros((x1.shape[0], x2.shape[0], x1.shape[1]))

    code = \
    """
    for (int i=0; i<N; i++)
      for (int j=0; j<M; j++)
        for (int d=0; d<D; d++)
          gX(i,j,d) = (2/ls(d))*(x1(i,d) - x2(j,d));
    """
    try:
        weave.inline(code, ['x1','x2','gX','ls','M','N','D'],
                     type_converters=weave.converters.blitz,
                     compiler='gcc')
    except Exception:
        # Fallback when the compiled path is unavailable.  KeyboardInterrupt
        # and SystemExit are deliberately not caught.
        gX[...] = 2 * (x1[:, None, :] - x2[None, :, :]) * (1.0 / ls)

    return gX
def flagging_samples(self):
    """
    Flags the time samples related to bad pixels to -1.

    Every entry of ``self.pixs`` that is not already -1 is replaced, in
    place, by its image through ``self.old2new``.
    (Presumably ``old2new`` maps bad pixels to -1 -- confirm where it is built.)
    """
    # Names referenced from the C snippet.
    pixs = self.pixs
    o2n = self.old2new
    N = self.nsamples

    remap_loop = """
    int i,pixel;
    for ( i=0;i<N;++i){
        pixel=pixs(i);
        if (pixel == -1) continue;
        pixs(i)=o2n(pixel);
    }
    """
    headers = r"""
    #include <stdio.h>
    #include <omp.h>
    #include <math.h>"""
    inline(remap_loop, ['pixs', 'o2n', 'N'], verbose=1,
           extra_compile_args=[' -O3  -fopenmp '],
           support_code=headers,
           libraries=['gomp'], type_converters=weave.converters.blitz)
def decode(llr):
    """
    Decode a message from the soft values in ``llr`` with a 128-state trellis.

    The first 2*N entries of ``llr`` (N = llr.size // 2) are grouped in
    pairs, weighted with the module-level ``output_map_soft`` and summed
    over the pair axis to give one metric per (step, state) in ``x``.
    The C code then runs an add-compare-select pass over the N steps,
    storing one decision per (step, state), and walks the decisions
    backwards writing ``i >> 6`` for the current state ``i`` into ``msg``.
    (Looks like a Viterbi-style decoder -- confirm against the encoder.)

    :param llr: 1-D array of soft values.
    :return: uint8 array of length N with the decoded values.
    """
    N = llr.size//2
    # assumes output_map_soft broadcasts against (N, 2, 1) to (N, 2, 128)
    # -- TODO confirm its shape where it is defined.
    x = (llr[:N*2].reshape(-1,2,1)*output_map_soft).sum(1)
    msg = np.empty(N, np.uint8)
    # NOTE(review): bt[N][M] is a variable-length array on the C stack, so a
    # very large N may exhaust it; the int64 costs are also narrowed to int
    # before being compared.
    weave.inline("""
    const int M = 128;
    int64_t cost[M*2], scores[M] = {/* zero-initialized */};
    uint8_t bt[N][M];
    for (int k=0; k<N; k++) {
        for (int i=0; i<M; i++) {
            cost[2*i+0] = scores[((i<<1) & 127) | 0] + x(k, i);
            cost[2*i+1] = scores[((i<<1) & 127) | 1] + x(k, i);
        }
        for (int i=0; i<M; i++) {
            int a = cost[2*i+0];
            int b = cost[2*i+1];
            bt[k][i] = (a<b) ? 1 : 0;
            scores[i] = (a<b) ? b : a;
        }
    }
    int i = (scores[0] < scores[1]) ? 1 : 0;
    for (int k=N-1; k>=0; k--) {
        int j = bt[k][i];
        msg(k) = i >> 6;
        i = ((i<<1)&127) + j;
    }
    """, ['N','x','msg'], type_converters=weave.converters.blitz)
    return msg
def insert_C(self,delay,target):
    '''
    Insertion of events using weave.

    ``delay``
        Delays in timesteps (array).

    ``target``
        Target synaptic indexes (array).

    Event k is stored in row ``(currenttime + delay[k]) % len(self.n)`` at
    the next free column of that row, and the row counter in ``self.n`` is
    incremented.
    '''
    nevents=len(target)
    # Crude capacity check: assume every event could land in the fullest row.
    required=max(self.n)+nevents
    if required>self.X.shape[1]:
        self.resize(required)

    # Read the storage only after the possible resize.
    n=self.n
    Xflat=self.X_flat
    ncols=self.X.shape[1]
    ndelays=len(self.n)
    currentt=self.currenttime

    insertion_loop='''
    for(int k=0;k<nevents;k++) {
        const int d = (currentt+delay[k]) % ndelays;
        Xflat[d*ncols+n[d]] = target[k];
        n[d]++;
    }
    '''
    weave.inline(insertion_loop,
                 ['nevents','n','delay','Xflat','target','ncols','currentt','ndelays'],
                 compiler=self._cpp_compiler,
                 extra_compile_args=self._extra_compile_args)
def heat_equation(t0, t1, dt, n, m, u, f, nu):
    """
    Advance the heat equation on an (n, m) grid from t0 to t1, in place.

    Each step applies the explicit five-point update
        u += dt * (nu * (sum of the 4 neighbours - 4*u) + f)
    to the interior points; the boundary of ``u`` is left untouched.

    :param t0: Start time.
    :param t1: End time.
    :param dt: Time step; ``(t1 - t0)/dt`` steps (truncated) are performed.
    :param n: Number of rows of ``u``.
    :param m: Number of columns of ``u``.
    :param u: (n, m) array with the initial state; overwritten with the result.
    :param f: (n, m) source term.
    :param nu: Diffusion coefficient.
    :return: ``u`` (the same array object that was passed in).
    """
    # Scratch buffer for the new time level.
    u_new = np.zeros((n, m))
    code = """
    // Number of steps between t0 and t1 (not merely t1/dt, which is only
    // correct when t0 == 0).
    int steps = (t1 - t0)/dt;
    int step, i, j;
    // Timestep loop
    for(step = 0; step < steps; step++) {
        for(i = 1; i < n - 1; i++) {
            for(j = 1; j < m - 1; j++) {
                U_NEW2(i,j) = U2(i, j) + dt*(nu*U2(i-1,j) + nu*U2(i,j-1) - 4*nu*U2(i,j) + nu*U2(i,j+1) + nu*U2(i+1,j) + F2(i,j));
            }
        }
        // Update U2 with the values from this run.
        // This can probably be done with pointer swap also
        for(i = 1; i < n - 1; i++) {
            for(j = 1; j < m - 1; j++) {
                U2(i, j) = U_NEW2(i, j);
            }
        }
    }
    """
    inline(code, ['t0', 't1', 'dt', 'n', 'm', 'u', 'f', 'nu', 'u_new'])
    return u
def shift_sum(v1, shifts, bins):
    """
    Run the point-chisq inline kernel over ``v1`` for a set of shifts.

    :param v1: Object exposing ``.data``; its precision selects the real
        dtype and the single/double precision kernel.
    :param shifts: Sequence of shifts, converted to the matching real dtype.
    :param bins: Sequence of bin edges, converted to uint32.
    :return: Real array with one value per shift, filled by the kernel.
    """
    real_type = real_same_precision_as(v1)
    shifts = numpy.array(shifts, dtype=real_type)
    bins = numpy.array(bins, dtype=numpy.uint32)
    v1 = numpy.array(v1.data, copy=False)

    # Scalars and the output buffer named in the inline() argument list.
    blen = len(bins) - 1 # pylint:disable=unused-variable
    slen = len(v1) # pylint:disable=unused-variable
    n = int(len(shifts))
    chisq = numpy.zeros(n, dtype=real_type)

    is_single = v1.dtype.name == 'complex64'
    code = point_chisq_code_single if is_single else point_chisq_code_double

    inline(code, ['v1', 'n', 'chisq', 'slen', 'shifts', 'bins', 'blen'],
           extra_compile_args=[WEAVE_FLAGS] + omp_flags,
           libraries=omp_libs)
    return chisq
def get_leaf_ids(self, X):
    """
    Return, for every row of ``X``, the id of the leaf it ends up in.

    Layout of ``self.compact_tree`` as used by the C loop:
      * non-leaf node: [0] one child index, [1] the other child index
        (taken when the tested feature is below the threshold),
        [2] feature dimension to test, [3] threshold;
      * leaf node: [0] is -1, [1] the node id, the rest per-class values.

    :param X: 2-D array of examples (rows) by features (columns).
    :return: Float array of length X.shape[0] holding the leaf ids.
    """
    n_examples = X.shape[0]
    op = np.zeros((n_examples))
    tree = self.compact_tree  # local alias so the C snippet can name it

    traversal = """
    int ex_id, node_loc;
    for (ex_id=0; ex_id<NX[0]; ex_id++) {
        node_loc = 0;
        while (tree[node_loc] != -1) {
            if (X2(ex_id, int(tree[node_loc+2]))  <  tree[node_loc+3]) {
                node_loc = tree[node_loc+1];  // right node
            }
            else {
                node_loc = tree[node_loc];  // left node
            }
        }
        op[ex_id] = tree[node_loc + 1];  // leaf id
    }
    """
    weave.inline(traversal, ['X', 'op', 'tree'])
    return op
def rmult(self,v):
    """
    Performs the product for the transpose operator :math:`A^T`.

    Each sample ``v[i]`` whose pixel index ``self.pairs[i]`` is not -1 is
    accumulated into the output entry of that pixel.

    :param v: Vector with ``self.nrows`` samples.
    :return: Vector of length ``self.ncols``.
    """
    # Names referenced from the C snippet.
    Nrows=self.nrows
    pixs=self.pairs
    x=np.zeros(self.ncols)

    accumulate = r"""
    int i ;
    for ( i=0;i<Nrows;++i){
        if (pixs(i) == -1) continue;
        x(pixs(i))+=v(i);
    }
    """
    headers = r"""
    #include <stdio.h>
    #include <omp.h>
    #include <math.h>"""
    inline(accumulate,['pixs','v','x','Nrows'],verbose=1,
           extra_compile_args=[' -O3  -fopenmp ' ],
           support_code = headers,
           libraries=['gomp'],type_converters=weave.converters.blitz)
    return x
def compute_psi_weave_single(lls, lsf, xmean, xvar, z):
    """
    Compute the psi1 vector and psi2 matrix for a single input distribution.

    With ls = exp(lls), sf = exp(lsf) and z of shape (M, Q):

        psi1[m]      = sf * exp( sum_q [ -(xmean_q - z[m,q])^2 / (2*(xvar_q + ls_q))
                                         + 0.5*log(ls_q / (ls_q + xvar_q)) ] )
        psi2[m1,m2]  = sf^2 * exp( sum_q [ -(z[m1,q] - z[m2,q])^2 / (4*ls_q)
                                           - (xmean_q - (z[m1,q]+z[m2,q])/2)^2 / (2*xvar_q + ls_q)
                                           + 0.5*log(ls_q / (ls_q + 2*xvar_q)) ] )

    Only the lower triangle of psi2 is computed; it is mirrored to the upper.

    :param lls: Log of the per-dimension scale ``ls`` (length Q).
    :param lsf: Log of the scalar factor ``sf``.
    :param xmean: Input mean per dimension (length Q).
    :param xvar: Input variance per dimension (length Q).
    :param z: Array of shape (M, Q).
    :return: Tuple ``(psi1, psi2)`` of shapes (M,) and (M, M).
    """
    ls = np.exp(lls)
    sf = float(np.exp(lsf))

    M, Q = z.shape[0], z.shape[1]

    # Log-normalisers, one entry per input dimension.
    log_denom_psi2 = 0.5 * np.log(ls / (ls + 2.0 * xvar))
    log_denom_psi1 = 0.5 * np.log(ls / (ls + xvar))

    psi1 = np.empty(M)
    psi2 = np.empty((M, M))

    support_code = """
    #include <math.h>
    """
    kernel = """
    for(int m1=0; m1<M; m1++) {
        double log_psi1 = 0;
        for(int m2=0; m2<=m1; m2++) {
            double log_psi2 = 0;
            for(int q=0; q<Q; q++) {
                double vq = xvar(q);
                double lq = ls(q);
                double z1q = z(m1, q);
                double z2q = z(m2, q);

                if (m2==0) {
                    double muz = xmean(q) - z1q;
                    double psi1_exp = -muz*muz/2.0/(vq+lq) + log_denom_psi1(q);
                    log_psi1 += psi1_exp;
                }

                double muzhat = xmean(q) - (z1q+z2q)/2.0;
                double dz = z1q-z2q;
                double psi2_exp = - dz*dz/(4.0*lq) - muzhat*muzhat/(2.0*vq+lq) + log_denom_psi2(q);
                log_psi2 += psi2_exp;
            }
            double exp_psi2 = exp(log_psi2);
            psi2(m1, m2) = sf*sf*exp_psi2;
            if (m1 != m2) {
                psi2(m2, m1) = sf*sf*exp_psi2;
            }
        }
        psi1(m1) = sf*exp(log_psi1);
    }
    """
    names = ['psi1', 'psi2', 'M', 'Q', 'sf', 'ls', 'z', 'xmean', 'xvar',
             'log_denom_psi1', 'log_denom_psi2']
    weave.inline(kernel, support_code=support_code, arg_names=names,
                 type_converters=weave.converters.blitz)
    return psi1, psi2
def rmult_iqu(self,v):
    """
    Performs the product for the transpose operator :math:`A^T` to get an
    IQU map-like vector.

    The output has ``self.ncols*self.pol`` entries; the IQU values of one
    pixel are stored contiguously.  For every sample ``i`` whose pixel ``p``
    is not -1, ``v[i]``, ``v[i]*cos[i]`` and ``v[i]*sin[i]`` are added to
    entries ``3p``, ``3p+1`` and ``3p+2``.
    """
    # Names referenced from the C snippet.
    N=self.nrows
    pixs=self.pairs
    cos=self.cos
    sin=self.sin
    x=np.zeros(self.ncols*self.pol)

    accumulate = """
    int i;
    for ( i=0;i<N;++i){
        if (pixs(i) == -1) continue;
        x(3*pixs(i))   += v(i);
        x(3*pixs(i)+1) += v(i)*cos(i);
        x(3*pixs(i)+2) += v(i)*sin(i);
    }
    """
    headers = r"""
    #include <stdio.h>
    #include <omp.h>
    #include <math.h>"""
    inline(accumulate,['pixs','v','x','N','cos','sin'],verbose=1,
           extra_compile_args=[' -O3  -fopenmp ' ],
           support_code = headers,
           libraries=['gomp'],type_converters=weave.converters.blitz)
    return x
def qspline_prime_unormalized_weave(q):
    """
    Derivative of the un-normalised quintic spline kernel on a 2-D array.

    For each element Q of ``q`` the result is
        0                                            for Q > 3
        -5*(3-Q)^4                                   for 2 < Q <= 3
        -5*(3-Q)^4 + 30*(2-Q)^4                      for 1 < Q <= 2
        -5*(3-Q)^4 + 30*(2-Q)^4 - 75*(1-Q)^4         for Q <= 1
    i.e. the derivative of (3-Q)^5 - 6*(2-Q)^5 + 15*(1-Q)^5 with each term
    switched off beyond its support.

    :param q: 2-D array of inputs.
    :return: Array of the same shape as ``q``.
    """
    code = '''
    for( int i=0; i < shape0; ++i ){
        for( int j=0; j < shape1; ++j ){
            int index = i + j*shape0;
            double Q = q[index];
            if( Q > 3 ){
                ret[index] = 0;
            } else {
                if( Q > 2 ){
                    ret[index] = -5*pow(3-Q, 4);
                } else {
                    if( Q > 1 ){
                        ret[index] = -5*pow(3-Q, 4) + 5*6*pow(2-Q, 4);
                    }
                    else{
                        ret[index] = -5*pow(3-Q, 4) + 5*6*pow(2-Q, 4) - 5*15*pow(1-Q, 4);
                    }
                }
            }
        }
    }
    '''
    ret = np.zeros_like(q)
    shape0 = ret.shape[0]
    shape1 = ret.shape[1]
    weave.inline(code, ['ret', 'q', 'shape0', 'shape1'], verbose=1,
                 compiler='gcc', extra_compile_args=['-O3'])
    return ret.reshape((shape0, shape1))
def mult_qu(self,v):
    r"""
    Performs :math:`A * v` with :math:`v` being a *polarization* vector.
    The output array will encode a linear combination of the two Stokes
    parameters (whose components are stored contiguously).

    .. math::
        d_t=  Q_p \cos(2\phi_t)+ U_p \sin(2\phi_t).

    Samples whose pixel index in ``self.pairs`` is -1 are skipped and stay 0.
    """
    # Names referenced from the C snippet.
    Nrows=self.nrows
    pixs=self.pairs
    cos=self.cos
    sin=self.sin
    x=np.zeros(Nrows)

    projection = """
    int i ;
    for ( i=0;i<Nrows;++i){
        if (pixs(i) == -1) continue;
        x(i)+=v(2*pixs(i)) *cos(i) + v(2*pixs(i)+1) *sin(i);
    }
    """
    headers = r"""
    #include <stdio.h>
    #include <omp.h>
    #include <math.h>"""
    inline(projection,['pixs','v','x','Nrows','cos','sin'],verbose=1,
           extra_compile_args=[' -O3  -fopenmp ' ],
           support_code = headers,
           libraries=['gomp'],type_converters=weave.converters.blitz)
    return x
def aggregate(group_idx, a, func='sum', size=None, fill_value=0, order='C', dtype=None, axis=None, **kwargs):
    """
    Group-wise reduction of ``a`` by ``group_idx`` using the weave C kernels.

    ``func`` is resolved through ``get_func``; only functions that resolve
    to a string name (a key of ``c_funcs``) are supported here.

    :param group_idx: Group index (or indices) for every element of ``a``.
    :param a: Values to aggregate, or a scalar.
    :param func: Name (or alias) of the reduction.
    :param size: Output size, forwarded to ``input_validation``.
    :param fill_value: Value for groups that received no element.
    :param order: Memory order used for multi-dimensional output.
    :param dtype: Output dtype, resolved through ``check_dtype``.
    :param axis: Axis argument forwarded to ``input_validation``.
    :param kwargs: ``ddof`` is read for the std/var family.
    :return: Array with one entry per group.
    :raises NotImplementedError: if ``func`` does not resolve to a name.
    """
    func = get_func(func, aliasing, optimized_funcs)
    if not isstr(func):
        raise NotImplementedError("generic functions not supported, in the weave implementation of aggregate")

    # Preparations for optimized processing
    group_idx, a, flat_size, ndim_idx, size = input_validation(group_idx, a, size=size, order=order, axis=axis)
    dtype = check_dtype(dtype, func, a, len(group_idx))
    check_fill_value(fill_value, dtype)
    nans = func.startswith('nan')

    if nans:
        # One extra leading slot; it is dropped again after the kernel ran.
        flat_size += 1

    # Start from the neutral element of the reduction where there is one,
    # otherwise from the fill value.
    if func in ('sum', 'any', 'len', 'anynan', 'nansum', 'nanlen'):
        ret = np.zeros(flat_size, dtype=dtype)
    elif func in ('prod', 'all', 'allnan', 'nanprod'):
        ret = np.ones(flat_size, dtype=dtype)
    else:
        ret = np.full(flat_size, fill_value, dtype=dtype)

    # In case we should get some ugly fortran arrays, convert them
    inline_vars = dict(group_idx=np.ascontiguousarray(group_idx), a=np.ascontiguousarray(a),
                       ret=ret, fill_value=fill_value)
    # TODO: Have this fixed by proper raveling
    if func in ('std', 'var', 'nanstd', 'nanvar'):
        counter = np.zeros_like(ret, dtype=int)
        inline_vars['means'] = np.zeros_like(ret)
        inline_vars['ddof'] = kwargs.pop('ddof', 0)
    elif func in ('mean', 'nanmean'):
        counter = np.zeros_like(ret, dtype=int)
    else:
        # Using inverse logic, marking anything touched with zero for later removal
        counter = np.ones_like(ret, dtype=bool)
    inline_vars['counter'] = counter

    if np.isscalar(a):
        # Scalar input uses the dedicated "<func>scalar" kernel and is passed
        # through unchanged instead of as a contiguous array.
        func += 'scalar'
        inline_vars['a'] = a
    inline(c_funcs[func], inline_vars.keys(), local_dict=inline_vars, define_macros=c_macros, extra_compile_args=c_args)

    # Postprocessing
    # For these families ret started at the neutral element, so the entries
    # still flagged in counter are overwritten with the requested fill value.
    if func in ('sum', 'any', 'anynan', 'nansum') and fill_value != 0:
        ret[counter] = fill_value
    elif func in ('prod', 'all', 'allnan', 'nanprod') and fill_value != 1:
        ret[counter] = fill_value

    if nans:
        # Restore the shifted return array
        ret = ret[1:]

    # Deal with ndimensional indexing
    if ndim_idx > 1:
        ret = ret.reshape(size, order=order)
    return ret
def inner_inline_real(self, other):
    """
    Run the ``inner_code`` kernel on this array's data and ``other``.

    The kernel receives both arrays, the length of ``self`` and a
    one-element float64 accumulator.

    :param other: Array-like operand.
    :return: The accumulator's value after the kernel ran.
    """
    # Names referenced by the inline kernel.
    N = len(self)
    total = _np.array([0.], dtype=float64)
    x = _np.array(self._data, copy=False)
    y = _np.array(other, copy=False)
    inline(
        inner_code,
        ['x', 'y', 'total', 'N'],
        extra_compile_args=code_flags,
        libraries=omp_libs,
    )
    return total[0]
def Ramp_list1(result, start, end):
    """
    Fill the list ``result`` in place with a linear ramp from start to end.

    Element i becomes ``start + i*(end-start)/(len(result)-1)``.  A
    one-element list receives ``start``; an empty list is left untouched.

    :param result: Pre-sized list to overwrite.
    :param start: Value of the first element.
    :param end: Value of the last element.
    """
    size = len(result)
    if size < 2:
        # The step would be (end-start)/0 for a single element.
        if size == 1:
            result[0] = float(start)
        return
    # Force floating point so that integer endpoints do not make the C
    # expression (end-start)/(size-1) an integer division.
    start = float(start)
    end = float(end)
    code = """
    const int size = result.len();
    const double step = (end-start)/(size-1);
    for (int i = 0; i < size; i++)
        result[i] = start + step*i;
    """
    weave.inline(code, ["result", "start", "end"], verbose=2)
def Ramp_list1(result, start, end):
    """
    Fill the list ``result`` in place with a linear ramp from start to end.

    Element i becomes ``start + i*(end-start)/(len(result)-1)``.  A
    one-element list receives ``start``; an empty list is left untouched.

    :param result: Pre-sized list to overwrite.
    :param start: Value of the first element.
    :param end: Value of the last element.
    """
    size = len(result)
    if size < 2:
        # The step would be (end-start)/0 for a single element.
        if size == 1:
            result[0] = float(start)
        return
    # Force floating point so that integer endpoints do not make the C
    # expression (end-start)/(size-1) an integer division.
    start = float(start)
    end = float(end)
    code = """
    const int size = result.len();
    const double step = (end-start)/(size-1);
    for (int i = 0; i < size; i++)
        result[i] = start + step*i;
    """
    weave.inline(code, ["result","start", "end"], verbose=2)
def run(self):
    """Execute ``self.code_str`` through weave with ``self.namespace`` as its variables."""
    variable_names = self.namespace.keys()
    weave.inline(self.code_str, variable_names,
                 local_dict=self.namespace,
                 compiler=self._weave_compiler,
                 extra_compile_args=self._extra_compile_args)
def embed_time_series_array(self, time_series_array, dimension, delay):
    """
    Return a :index:`delay embedding` of all time series.

    .. note::
       Only works for scalar time series!

    **Example:**

    >>> ts = Surrogates.SmallTestData().original_data
    >>> Surrogates.SmallTestData().embed_time_series_array(
    ...     time_series_array=ts, dimension=3, delay=2)[0,:6,:]
    array([[ 0.        ,  0.61464833,  1.14988147],
           [ 0.31244015,  0.89680225,  1.3660254 ],
           [ 0.61464833,  1.14988147,  1.53884177],
           [ 0.89680225,  1.3660254 ,  1.6636525 ],
           [ 1.14988147,  1.53884177,  1.73766672],
           [ 1.3660254 ,  1.6636525 ,  1.76007351]])

    :type time_series_array: 2D array [index, time]
    :arg time_series_array: The time series array to be embedded.
    :arg int dimension: The embedding dimension.
    :arg int delay: The embedding delay.
    :rtype: 3D array [index, time, dimension]
    :return: the embedded time series; component ``j`` of embedded point
             ``k`` is the original sample at time ``k + j*delay``.
    """
    if self.silence_level <= 1:
        print("Embedding all time series in dimension {0} and with lag {1} ...".format(dimension, delay))

    N, n_time = time_series_array.shape

    # Each series loses (dimension - 1)*delay points through the embedding.
    embedded_length = n_time - (dimension - 1)*delay
    embedding = np.empty((N, embedded_length, dimension))

    code = r"""
    int i, j, k, max_delay, len_embedded, index;

    //  Calculate the maximum delay
    max_delay = (dimension - 1)*delay;
    //  Calculate the length of the embedded time series
    len_embedded = n_time - max_delay;

    for (i = 0; i < N; i++) {
        for (j = 0; j < dimension; j++) {
            index = j*delay;
            for (k = 0; k < len_embedded; k++) {
                embedding(i,k,j) = time_series_array(i,index);
                index++;
            }
        }
    }
    """
    args = ['N', 'n_time', 'dimension', 'delay', 'time_series_array',
            'embedding']
    weave.inline(code, arg_names=args,
                 type_converters=weave.converters.blitz, compiler='gcc',
                 extra_compile_args=['-O3'])
    return embedding
def ode_rhs(self, t, y, p):
    """
    Evaluate the right-hand side of the three-variable ODE system.

    The derivatives are written into the preallocated ``self.ydot``:
        ydot[0] = -p[0]*y[0] + p[2]*y[1]*y[2]
        ydot[1] =  p[0]*y[0] - p[1]*y[1]^2 - p[2]*y[1]*y[2]
        ydot[2] =  p[1]*y[1]^2

    :param t: Time; passed to the kernel but not used by it.
    :param y: State vector (3 entries are read).
    :param p: Parameter vector (3 entries are read).
    :return: ``self.ydot``.
    """
    ydot = self.ydot
    rhs_source = r'''
    ydot[0] = -p[0]*y[0] + p[2]*y[1]*y[2];
    ydot[1] = p[0]*y[0] - p[1]*pow(y[1], 2) - p[2]*y[1]*y[2];
    ydot[2] = p[1]*pow(y[1], 2);
    '''
    weave.inline(rhs_source, ['ydot', 't', 'y', 'p'])
    return ydot
def Ramp_numeric1(result, start, end):
    """
    Fill the numpy array ``result`` in place with a linear ramp.

    Element i becomes ``start + i*(end-start)/(len(result)-1)``.  A
    one-element array receives ``start``; an empty array is left untouched.

    :param result: Pre-allocated 1-D array to overwrite.
    :param start: Value of the first element.
    :param end: Value of the last element.
    """
    size = len(result)
    if size < 2:
        # The step would be (end-start)/0 for a single element.
        if size == 1:
            result[0] = start
        return
    # Force floating point so that integer endpoints do not make the C
    # expression (end-start)/(size-1) an integer division.
    start = float(start)
    end = float(end)
    code = """
    const int size = Nresult[0];
    const double step = (end-start)/(size-1);
    for (int i = 0; i < size; i++)
        *result++ = start + step*i;
    """
    weave.inline(code, ['result', 'start', 'end'], compiler='gcc')
def batch_correlate_execute(self, y):
    """
    Run the batch correlator kernel on ``self.x``, ``y`` and ``self.z``.

    The three operands are passed as numpy views of their ``.data``
    together with ``self.size`` and ``self.num_vectors``.  Nothing is
    returned by this method.
    """
    # Every local below is named in the inline() argument list.
    x = numpy.array(self.x.data, copy=False) # pylint:disable=unused-variable
    y = numpy.array(y.data, copy=False)
    z = numpy.array(self.z.data, copy=False) # pylint:disable=unused-variable
    size = self.size # pylint:disable=unused-variable
    num_vectors = self.num_vectors # pylint:disable=unused-variable
    inline(
        batch_correlator_code,
        ['x', 'y', 'z', 'size', 'num_vectors'],
        libraries=omp_libs,
        extra_compile_args=[WEAVE_FLAGS] + omp_flags,
    )
def inner_inline_real(self, other):
    """
    Run the ``inner_code`` kernel on this array's data and ``other``.

    The kernel receives both arrays, the length of ``self`` and a
    one-element float64 accumulator.

    :param other: Array-like operand.
    :return: The accumulator's value after the kernel ran.
    """
    # Names referenced by the inline kernel.
    N = len(self) # pylint:disable=unused-variable
    total = _np.array([0.], dtype=float64)
    x = _np.array(self._data, copy=False) # pylint:disable=unused-variable
    y = _np.array(other, copy=False) # pylint:disable=unused-variable
    inline(
        inner_code,
        ['x', 'y', 'total', 'N'],
        extra_compile_args=code_flags,
        libraries=omp_libs,
    )
    return total[0]
def Ramp_numeric1(result,start,end):
    """
    Fill the numpy array ``result`` in place with a linear ramp.

    Element i becomes ``start + i*(end-start)/(len(result)-1)``.  A
    one-element array receives ``start``; an empty array is left untouched.

    :param result: Pre-allocated 1-D array to overwrite.
    :param start: Value of the first element.
    :param end: Value of the last element.
    """
    size = len(result)
    if size < 2:
        # The step would be (end-start)/0 for a single element.
        if size == 1:
            result[0] = start
        return
    # Force floating point so that integer endpoints do not make the C
    # expression (end-start)/(size-1) an integer division.
    start = float(start)
    end = float(end)
    code = """
    const int size = Nresult[0];
    const double step = (end-start)/(size-1);
    for (int i = 0; i < size; i++)
        *result++ = start + step*i;
    """
    weave.inline(code,['result','start','end'],compiler='gcc')
def ode_rhs(self, t, y, p):
    """
    Evaluate the right-hand side of the three-variable ODE system.

    The derivatives are written into the preallocated ``self.ydot``:
        ydot[0] =  p[2]*y[1]*y[2] - p[0]*y[0]
        ydot[1] =  p[0]*y[0] - p[1]*y[1]^2 - p[2]*y[1]*y[2]
        ydot[2] =  p[1]*y[1]^2

    :param t: Time; passed to the kernel but not used by it.
    :param y: State vector (3 entries are read).
    :param p: Parameter vector (3 entries are read).
    :return: ``self.ydot``.
    """
    ydot = self.ydot
    rhs_source = r'''
    ydot[0] = y[1]*y[2]*p[2] + (y[0]*p[0])*(-1);
    ydot[1] = y[0]*p[0] + (pow(y[1], 2)*p[1])*(-1) + (y[1]*y[2]*p[2])*(-1);
    ydot[2] = pow(y[1], 2)*p[1];
    '''
    weave.inline(rhs_source, ['ydot', 't', 'y', 'p'])
    return ydot
def correlate_simd(ht, st, qt):
    """
    Run the SIMD correlation kernel on three arrays.

    Each argument's ``.data`` is reinterpreted as float32 before being
    handed to the kernel, together with the length of the reinterpreted
    first array.

    :param ht: First input (exposes ``.data``).
    :param st: Second input (exposes ``.data``).
    :param qt: Third array handed to the kernel (exposes ``.data``);
        presumably the output -- confirm against ``corr_simd_code``.
    """
    def _as_float32(arr):
        # Reinterpret the underlying buffer without copying.
        return _np.array(arr.data, copy = False).view(dtype = float32)

    htilde = _as_float32(ht)
    stilde = _as_float32(st) # pylint:disable=unused-variable
    qtilde = _as_float32(qt) # pylint:disable=unused-variable
    arrlen = len(htilde) # pylint:disable=unused-variable
    inline(corr_simd_code, ['htilde', 'stilde', 'qtilde', 'arrlen'],
           extra_compile_args = [WEAVE_FLAGS],
           support_code = corr_support, auto_downcast = 1)
def oaccess():
    """
    Print the attribute ``a`` of a Python object from inline C++.

    Emulates Python's ``print 'x.a', x.a``.  A Python attribute is not a
    C++ member of the wrapped object, so it is fetched by name with
    ``attr()`` and converted to an int before being streamed.
    """
    x=bunch()
    x.a = 1
    code = """
    // Emulate Python's: print 'x.a',x.a
    // "x.a" is not valid on the C++ side; look the attribute up by name.
    int a = x.attr("a");
    std::cout << "x.a " << a << std::endl;
    """
    inline(code,['x'])
def step_indices(group_idx):
    """
    Get the edges of areas within group_idx, which are filled with the
    same value.

    The result has ``step_count(group_idx) + 1`` entries; the first is 0,
    the last is ``group_idx.size`` and the ones in between are filled in
    by the C kernel.
    """
    n_edges = step_count(group_idx) + 1
    indices = np.empty(n_edges, int)
    # Outer edges are known up front.
    indices[0] = 0
    indices[-1] = group_idx.size
    inline(c_step_indices, ['group_idx', 'indices'],
           define_macros=c_macros, extra_compile_args=c_args)
    return indices
def correlate(self):
    """
    Run this object's correlation kernel (``self.code``) on its stored arrays.

    ``self.x``, ``self.y`` and ``self.z`` are exposed to the kernel as
    ``htilde``, ``stilde`` and ``qtilde`` together with ``self.arrlen``
    and ``self.segsize``.
    """
    # Local aliases under the names the kernel expects.
    htilde = self.x # pylint:disable=unused-variable
    stilde = self.y # pylint:disable=unused-variable
    qtilde = self.z # pylint:disable=unused-variable
    arrlen = self.arrlen # pylint:disable=unused-variable
    segsize = self.segsize # pylint:disable=unused-variable
    kernel_args = ['htilde', 'stilde', 'qtilde', 'arrlen', 'segsize']
    inline(self.code, kernel_args,
           extra_compile_args = [WEAVE_FLAGS] + omp_flags,
           libraries = omp_libs,
           support_code = self.support,
           auto_downcast = 1)
def correlate_parallel(ht, st, qt):
    """
    Run the parallel correlation kernel on three arrays.

    Each argument's ``.data`` is passed as a numpy view, together with the
    length of the first array and the module default segment size.

    :param ht: First input (exposes ``.data``).
    :param st: Second input (exposes ``.data``).
    :param qt: Third array handed to the kernel (exposes ``.data``);
        presumably the output -- confirm against ``corr_parallel_code``.
    """
    htilde = _np.array(ht.data, copy = False)
    stilde = _np.array(st.data, copy = False) # pylint:disable=unused-variable
    qtilde = _np.array(qt.data, copy = False) # pylint:disable=unused-variable
    arrlen = len(htilde) # pylint:disable=unused-variable
    segsize = default_segsize # pylint:disable=unused-variable
    kernel_args = ['htilde', 'stilde', 'qtilde', 'arrlen', 'segsize']
    inline(corr_parallel_code, kernel_args,
           extra_compile_args = [WEAVE_FLAGS] + omp_flags,
           libraries = omp_libs,
           support_code = corr_support,
           auto_downcast = 1)
def Ramp_list2(result, start, end):
    """
    Fill the list ``result`` in place with a linear ramp, via the C API.

    Element i becomes ``start + i*(end-start)/(len(result)-1)``; each value
    is created with PyFloat_FromDouble and stored with PySequence_SetItem.
    A one-element list receives ``start``; an empty list is left untouched.

    :param result: Pre-sized list to overwrite.
    :param start: Value of the first element.
    :param end: Value of the last element.
    """
    size = len(result)
    if size < 2:
        # The step would be (end-start)/0 for a single element.
        if size == 1:
            result[0] = float(start)
        return
    # Force floating point so that integer endpoints do not make the C
    # expression (end-start)/(size-1) an integer division.
    start = float(start)
    end = float(end)
    code = """
    const int size = result.len();
    const double step = (end-start)/(size-1);
    for (int i = 0; i < size; i++)
    {
        PyObject* val = PyFloat_FromDouble( start + step*i );
        PySequence_SetItem(py_result,i, val);
        // PySequence_SetItem does not steal the reference, so release ours.
        Py_XDECREF(val);
    }
    """
    weave.inline(code, ["result", "start", "end"], verbose=2)
def fast_second_phase(invec, indices, N1, N2):
    """
    Second phase of the FFT decomposition, which performs the pruning.

    The transform is evaluated explicitly, but only at the requested
    output locations.  Note that there seem to be some numerical
    accumulation issues at various values of N1 and N2.

    Parameters
    ----------
    invec :
        The result of the first phase FFT
    indices : array of ints
        The index locations to calculate the FFT
    N1 : int
        The length of the second phase "FFT"
    N2 : int
        The length of the first phase FFT

    Returns
    -------
    out : array of floats
        complex64 array with one value per entry of ``indices``.
    """
    N1 = int(N1)
    N2 = int(N2)
    invec = numpy.array(invec.data, copy=False)
    NI = len(indices) # pylint:disable=unused-variable
    out = numpy.zeros(NI, dtype=numpy.complex64)

    # Note, the next step if this needs to be faster is to invert the loops
    kernel = """
    float pi = 3.14159265359;
    for(int i=0; i<NI; i++){
        float sp, cp;
        std::complex<double> val= (0, 0);

        unsigned int k = indices[i];
        int N = N1*N2;
        float k2 = k % N2;

        float phase_inc = 2 * pi * float(k) / float(N);
        sincosf(phase_inc, &sp, &cp);
        std::complex<float> twiddle_inc = std::complex<float>(cp, sp);
        std::complex<float> twiddle = std::complex<float>(1, 0);

        for (float n1=0; n1<N1; n1+=1){
            val += twiddle * invec[int(k2 + N2*n1)];
            twiddle *= twiddle_inc;
        }
        out[i] = val;
    }
    """
    weave.inline(kernel, ['N1', 'N2', 'NI', 'indices', 'out', 'invec'],
                 extra_compile_args=[WEAVE_FLAGS])
    return out
def in_place_mult(num, mat):
    """Multiply every element of the 2-D matrix ``mat`` by the scalar ``num``, in place."""
    nrow, ncol = mat.shape
    scaling_loop = """
    for(int i=0;i<nrow;++i)
        for(int j=0;j<ncol;++j)
            mat(i,j) *= num;
    """
    inline(scaling_loop, ["num", "mat", "nrow", "ncol"],
           type_converters=converters.blitz)
def encode(y):
    """
    Encode the sequence ``y`` with a 7-bit shift register and two output maps.

    For each input value the register is shifted right by one and the value
    is XORed in at bit 6; the two outputs for that input are looked up in
    rows 0 and 1 of the module-level ``output_map`` at the register value.

    :param y: 1-D array of input values.
    :return: uint8 array with two entries per input value.
    """
    output = np.empty(y.size*2, np.uint8)
    encoder_loop = """
    int sh = 0, N = y.extent(blitz::firstDim);
    for (int i=0; i<N; i++) {
        sh = (sh>>1) ^ ((int)y(i) << 6);
        output(2*i+0) = output_map(0,sh);
        output(2*i+1) = output_map(1,sh);
    }
    """
    weave.inline(encoder_loop, ['y','output','output_map'],
                 type_converters=weave.converters.blitz)
    return output
def abs_arg_max(self):
    """
    Return the index of the element with the largest absolute value.

    Real arrays are handled with numpy.  Other kinds are reinterpreted as
    an array of reals of matching precision and handed to the inline
    kernel ``code_abs_arg_max``, which stores the location in a
    one-element array.
    """
    if self.kind == 'real':
        # Take the magnitude first: a large negative sample must win over
        # a smaller positive one.
        return _np.argmax(_np.abs(self.data))
    else:
        data = _np.array(self._data, copy=False).view(real_same_precision_as(self))
        loc = _np.array([0])
        N = len(self)
        inline(code_abs_arg_max, ['data', 'loc', 'N'], libraries=omp_libs,
               extra_compile_args=code_flags)
        return loc[0]
def execute(self):
    """
    Run this object's inline kernel (``self.code``) on its stored buffers.

    The attributes ``inarr``, ``mval``, ``norm``, ``mloc``, ``nstart`` and
    ``howmany`` are exposed to the kernel under the same names.
    """
    # Local aliases under the names the kernel expects.
    howmany = self.howmany # pylint:disable=unused-variable
    nstart = self.nstart # pylint:disable=unused-variable
    mloc = self.mloc # pylint:disable=unused-variable
    norm = self.norm # pylint:disable=unused-variable
    mval = self.mval # pylint:disable=unused-variable
    inarr = self.inarr # pylint:disable=unused-variable
    kernel_args = ['inarr', 'mval', 'norm', 'mloc', 'nstart', 'howmany']
    inline(self.code, kernel_args,
           extra_compile_args = [WEAVE_FLAGS],
           support_code = self.support,
           auto_downcast = 1,
           verbose = self.verbose)
def Ramp_numeric2(result,start,end):
    """
    Fill the numpy array ``result`` in place with a linear ramp.

    The values are produced by repeatedly adding the step
    ``(end-start)/(len(result)-1)`` to ``start``.  A one-element array
    receives ``start``; an empty array is left untouched.

    :param result: Pre-allocated 1-D array to overwrite.
    :param start: Value of the first element.
    :param end: Value of the last element.
    """
    size = len(result)
    if size < 2:
        # The step would be (end-start)/0 for a single element.
        if size == 1:
            result[0] = start
        return
    # Force floating point so that integer endpoints do not make the C
    # expression (end-start)/(size-1) an integer division.
    start = float(start)
    end = float(end)
    code = """
    const int size = Nresult[0];
    double step = (end-start)/(size-1);
    double val = start;
    for (int i = 0; i < size; i++)
    {
        result[i] = val;
        val += step;
    }
    """
    weave.inline(code,['result','start','end'],compiler='gcc')
def execute(self):
    """
    Run this object's inline kernel (``self.code``) on its stored buffers.

    The attributes ``inarr``, ``mval``, ``norm``, ``mloc``, ``nstart`` and
    ``howmany`` are exposed to the kernel under the same names.
    """
    # Local aliases under the names the kernel expects.
    howmany = self.howmany
    nstart = self.nstart
    mloc = self.mloc
    norm = self.norm
    mval = self.mval
    inarr = self.inarr
    kernel_args = ['inarr', 'mval', 'norm', 'mloc', 'nstart', 'howmany']
    inline(self.code, kernel_args,
           extra_compile_args = [WEAVE_FLAGS],
           support_code = self.support,
           auto_downcast = 1,
           verbose = self.verbose)
def check_conversion(self,level=5):
    """
    Check that passing a sequence through weave does not change its refcount.

    An empty ``self.seq_type`` instance is passed to an empty inline
    snippet twice; the reference count measured after the first call must
    be unchanged by the second.  (The first call is excluded because it
    "goofs up" the count.)
    """
    a = self.seq_type([])
    import weave
    # Warm-up call; its effect on the refcount is deliberately ignored.
    weave.inline("",['a'])
    count_before = sys.getrefcount(a)
    weave.inline("",['a'])
    count_after = sys.getrefcount(a)
    assert(count_after == count_before)