def gen_FFT_M2X_finish(funname, M):
    """Generate the C routine ``void funname(basetype *y, type s)``.

    The routine is emitted fully unrolled through the code-generation DSL
    (``begin_block``, ``parameter``, ``var``; defined outside this block).
    NOTE(review): ``^=`` is presumably the DSL's "emit assignment" operator;
    confirm against the Op class.

    funname -- name of the generated C function.
    M       -- integer transform length; it feeds ``range()`` and array
               indices.  Presumably a multiple of 8 so that the ``M/8`` and
               ``3*M/8`` special cases hit loop indices -- TODO confirm.
    """
    # `ctype` instead of `type`: do not shadow the builtin.
    ctype = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block("void %s(%s *y, %s s)\n" % (funname, basetype, ctype))
    y = parameter("y", array=True)
    s = parameter("s")
    f = var("f")
    print
    # Floor division (`//`) keeps bounds and indices integral even if true
    # division is ever enabled; on ints it equals the Python 2 `/`.
    for j in range(1, M // 4):
        f ^= y[2 * j + 1]
        if j == M // 8:
            # Angle is pi/4 here (M divisible by 8): tan == 1, so the
            # multiplication is left out.
            y[2 * j] ^= y[2 * j] - f
        else:
            y[2 * j] ^= y[2 * j] - tan(2.0 * pi * j / M) * f
        y[2 * j + 1] ^= (1.0 / cos(2.0 * pi * j / M)) * f
    print
    # j == M/4 is skipped: cos(2*pi*j/M) is zero there.
    for j in range(M // 4 + 1, M // 2):
        f ^= y[2 * j + 1]
        if j == 3 * M // 8:
            # Angle is 3*pi/4 here: -tan == 1.
            y[2 * j] ^= y[2 * j] + f
        else:
            y[2 * j] ^= y[2 * j] + (-tan(2.0 * pi * j / M)) * f
        y[2 * j + 1] ^= (1.0 / cos(2.0 * pi * j / M)) * f
    print
    # Last pair: y[M/2] is replaced by s, y[M/2+1] by the old y[M/2] - s.
    f ^= y[M // 2] - s
    y[M // 2] ^= s
    y[M // 2 + 1] ^= f
    end_block()
    print
def gen_core_gen_M_L(funname, p_max): type = Op.templates["type"] basetype = Op.templates["basetype"] begin_block("void %s(int p, %s *scale, %s q, %s *dML, %s *ML)\n" % (funname, basetype, type, basetype, basetype)) scale = parameter("scale", array=True) q = parameter("q") dML = parameter("dML", array=True) ML = parameter("ML", array=True, horiz_add=-1) f = var("f") print begin_block("switch(p) ") for n in range(0, p_max + 1): print "#if FMM_P_MAX >= %i" % n for n in range(p_max, -1, -1): print "\tcase %i:" % n # print "\t\tf = scale[%i];" % n f ^= scale[n] * q for m in range((n + 1) * (n + 2) - 1, n * (n + 1) - 1, -1): ML[m] += f * dML[m] print "#endif /* FMM_P_MAX >= %i */" % n end_block() end_block() print
def gen_core_gen_M_L(funname, p_max): type = Op.templates["type"] basetype = Op.templates["basetype"] begin_block('void %s(int p, %s *scale, %s q, %s *dML, %s *ML)\n' % (funname, basetype, type, basetype, basetype)) scale = parameter("scale", array=True) q = parameter("q") dML = parameter("dML", array=True) ML = parameter("ML", array=True, horiz_add=-1) f = var("f") print begin_block("switch(p) ") for n in range(0, p_max + 1): print "#if FMM_P_MAX >= %i" % n for n in range(p_max, -1, -1): print "\tcase %i:" % n #print "\t\tf = scale[%i];" % n f ^= scale[n] * q for m in range((n + 1) * (n + 2) - 1, n * (n + 1) - 1, -1): ML[m] += f * dML[m] print "#endif /* FMM_P_MAX >= %i */" % n end_block() end_block() print
def gen_FFT_M2X_finish(funname, M):
    """Generate the C routine ``void funname(basetype *y, type s)``.

    The routine is emitted fully unrolled through the code-generation DSL
    (``begin_block``, ``parameter``, ``var``; defined outside this block).
    NOTE(review): ``^=`` is presumably the DSL's "emit assignment" operator;
    confirm against the Op class.

    funname -- name of the generated C function.
    M       -- integer transform length; it feeds ``range()`` and array
               indices.  Presumably a multiple of 8 so that the ``M/8`` and
               ``3*M/8`` special cases hit loop indices -- TODO confirm.
    """
    # `ctype` instead of `type`: do not shadow the builtin.
    ctype = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block('void %s(%s *y, %s s)\n' % (funname, basetype, ctype))
    y = parameter("y", array=True)
    s = parameter("s")
    f = var("f")
    print
    # Floor division (`//`) keeps bounds and indices integral even if true
    # division is ever enabled; on ints it equals the Python 2 `/`.
    for j in range(1, M // 4):
        f ^= y[2 * j + 1]
        if j == M // 8:
            # Angle is pi/4 here (M divisible by 8): tan == 1, so the
            # multiplication is left out.
            y[2 * j] ^= y[2 * j] - f
        else:
            y[2 * j] ^= y[2 * j] - tan(2.0 * pi * j / M) * f
        y[2 * j + 1] ^= (1.0 / cos(2.0 * pi * j / M)) * f
    print
    # j == M/4 is skipped: cos(2*pi*j/M) is zero there.
    for j in range(M // 4 + 1, M // 2):
        f ^= y[2 * j + 1]
        if j == 3 * M // 8:
            # Angle is 3*pi/4 here: -tan == 1.
            y[2 * j] ^= y[2 * j] + f
        else:
            y[2 * j] ^= y[2 * j] + (-tan(2.0 * pi * j / M)) * f
        y[2 * j + 1] ^= (1.0 / cos(2.0 * pi * j / M)) * f
    print
    # Last pair: y[M/2] is replaced by s, y[M/2+1] by the old y[M/2] - s.
    f ^= y[M // 2] - s
    y[M // 2] ^= s
    y[M // 2 + 1] ^= f
    end_block()
    print
def gen_core_eval_L_M_grad_plus(funname, p_max):
    """Generate ``void funname(int p, basetype *scale, basetype *M, basetype *Y, type *x, type *y, type *z)``.

    The emitted body is a ``switch(p)`` with cases from ``p_max`` down to 0,
    each wrapped in ``#if FMM_P_MAX >= j`` / ``#endif``.  Every case builds
    three per-order sums (cx, cy, cz) from M[(j, k)] and Y[(j + 1, k +- 1)]
    and adds them, weighted by ``scale[j]``, to hx/hy/hz, which are stored
    through the x/y/z pointers at the end.

    NOTE(review): ``^=`` presumably emits a C assignment; ``Re``/``Im`` are
    index helpers defined outside this block -- confirm.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        'void %s(int p, %s *scale, %s *M, %s *Y, %s *x, %s *y, %s *z)\n' %
        (funname, basetype, basetype, basetype, type, type, type))
    M = parameter("M", array=True, base_array=True)
    Y = parameter("Y", array=True)
    scale = parameter("scale", array=True)
    hx = var("hx")
    hy = var("hy")
    hz = var("hz")
    cx = var("cx")
    cy = var("cy")
    cz = var("cz")
    f1 = var("f1")
    f2 = var("f2")
    print
    # Accumulators start at zero.
    hx ^= 0
    hy ^= 0
    hz ^= 0
    print
    begin_block("switch(p) ")
    # All preprocessor guards are opened first, lowest order outermost.
    for j in range(0, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    # Cases run from the highest order down; each closes one guard.
    for j in range(p_max, -1, -1):
        print "\tcase %i:" % j
        # k == 0 terms initialise the per-order sums.
        cx ^= -sqrt(float((j + 2) * (j + 1))) * M[Re(j, 0)] * Y[Re(j + 1, 1)]
        cy ^= -sqrt(float((j + 2) * (j + 1))) * M[Re(j, 0)] * Y[Im(j + 1, 1)]
        cz ^= (j + 1) * M[Re(j, 0)] * Y[Re(j + 1, 0)]
        for k in range(1, j + 1):
            f1 ^= sqrt(float((j + k + 2) * (j + k + 1)))
            f2 ^= sqrt(float((j - k + 2) * (j - k + 1)))
            cx ^= cx - f1*(M[Re(j,k)]*Y[Re(j+1,k+1)] - M[Im(j,k)]*Y[Im(j+1,k+1)])\
                     -f2*(M[Im(j,k)]*Y[Im(j+1,k-1)] - M[Re(j,k)]*Y[Re(j+1,k-1)])
            cy ^= cy - f1*(M[Im(j,k)]*Y[Re(j+1,k+1)] + M[Re(j,k)]*Y[Im(j+1,k+1)])\
                     -f2*(M[Im(j,k)]*Y[Re(j+1,k-1)] + M[Re(j,k)]*Y[Im(j+1,k-1)])
            cz ^= cz - 2.0 * sqrt(float(
                (j + k + 1) * (j - k + 1))) * (M[Im(j, k)] * Y[Im(j + 1, k)] -
                                               M[Re(j, k)] * Y[Re(j + 1, k)])
        # Fold this order into the running result, weighted by scale[j].
        hx ^= hx + scale[j] * cx
        hy ^= hy + scale[j] * cy
        hz ^= hz + scale[j] * cz
        print "#endif /* FMM_P_MAX >= %i */" % j
    end_block()
    # The output pointers are written with raw C text, not through the DSL.
    print '\t*x = hx;'
    print '\t*y = hy;'
    print '\t*z = hz;'
    end_block()
    print
def gen_FFT_X2L_finish(funname, M):
    """Generate the C routine ``void funname(basetype *x, basetype *y, type s)``.

    The emitted code fills ``y[0 .. M+1]`` from ``x`` and ``s`` with a
    running recurrence.  The last two pairs written are carried in the
    temporaries f0..f3 instead of being read back from ``y``.
    NOTE(review): ``^=`` is presumably the DSL's "emit assignment" operator;
    confirm against the Op class.

    funname -- name of the generated C function.
    M       -- integer transform length; it feeds ``range()`` and array
               indices.
    """
    # `ctype` instead of `type`: do not shadow the builtin.
    ctype = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block('void %s(%s *x, %s *y, %s s)\n' % (funname, basetype, basetype, ctype))
    x = parameter("x", array=True)
    y = parameter("y", array=True)
    s = parameter("s")
    f0 = var("f0")
    f1 = var("f1")
    f2 = var("f2")
    f3 = var("f3")
    print
    # Reference formulation that reads its operands back from y:
    #   y[0] ^= s;
    #   y[1] ^= 0.0;
    #   y[2] ^= x[0] - y[0];
    #   y[3] ^= x[1];
    #   for j in range(1, M/4):
    #     y[4*j]   ^= x[M-2*j] + y[4*j-2];
    #     y[4*j+1] ^= y[4*j-1] - x[M-2*j+1];
    #     y[4*j+2] ^= x[2*j] - y[4*j];
    #     y[4*j+3] ^= x[2*j+1] - y[4*j+1];
    #   y[M] ^= x[M/2] + y[M-2];
    #   y[M+1] ^= 0.0;
    f0 ^= s
    f1 ^= 0.0
    f2 ^= x[0] - f0
    f3 ^= x[1]
    y[0] ^= f0
    y[1] ^= f1
    y[2] ^= f2
    y[3] ^= f3
    # Floor division (`//`) keeps bounds and indices integral even if true
    # division is ever enabled; on ints it equals the Python 2 `/`.
    for j in range(1, M // 4):
        f0 ^= x[M - 2 * j] + f2
        f1 ^= f3 - x[M - 2 * j + 1]
        y[4 * j] ^= f0
        y[4 * j + 1] ^= f1
        f2 ^= x[2 * j] - f0
        f3 ^= x[2 * j + 1] - f1
        y[4 * j + 2] ^= f2
        y[4 * j + 3] ^= f3
    # f2 still holds the value last stored to y[M-2].
    y[M] ^= x[M // 2] + f2
    y[M + 1] ^= 0.0
    end_block()
    print
def gen_FFT_X2L_finish(funname, M):
    """Generate the C routine ``void funname(basetype *x, basetype *y, type s)``.

    The emitted code fills ``y[0 .. M+1]`` from ``x`` and ``s`` with a
    running recurrence.  The last two pairs written are carried in the
    temporaries f0..f3 instead of being read back from ``y``.
    NOTE(review): ``^=`` is presumably the DSL's "emit assignment" operator;
    confirm against the Op class.

    funname -- name of the generated C function.
    M       -- integer transform length; it feeds ``range()`` and array
               indices.
    """
    # `ctype` instead of `type`: do not shadow the builtin.
    ctype = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block("void %s(%s *x, %s *y, %s s)\n" % (funname, basetype, basetype, ctype))
    x = parameter("x", array=True)
    y = parameter("y", array=True)
    s = parameter("s")
    f0 = var("f0")
    f1 = var("f1")
    f2 = var("f2")
    f3 = var("f3")
    print
    # Reference formulation that reads its operands back from y:
    #   y[0] ^= s;
    #   y[1] ^= 0.0;
    #   y[2] ^= x[0] - y[0];
    #   y[3] ^= x[1];
    #   for j in range(1, M/4):
    #     y[4*j]   ^= x[M-2*j] + y[4*j-2];
    #     y[4*j+1] ^= y[4*j-1] - x[M-2*j+1];
    #     y[4*j+2] ^= x[2*j] - y[4*j];
    #     y[4*j+3] ^= x[2*j+1] - y[4*j+1];
    #   y[M] ^= x[M/2] + y[M-2];
    #   y[M+1] ^= 0.0;
    f0 ^= s
    f1 ^= 0.0
    f2 ^= x[0] - f0
    f3 ^= x[1]
    y[0] ^= f0
    y[1] ^= f1
    y[2] ^= f2
    y[3] ^= f3
    # Floor division (`//`) keeps bounds and indices integral even if true
    # division is ever enabled; on ints it equals the Python 2 `/`.
    for j in range(1, M // 4):
        f0 ^= x[M - 2 * j] + f2
        f1 ^= f3 - x[M - 2 * j + 1]
        y[4 * j] ^= f0
        y[4 * j + 1] ^= f1
        f2 ^= x[2 * j] - f0
        f3 ^= x[2 * j + 1] - f1
        y[4 * j + 2] ^= f2
        y[4 * j + 3] ^= f3
    # f2 still holds the value last stored to y[M-2].
    y[M] ^= x[M // 2] + f2
    y[M + 1] ^= 0.0
    end_block()
    print
def gen_FFT_X2L_prepare(funname, M): type = Op.templates["type"] basetype = Op.templates["basetype"] begin_block('void %s(%s *y, %s *h)\n' % (funname, basetype, type)) y = parameter("y", array=True) f = var("f") g = var("g") s = var("s") print # A factor 2 is missing, to be compensated in D_w_over_M s ^= y[0] y[0] ^= s y[1] ^= y[1] for j in range(1, M / 2): f ^= y[2 * j + 1] g ^= y[2 * j] s ^= s + g if j == M / 4: y[2 * j] ^= g + f y[2 * j + 1] ^= 0 else: y[2 * j] ^= g + (sin(2.0 * pi * j / M)) * f y[2 * j + 1] ^= (cos(2.0 * pi * j / M)) * f print '\t*h = s;' end_block() print
def gen_FFT_X2L_prepare(funname, M): type = Op.templates["type"] basetype = Op.templates["basetype"] begin_block("void %s(%s *y, %s *h)\n" % (funname, basetype, type)) y = parameter("y", array=True) f = var("f") g = var("g") s = var("s") print # A factor 2 is missing, to be compensated in D_w_over_M s ^= y[0] y[0] ^= s y[1] ^= y[1] for j in range(1, M / 2): f ^= y[2 * j + 1] g ^= y[2 * j] s ^= s + g if j == M / 4: y[2 * j] ^= g + f y[2 * j + 1] ^= 0 else: y[2 * j] ^= g + (sin(2.0 * pi * j / M)) * f y[2 * j + 1] ^= (cos(2.0 * pi * j / M)) * f print "\t*h = s;" end_block() print
def gen_FFT_M2X_prepare(funname, M): type = Op.templates["type"] basetype = Op.templates["basetype"] begin_block('void %s(%s *x, %s *y, %s *h)\n' % (funname, basetype, basetype, type)) x = parameter("x", array=True) y = parameter("y", array=True) f0 = var("f0") f1 = var("f1") f2 = var("f2") f3 = var("f3") s = var("s") print #for j in range(M/4): # y[2*j] ^= x[4*j] + x[4*j+2] # y[2*j+1] ^= x[4*j+1] + x[4*j+3] # y[M-2*j-2] ^= x[4*j+4] - x[4*j+2] # y[M-2*j-1] ^= x[4*j+3] - x[4*j+5] f0 ^= x[0] f1 ^= x[1] f2 ^= x[2] f3 ^= x[3] s ^= 0 for j in range(M / 4): y[2 * j] ^= f0 + f2 y[2 * j + 1] ^= f1 + f3 f0 ^= x[4 * j + 4] f1 ^= x[4 * j + 5] y[M - 2 * j - 2] ^= f0 - f2 y[M - 2 * j - 1] ^= f3 - f1 if j < M / 4 - 1: if (j % 2): s ^= s + f0 else: s ^= s - f0 f2 ^= x[4 * j + 6] f3 ^= x[4 * j + 7] s ^= 2 * s + f0 + x[0] print '\t*h = s;' end_block() print
def gen_FFT_M2X_prepare(funname, M): type = Op.templates["type"] basetype = Op.templates["basetype"] begin_block("void %s(%s *x, %s *y, %s *h)\n" % (funname, basetype, basetype, type)) x = parameter("x", array=True) y = parameter("y", array=True) f0 = var("f0") f1 = var("f1") f2 = var("f2") f3 = var("f3") s = var("s") print # for j in range(M/4): # y[2*j] ^= x[4*j] + x[4*j+2] # y[2*j+1] ^= x[4*j+1] + x[4*j+3] # y[M-2*j-2] ^= x[4*j+4] - x[4*j+2] # y[M-2*j-1] ^= x[4*j+3] - x[4*j+5] f0 ^= x[0] f1 ^= x[1] f2 ^= x[2] f3 ^= x[3] s ^= 0 for j in range(M / 4): y[2 * j] ^= f0 + f2 y[2 * j + 1] ^= f1 + f3 f0 ^= x[4 * j + 4] f1 ^= x[4 * j + 5] y[M - 2 * j - 2] ^= f0 - f2 y[M - 2 * j - 1] ^= f3 - f1 if j < M / 4 - 1: if j % 2: s ^= s + f0 else: s ^= s - f0 f2 ^= x[4 * j + 6] f3 ^= x[4 * j + 7] s ^= 2 * s + f0 + x[0] print "\t*h = s;" end_block() print
def gen_core_eval_L_M(funname, p_max):
    """Generate ``type funname(int p, basetype *scale, basetype *LM, basetype *Y)``.

    The emitted body is a ``switch(p)`` with cases from ``p_max`` down to 0,
    each wrapped in ``#if FMM_P_MAX >= j`` / ``#endif``.  Every case forms a
    per-order sum ``c`` of LM*Y products and adds ``scale[j] * c`` to ``h``,
    which the generated function returns.

    NOTE(review): ``^=`` presumably emits a C assignment and ``+=`` an
    accumulation; ``Re``/``Im`` are index helpers defined outside this block.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block('%s %s(int p, %s *scale, %s *LM, %s *Y)\n' % (type, funname, basetype, basetype, basetype))
    LM = parameter("LM", array=True, base_array=True)
    Y = parameter("Y", array=True)
    scale = parameter("scale", array=True)
    h = var("h")
    c = var("c")
    two = var("two", 2.0)
    print
    h ^= 0
    print
    begin_block("switch(p) ")
    # All preprocessor guards are opened first, lowest order outermost.
    for j in range(0, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    for j in range(p_max, -1, -1):
        print "\tcase %i:" % j
        if j == 0:
            # Order 0 has only the k == 0 term.
            c ^= LM[Re(j, 0)] * Y[Re(j, 0)]
        else:
            # Sum the k >= 1 terms, double them, then add the k == 0 term.
            c ^= LM[Re(j, 1)] * Y[Re(j, 1)] - LM[Im(j, 1)] * Y[Im(j, 1)]
            for k in range(2, j + 1):
                c += LM[Re(j, k)] * Y[Re(j, k)] - LM[Im(j, k)] * Y[Im(j, k)]
            c ^= two * c + LM[Re(j, 0)] * Y[Re(j, 0)]
        #h ^= c + scale*h
        h += scale[j] * c
        print "#endif /* FMM_P_MAX >= %i */" % j
    end_block()
    # The return statement is written with raw C text, not through the DSL.
    print '\treturn h;'
    end_block()
    print
def gen_core_gen_M_L_dipole_plus(funname, p_max):
    """Generate ``void funname(int p, basetype *scale, type mx, type my, type mz, basetype *Y, basetype *L)``.

    The emitted body is a ``switch(p)`` with cases from ``p_max`` down to 0,
    each wrapped in ``#if FMM_P_MAX >= j`` / ``#endif``.  Every case adds,
    weighted by ``scale[j]``, combinations of (mx, my, mz) with entries of Y
    at order ``j + 1`` to L[(j, k)], walking k from j down to 0.

    NOTE(review): ``^=`` presumably emits a C assignment and ``+=`` an
    accumulation; ``Re``/``Im`` are index helpers defined outside this block.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        "void %s(int p, %s *scale, %s mx, %s my, %s mz, %s *Y, %s *L)\n"
        % (funname, basetype, type, type, type, basetype, basetype)
    )
    scale = parameter("scale", array=True)
    mx = parameter("mx")
    my = parameter("my")
    mz = parameter("mz")
    Y = parameter("Y", array=True)
    L = parameter("L", array=True, horiz_add=-1)
    f = var("f")
    print
    begin_block("switch(p) ")
    # All preprocessor guards are opened first, lowest order outermost.
    for j in range(0, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    for j in range(p_max, -1, -1):
        print "\tcase %i:" % j
        # print "\t\tf = scale[%i];" % j
        f ^= scale[j]
        # General terms, k = j down to 2.
        for k in range(j + 1 - 1, 2 - 1, -1):
            L[Im(j, k)] += f * (
                (0.5 * sqrt(float((j + k + 1) * (j + k + 2)))) * (my * Y[Re(j + 1, k + 1)] + mx * Y[Im(j + 1, k + 1)])
                + (0.5 * sqrt(float((j - k + 1) * (j - k + 2)))) * (my * Y[Re(j + 1, k - 1)] - mx * Y[Im(j + 1, k - 1)])
                - (sqrt(float((j - k + 1) * (j + k + 1)))) * mz * Y[Im(j + 1, k)]
            )
            L[Re(j, k)] += f * (
                (0.5 * sqrt(float((j + k + 1) * (j + k + 2)))) * (mx * Y[Re(j + 1, k + 1)] - my * Y[Im(j + 1, k + 1)])
                - (0.5 * sqrt(float((j - k + 1) * (j - k + 2)))) * (mx * Y[Re(j + 1, k - 1)] + my * Y[Im(j + 1, k - 1)])
                - (sqrt(float((j - k + 1) * (j + k + 1)))) * mz * Y[Re(j + 1, k)]
            )
        # k == 1 is written out separately: its k - 1 == 0 factor uses only
        # the Re entry of Y.
        if j > 0:
            L[Im(j, 1)] += f * (
                (0.5 * sqrt(float((j + 1 + 1) * (j + 1 + 2)))) * (my * Y[Re(j + 1, 1 + 1)] + mx * Y[Im(j + 1, 1 + 1)])
                + (0.5 * sqrt(float((j - 1 + 1) * (j - 1 + 2)))) * my * Y[Re(j + 1, 1 - 1)]
                - (sqrt(float((j - 1 + 1) * (j + 1 + 1)))) * mz * Y[Im(j + 1, 1)]
            )
            L[Re(j, 1)] += f * (
                (0.5 * sqrt(float((j + 1 + 1) * (j + 1 + 2)))) * (mx * Y[Re(j + 1, 1 + 1)] - my * Y[Im(j + 1, 1 + 1)])
                - (0.5 * sqrt(float((j - 1 + 1) * (j - 1 + 2)))) * mx * Y[Re(j + 1, 1 - 1)]
                - (sqrt(float((j - 1 + 1) * (j + 1 + 1)))) * mz * Y[Re(j + 1, 1)]
            )
        # k == 0: the Im entry receives a zero contribution.
        L[Im(j, 0)] += 0
        L[Re(j, 0)] += f * (
            (sqrt(float(j + 1) * (j + 2))) * (mx * Y[Re(j + 1, 1)] - my * Y[Im(j + 1, 1)])
            - (j + 1) * mz * Y[Re(j + 1, 0)]
        )
        print "#endif /* FMM_P_MAX >= %i */" % j
    end_block()
    end_block()
    print
def gen_core_eval_L_M(funname, p_max):
    """Generate ``type funname(int p, basetype *scale, basetype *LM, basetype *Y)``.

    The emitted body is a ``switch(p)`` with cases from ``p_max`` down to 0,
    each wrapped in ``#if FMM_P_MAX >= j`` / ``#endif``.  Every case forms a
    per-order sum ``c`` of LM*Y products and adds ``scale[j] * c`` to ``h``,
    which the generated function returns.

    NOTE(review): ``^=`` presumably emits a C assignment and ``+=`` an
    accumulation; ``Re``/``Im`` are index helpers defined outside this block.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block("%s %s(int p, %s *scale, %s *LM, %s *Y)\n" % (type, funname, basetype, basetype, basetype))
    LM = parameter("LM", array=True, base_array=True)
    Y = parameter("Y", array=True)
    scale = parameter("scale", array=True)
    h = var("h")
    c = var("c")
    two = var("two", 2.0)
    print
    h ^= 0
    print
    begin_block("switch(p) ")
    # All preprocessor guards are opened first, lowest order outermost.
    for j in range(0, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    for j in range(p_max, -1, -1):
        print "\tcase %i:" % j
        if j == 0:
            # Order 0 has only the k == 0 term.
            c ^= LM[Re(j, 0)] * Y[Re(j, 0)]
        else:
            # Sum the k >= 1 terms, double them, then add the k == 0 term.
            c ^= LM[Re(j, 1)] * Y[Re(j, 1)] - LM[Im(j, 1)] * Y[Im(j, 1)]
            for k in range(2, j + 1):
                c += LM[Re(j, k)] * Y[Re(j, k)] - LM[Im(j, k)] * Y[Im(j, k)]
            c ^= two * c + LM[Re(j, 0)] * Y[Re(j, 0)]
        # h ^= c + scale*h
        h += scale[j] * c
        print "#endif /* FMM_P_MAX >= %i */" % j
    end_block()
    # The return statement is written with raw C text, not through the DSL.
    print "\treturn h;"
    end_block()
    print
def gen_core_gen_M_L_dipole_minus(funname, p_max):
    """Generate ``void funname(int p, basetype *scale, type mx, type my, type mz, basetype *Y, basetype *M)``.

    The emitted body is a ``switch(p)``.  Cases ``p_max`` down to 3 are
    produced by a loop; cases 2, 1 and 0 are written out by hand with
    literal array indices.  ``#if FMM_P_MAX >= j`` guards are opened for
    j = 1..p_max, so case 0 sits outside every guard.  Each case adds,
    weighted by ``scale[j]``, combinations of (mx, my, mz) with entries of Y
    at order ``j - 1`` to M[(j, k)].

    NOTE(review): ``^=`` presumably emits a C assignment and ``+=`` an
    accumulation; ``Re``/``Im`` are index helpers defined outside this block.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        "void %s(int p, %s *scale, %s mx, %s my, %s mz, %s *Y, %s *M)\n"
        % (funname, basetype, type, type, type, basetype, basetype)
    )
    # NOTE(review): `scale` is declared without array=True although the C
    # signature makes it a pointer and it is indexed below; the other
    # generators in this file pass array=True -- confirm this is intended.
    scale = parameter("scale")
    mx = parameter("mx")
    my = parameter("my")
    mz = parameter("mz")
    Y = parameter("Y", array=True)
    M = parameter("M", array=True, horiz_add=-1)
    f = var("f")
    print
    begin_block("switch(p) ")
    # Guards start at order 1; all are opened before the first case.
    for j in range(1, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    # Generic cases for orders p_max .. 3.
    for j in range(p_max, 3 - 1, -1):
        print "\tcase %i:" % j
        ##print "\t\tf = scale[%i];" % j
        f ^= scale[j]
        # k == j
        M[Im(j, j)] += f * (
            (0.5 * sqrt(float(2 * j - 1) * (2 * j))) * (my * Y[Re(j - 1, j - 1)] - mx * Y[Im(j - 1, j - 1)])
        )
        M[Re(j, j)] += f * (
            (-0.5 * sqrt(float(2 * j - 1) * (2 * j))) * (mx * Y[Re(j - 1, j - 1)] + my * Y[Im(j - 1, j - 1)])
        )
        # k == j - 1 (the guard is always true here because j >= 3)
        if j > 1:
            M[Im(j, j - 1)] += f * (
                (0.5 * sqrt(float((2 * j - 2) * (2 * j - 1)))) * (my * Y[Re(j - 1, j - 2)] - mx * Y[Im(j - 1, j - 2)])
                + (sqrt(float(2 * j - 1))) * mz * Y[Im(j - 1, j - 1)]
            )
            M[Re(j, j - 1)] += f * (
                (-0.5 * sqrt(float((2 * j - 2) * (2 * j - 1)))) * (mx * Y[Re(j - 1, j - 2)] + my * Y[Im(j - 1, j - 2)])
                + (sqrt(float(2 * j - 1))) * mz * Y[Re(j - 1, j - 1)]
            )
        # General terms, k = j - 2 down to 2.
        for k in range(j - 2 + 1 - 1, 2 - 1, -1):
            M[Im(j, k)] += f * (
                (0.5 * sqrt(float((j - k - 1) * (j - k)))) * (my * Y[Re(j - 1, k + 1)] + mx * Y[Im(j - 1, k + 1)])
                + (0.5 * sqrt(float((j + k - 1) * (j + k)))) * (my * Y[Re(j - 1, k - 1)] - mx * Y[Im(j - 1, k - 1)])
                + (sqrt(float((j - k) * (j + k)))) * mz * Y[Im(j - 1, k)]
            )
            M[Re(j, k)] += f * (
                (0.5 * sqrt(float((j - k - 1) * (j - k)))) * (mx * Y[Re(j - 1, k + 1)] - my * Y[Im(j - 1, k + 1)])
                - (0.5 * sqrt(float((j + k - 1) * (j + k)))) * (mx * Y[Re(j - 1, k - 1)] + my * Y[Im(j - 1, k - 1)])
                + (sqrt(float((j - k) * (j + k)))) * mz * Y[Re(j - 1, k)]
            )
        # k == 1: its k - 1 == 0 factor uses only the Re entry of Y.
        M[Im(j, 1)] += f * (
            (0.5 * sqrt(float((j - 1 - 1) * (j - 1)))) * (my * Y[Re(j - 1, 1 + 1)] + mx * Y[Im(j - 1, 1 + 1)])
            + (0.5 * sqrt(float((j + 1 - 1) * (j + 1)))) * my * Y[Re(j - 1, 1 - 1)]
            + (sqrt(float((j - 1) * (j + 1)))) * mz * Y[Im(j - 1, 1)]
        )
        M[Re(j, 1)] += f * (
            (0.5 * sqrt(float((j - 1 - 1) * (j - 1)))) * (mx * Y[Re(j - 1, 1 + 1)] - my * Y[Im(j - 1, 1 + 1)])
            - (0.5 * sqrt(float((j + 1 - 1) * (j + 1)))) * mx * Y[Re(j - 1, 1 - 1)]
            + (sqrt(float((j - 1) * (j + 1)))) * mz * Y[Re(j - 1, 1)]
        )
        # k == 0: the Im entry receives a zero contribution.
        M[Im(j, 0)] += 0
        M[Re(j, 0)] += f * (
            j * mz * Y[Re(j - 1, 0)] + sqrt(float((j - 1) * j)) * (mx * Y[Re(j - 1, 1)] - my * Y[Im(j - 1, 1)])
        )
        print "#endif /* FMM_P_MAX >= %i */" % j
    # Hand-written low orders use literal indices into M and Y.
    print "\tcase 2:"
    f ^= scale[2]
    M[11] += f * (sqrt(float(3)) * (my * Y[4] - mx * Y[5]))
    M[10] += f * ((-sqrt(float(3))) * (mx * Y[4] + my * Y[5]))
    M[9] += f * (sqrt(float(1.5)) * my * Y[2] + sqrt(float(3)) * mz * Y[5])
    M[8] += f * ((-sqrt(float(1.5))) * mx * Y[2] + sqrt(float(3)) * mz * Y[4])
    M[7] += 0
    M[6] += f * (2.0 * mz * Y[2] + sqrt(float(2)) * (mx * Y[4] - my * Y[5]))
    print "#endif /* FMM_P_MAX >= 2 */"
    print "\tcase 1:"
    f ^= scale[1]
    M[5] += f * sqrt(float(0.5)) * my * Y[0]
    M[4] += f * (-sqrt(float(0.5))) * mx * Y[0]
    M[3] += 0
    M[2] += f * mz * Y[0]
    print "#endif /* FMM_P_MAX >= 1 */"
    # Order 0 receives only zero contributions.
    print "\tcase 0:"
    M[1] += 0
    M[0] += 0
    end_block()
    end_block()
    print
def gen_core_gen_M_L_dipole_minus(funname, p_max):
    """Generate ``void funname(int p, basetype *scale, type mx, type my, type mz, basetype *Y, basetype *M)``.

    The emitted body is a ``switch(p)``.  Cases ``p_max`` down to 3 are
    produced by a loop; cases 2, 1 and 0 are written out by hand with
    literal array indices.  ``#if FMM_P_MAX >= j`` guards are opened for
    j = 1..p_max, so case 0 sits outside every guard.  Each case adds,
    weighted by ``scale[j]``, combinations of (mx, my, mz) with entries of Y
    at order ``j - 1`` to M[(j, k)].

    NOTE(review): ``^=`` presumably emits a C assignment and ``+=`` an
    accumulation; ``Re``/``Im`` are index helpers defined outside this block.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        'void %s(int p, %s *scale, %s mx, %s my, %s mz, %s *Y, %s *M)\n' %
        (funname, basetype, type, type, type, basetype, basetype))
    # NOTE(review): `scale` is declared without array=True although the C
    # signature makes it a pointer and it is indexed below; the other
    # generators in this file pass array=True -- confirm this is intended.
    scale = parameter("scale")
    mx = parameter("mx")
    my = parameter("my")
    mz = parameter("mz")
    Y = parameter("Y", array=True)
    M = parameter("M", array=True, horiz_add=-1)
    f = var("f")
    print
    begin_block("switch(p) ")
    # Guards start at order 1; all are opened before the first case.
    for j in range(1, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    # Generic cases for orders p_max .. 3.
    for j in range(p_max, 3 - 1, -1):
        print "\tcase %i:" % j
        ##print "\t\tf = scale[%i];" % j
        f ^= scale[j]
        # k == j
        M[Im(j, j)] += f * ((0.5 * sqrt(float(2 * j - 1) * (2 * j))) *
                            (my * Y[Re(j - 1, j - 1)] - mx * Y[Im(j - 1, j - 1)]))
        M[Re(j, j)] += f * ((-0.5 * sqrt(float(2 * j - 1) * (2 * j))) *
                            (mx * Y[Re(j - 1, j - 1)] + my * Y[Im(j - 1, j - 1)]))
        # k == j - 1 (the guard is always true here because j >= 3)
        if j > 1:
            M[Im(j, j - 1)] += f * (
                (0.5 * sqrt(float((2 * j - 2) * (2 * j - 1)))) *
                (my * Y[Re(j - 1, j - 2)] - mx * Y[Im(j - 1, j - 2)]) +
                (sqrt(float(2 * j - 1))) * mz * Y[Im(j - 1, j - 1)])
            M[Re(j, j - 1)] += f * (
                (-0.5 * sqrt(float((2 * j - 2) * (2 * j - 1)))) *
                (mx * Y[Re(j - 1, j - 2)] + my * Y[Im(j - 1, j - 2)]) +
                (sqrt(float(2 * j - 1))) * mz * Y[Re(j - 1, j - 1)])
        # General terms, k = j - 2 down to 2.
        for k in range(j - 2 + 1 - 1, 2 - 1, -1):
            M[Im(j, k)] += f * (
                (0.5 * sqrt(float((j - k - 1) * (j - k)))) *
                (my * Y[Re(j - 1, k + 1)] + mx * Y[Im(j - 1, k + 1)]) +
                (0.5 * sqrt(float((j + k - 1) * (j + k)))) *
                (my * Y[Re(j - 1, k - 1)] - mx * Y[Im(j - 1, k - 1)]) +
                (sqrt(float((j - k) * (j + k)))) * mz * Y[Im(j - 1, k)])
            M[Re(j, k)] += f * (
                (0.5 * sqrt(float((j - k - 1) * (j - k)))) *
                (mx * Y[Re(j - 1, k + 1)] - my * Y[Im(j - 1, k + 1)]) -
                (0.5 * sqrt(float((j + k - 1) * (j + k)))) *
                (mx * Y[Re(j - 1, k - 1)] + my * Y[Im(j - 1, k - 1)]) +
                (sqrt(float((j - k) * (j + k)))) * mz * Y[Re(j - 1, k)])
        # k == 1: its k - 1 == 0 factor uses only the Re entry of Y.
        M[Im(
            j, 1)] += f * ((0.5 * sqrt(float((j - 1 - 1) * (j - 1)))) *
                           (my * Y[Re(j - 1, 1 + 1)] + mx * Y[Im(j - 1, 1 + 1)]) +
                           (0.5 * sqrt(float(
                               (j + 1 - 1) * (j + 1)))) * my * Y[Re(j - 1, 1 - 1)] +
                           (sqrt(float(
                               (j - 1) * (j + 1)))) * mz * Y[Im(j - 1, 1)])
        M[Re(
            j, 1)] += f * ((0.5 * sqrt(float((j - 1 - 1) * (j - 1)))) *
                           (mx * Y[Re(j - 1, 1 + 1)] - my * Y[Im(j - 1, 1 + 1)]) -
                           (0.5 * sqrt(float(
                               (j + 1 - 1) * (j + 1)))) * mx * Y[Re(j - 1, 1 - 1)] +
                           (sqrt(float(
                               (j - 1) * (j + 1)))) * mz * Y[Re(j - 1, 1)])
        # k == 0: the Im entry receives a zero contribution.
        M[Im(j, 0)] += 0
        M[Re(j, 0)] += f * (j * mz * Y[Re(j - 1, 0)] + sqrt(float(
            (j - 1) * j)) * (mx * Y[Re(j - 1, 1)] - my * Y[Im(j - 1, 1)]))
        print "#endif /* FMM_P_MAX >= %i */" % j
    # Hand-written low orders use literal indices into M and Y.
    print "\tcase 2:"
    f ^= scale[2]
    M[11] += f * (sqrt(float(3)) * (my * Y[4] - mx * Y[5]))
    M[10] += f * ((-sqrt(float(3))) * (mx * Y[4] + my * Y[5]))
    M[9] += f * (sqrt(float(1.5)) * my * Y[2] + sqrt(float(3)) * mz * Y[5])
    M[8] += f * ((-sqrt(float(1.5))) * mx * Y[2] + sqrt(float(3)) * mz * Y[4])
    M[7] += 0
    M[6] += f * (2.0 * mz * Y[2] + sqrt(float(2)) * (mx * Y[4] - my * Y[5]))
    print "#endif /* FMM_P_MAX >= 2 */"
    print "\tcase 1:"
    f ^= scale[1]
    M[5] += f * sqrt(float(0.5)) * my * Y[0]
    M[4] += f * (-sqrt(float(0.5))) * mx * Y[0]
    M[3] += 0
    M[2] += f * mz * Y[0]
    print "#endif /* FMM_P_MAX >= 1 */"
    # Order 0 receives only zero contributions.
    print "\tcase 0:"
    M[1] += 0
    M[0] += 0
    end_block()
    end_block()
    print
def gen_core_eval_L_M_grad_plus(funname, p_max):
    """Generate ``void funname(int p, basetype *scale, basetype *M, basetype *Y, type *x, type *y, type *z)``.

    The emitted body is a ``switch(p)`` with cases from ``p_max`` down to 0,
    each wrapped in ``#if FMM_P_MAX >= j`` / ``#endif``.  Every case builds
    three per-order sums (cx, cy, cz) from M[(j, k)] and Y[(j + 1, k +- 1)]
    and adds them, weighted by ``scale[j]``, to hx/hy/hz, which are stored
    through the x/y/z pointers at the end.

    NOTE(review): ``^=`` presumably emits a C assignment; ``Re``/``Im`` are
    index helpers defined outside this block -- confirm.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        "void %s(int p, %s *scale, %s *M, %s *Y, %s *x, %s *y, %s *z)\n"
        % (funname, basetype, basetype, basetype, type, type, type)
    )
    M = parameter("M", array=True, base_array=True)
    Y = parameter("Y", array=True)
    scale = parameter("scale", array=True)
    hx = var("hx")
    hy = var("hy")
    hz = var("hz")
    cx = var("cx")
    cy = var("cy")
    cz = var("cz")
    f1 = var("f1")
    f2 = var("f2")
    print
    # Accumulators start at zero.
    hx ^= 0
    hy ^= 0
    hz ^= 0
    print
    begin_block("switch(p) ")
    # All preprocessor guards are opened first, lowest order outermost.
    for j in range(0, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    # Cases run from the highest order down; each closes one guard.
    for j in range(p_max, -1, -1):
        print "\tcase %i:" % j
        # k == 0 terms initialise the per-order sums.
        cx ^= -sqrt(float((j + 2) * (j + 1))) * M[Re(j, 0)] * Y[Re(j + 1, 1)]
        cy ^= -sqrt(float((j + 2) * (j + 1))) * M[Re(j, 0)] * Y[Im(j + 1, 1)]
        cz ^= (j + 1) * M[Re(j, 0)] * Y[Re(j + 1, 0)]
        for k in range(1, j + 1):
            f1 ^= sqrt(float((j + k + 2) * (j + k + 1)))
            f2 ^= sqrt(float((j - k + 2) * (j - k + 1)))
            cx ^= (
                cx
                - f1 * (M[Re(j, k)] * Y[Re(j + 1, k + 1)] - M[Im(j, k)] * Y[Im(j + 1, k + 1)])
                - f2 * (M[Im(j, k)] * Y[Im(j + 1, k - 1)] - M[Re(j, k)] * Y[Re(j + 1, k - 1)])
            )
            cy ^= (
                cy
                - f1 * (M[Im(j, k)] * Y[Re(j + 1, k + 1)] + M[Re(j, k)] * Y[Im(j + 1, k + 1)])
                - f2 * (M[Im(j, k)] * Y[Re(j + 1, k - 1)] + M[Re(j, k)] * Y[Im(j + 1, k - 1)])
            )
            cz ^= cz - 2.0 * sqrt(float((j + k + 1) * (j - k + 1))) * (
                M[Im(j, k)] * Y[Im(j + 1, k)] - M[Re(j, k)] * Y[Re(j + 1, k)]
            )
        # Fold this order into the running result, weighted by scale[j].
        hx ^= hx + scale[j] * cx
        hy ^= hy + scale[j] * cy
        hz ^= hz + scale[j] * cz
        print "#endif /* FMM_P_MAX >= %i */" % j
    end_block()
    # The output pointers are written with raw C text, not through the DSL.
    print "\t*x = hx;"
    print "\t*y = hy;"
    print "\t*z = hz;"
    end_block()
    print
def gen_core_eval_L_M_grad_minus(funname, p_max):
    """Generate ``void funname(int p, basetype *scale, basetype *L, basetype *Y, type *x, type *y, type *z)``.

    The emitted body is a ``switch(p)`` with cases from ``p_max`` down to 1
    (no case 0 is generated here), each wrapped in ``#if FMM_P_MAX >= j`` /
    ``#endif``.  Every case builds three per-order sums (cx, cy, cz) from
    L[(j, k)] and Y[(j - 1, k +- 1)] and adds them, weighted by
    ``scale[j]``, to hx/hy/hz, which are stored through the x/y/z pointers.

    NOTE(review): ``^=`` presumably emits a C assignment; ``Re``/``Im`` are
    index helpers defined outside this block -- confirm.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        "void %s(int p, %s *scale, %s *L, %s *Y, %s *x, %s *y, %s *z)\n"
        % (funname, basetype, basetype, basetype, type, type, type)
    )
    L = parameter("L", array=True, base_array=True)
    Y = parameter("Y", array=True)
    scale = parameter("scale", array=True)
    hx = var("hx")
    hy = var("hy")
    hz = var("hz")
    cx = var("cx")
    cy = var("cy")
    cz = var("cz")
    f1 = var("f1")
    f2 = var("f2")
    print
    # Accumulators start at zero.
    hx ^= 0
    hy ^= 0
    hz ^= 0
    print
    begin_block("switch(p) ")
    # Guards start at order 1; all are opened before the first case.
    for j in range(1, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    for j in range(p_max, 0, -1):
        print "\tcase %i:" % j
        # k == 0 terms initialise the per-order sums.
        cx ^= sqrt(float(j * (j - 1))) * L[Re(j, 0)] * Y[Re(j - 1, 1)]
        cy ^= sqrt(float(j * (j - 1))) * L[Re(j, 0)] * Y[Im(j - 1, 1)]
        cz ^= j * L[Re(j, 0)] * Y[Re(j - 1, 0)]
        for k in range(1, j + 1):
            if k <= j - 2:
                # Both neighbours k + 1 and k - 1 exist at order j - 1.
                f1 ^= sqrt(float((j - k) * (j - k - 1)))
                f2 ^= sqrt(float((j + k) * (j + k - 1)))
                cx ^= (
                    cx
                    + f1 * (L[Re(j, k)] * Y[Re(j - 1, k + 1)] - L[Im(j, k)] * Y[Im(j - 1, k + 1)])
                    + f2 * (L[Im(j, k)] * Y[Im(j - 1, k - 1)] - L[Re(j, k)] * Y[Re(j - 1, k - 1)])
                )
                cy ^= (
                    cy
                    + f1 * (L[Im(j, k)] * Y[Re(j - 1, k + 1)] + L[Re(j, k)] * Y[Im(j - 1, k + 1)])
                    + f2 * (L[Im(j, k)] * Y[Re(j - 1, k - 1)] + L[Re(j, k)] * Y[Im(j - 1, k - 1)])
                )
            else:
                # k >= j - 1: only the k - 1 neighbour is used.
                cx ^= cx - sqrt(float((j + k) * (j + k - 1))) * (
                    L[Re(j, k)] * Y[Re(j - 1, k - 1)] - L[Im(j, k)] * Y[Im(j - 1, k - 1)]
                )
                cy ^= cy + sqrt(float((j + k) * (j + k - 1))) * (
                    L[Im(j, k)] * Y[Re(j - 1, k - 1)] + L[Re(j, k)] * Y[Im(j - 1, k - 1)]
                )
            # The z term is emitted for every k; its factor is zero at k == j.
            cz ^= cz + 2.0 * sqrt(float((j + k) * (j - k))) * (
                L[Re(j, k)] * Y[Re(j - 1, k)] - L[Im(j, k)] * Y[Im(j - 1, k)]
            )
        # Fold this order into the running result, weighted by scale[j].
        hx ^= hx + scale[j] * cx
        hy ^= hy + scale[j] * cy
        hz ^= hz + scale[j] * cz
        print "#endif /* FMM_P_MAX >= %i */" % j
    end_block()
    # The output pointers are written with raw C text, not through the DSL.
    print "\t*x = hx;"
    print "\t*y = hy;"
    print "\t*z = hz;"
    end_block()
    print
def gen_core_gen_M_L_dipole_plus(funname, p_max):
    """Generate ``void funname(int p, basetype *scale, type mx, type my, type mz, basetype *Y, basetype *L)``.

    The emitted body is a ``switch(p)`` with cases from ``p_max`` down to 0,
    each wrapped in ``#if FMM_P_MAX >= j`` / ``#endif``.  Every case adds,
    weighted by ``scale[j]``, combinations of (mx, my, mz) with entries of Y
    at order ``j + 1`` to L[(j, k)], walking k from j down to 0.

    NOTE(review): ``^=`` presumably emits a C assignment and ``+=`` an
    accumulation; ``Re``/``Im`` are index helpers defined outside this block.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        'void %s(int p, %s *scale, %s mx, %s my, %s mz, %s *Y, %s *L)\n' %
        (funname, basetype, type, type, type, basetype, basetype))
    scale = parameter("scale", array=True)
    mx = parameter("mx")
    my = parameter("my")
    mz = parameter("mz")
    Y = parameter("Y", array=True)
    L = parameter("L", array=True, horiz_add=-1)
    f = var("f")
    print
    begin_block("switch(p) ")
    # All preprocessor guards are opened first, lowest order outermost.
    for j in range(0, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    for j in range(p_max, -1, -1):
        print "\tcase %i:" % j
        #print "\t\tf = scale[%i];" % j
        f ^= scale[j]
        # General terms, k = j down to 2.
        for k in range(j + 1 - 1, 2 - 1, -1):
            L[Im(j, k)] += f * (
                (0.5 * sqrt(float((j + k + 1) * (j + k + 2)))) *
                (my * Y[Re(j + 1, k + 1)] + mx * Y[Im(j + 1, k + 1)]) +
                (0.5 * sqrt(float((j - k + 1) * (j - k + 2)))) *
                (my * Y[Re(j + 1, k - 1)] - mx * Y[Im(j + 1, k - 1)]) -
                (sqrt(float(
                    (j - k + 1) * (j + k + 1)))) * mz * Y[Im(j + 1, k)])
            L[Re(j, k)] += f * (
                (0.5 * sqrt(float((j + k + 1) * (j + k + 2)))) *
                (mx * Y[Re(j + 1, k + 1)] - my * Y[Im(j + 1, k + 1)]) -
                (0.5 * sqrt(float((j - k + 1) * (j - k + 2)))) *
                (mx * Y[Re(j + 1, k - 1)] + my * Y[Im(j + 1, k - 1)]) -
                (sqrt(float(
                    (j - k + 1) * (j + k + 1)))) * mz * Y[Re(j + 1, k)])
        # k == 1 is written out separately: its k - 1 == 0 factor uses only
        # the Re entry of Y.
        if j > 0:
            L[Im(j, 1)] += f * (
                (0.5 * sqrt(float((j + 1 + 1) * (j + 1 + 2)))) *
                (my * Y[Re(j + 1, 1 + 1)] + mx * Y[Im(j + 1, 1 + 1)]) +
                (0.5 * sqrt(float(
                    (j - 1 + 1) * (j - 1 + 2)))) * my * Y[Re(j + 1, 1 - 1)] -
                (sqrt(float(
                    (j - 1 + 1) * (j + 1 + 1)))) * mz * Y[Im(j + 1, 1)])
            L[Re(j, 1)] += f * (
                (0.5 * sqrt(float((j + 1 + 1) * (j + 1 + 2)))) *
                (mx * Y[Re(j + 1, 1 + 1)] - my * Y[Im(j + 1, 1 + 1)]) -
                (0.5 * sqrt(float(
                    (j - 1 + 1) * (j - 1 + 2)))) * mx * Y[Re(j + 1, 1 - 1)] -
                (sqrt(float(
                    (j - 1 + 1) * (j + 1 + 1)))) * mz * Y[Re(j + 1, 1)])
        # k == 0: the Im entry receives a zero contribution.
        L[Im(j, 0)] += 0
        L[Re(j, 0)] += f * ((sqrt(float(j + 1) * (j + 2))) *
                            (mx * Y[Re(j + 1, 1)] - my * Y[Im(j + 1, 1)]) -
                            (j + 1) * mz * Y[Re(j + 1, 0)])
        print "#endif /* FMM_P_MAX >= %i */" % j
    end_block()
    end_block()
    print
def gen_spherical_harmonics(funname, p):
    """Generate ``void <funname>_p<p>(int p, basetype *Y, type sin_phi, type cos_phi, type cos_theta)``.

    The generator precomputes three Python-side tables and bakes them into
    the fully unrolled C code:
      Fact -- factorials 0! .. (2p)! as floats,
      B    -- sqrt((n-|m|)! / (n+|m|)!) stored at index J(n, m),
      R    -- reciprocals 1/i for i = 1..p (R[0] is a 0.0 placeholder).
    The emitted code fills Y pairwise: Y[2*k] gets the c-weighted value and
    Y[2*k+1] the s-weighted value, with c/s starting at cos_phi/sin_phi.

    B[4] and Y[8]/Y[9] are used unconditionally in the m == 1 section, so
    ``p`` must be at least 2; for p == 1 the table B has only 3 entries.

    NOTE(review): ``^=`` presumably emits a C assignment and ``*=``/``-=``
    in-place updates; ``J`` is an index helper defined outside this block.

    funname -- prefix of the generated C function name.
    p       -- maximum order, also embedded in the function name.
    """
    # Factorial table 0! .. (2p)!.
    Fact = [0.0 for i in range(2 * p + 1)]
    Fact[0] = 1.0
    for n in range(1, len(Fact)):
        Fact[n] = n * Fact[n - 1]
    # Normalisation factors, one per (n, m) with 0 <= m <= n.
    B = [0.0 for i in range((p * (p + 1)) / 2 + p + 1)]
    for n in range(p + 1):
        for m in range(n + 1):
            B[J(n, m)] = math.sqrt(Fact[n - abs(m)] / Fact[n + abs(m)])
    # Reciprocals, so the generated code multiplies instead of dividing.
    R = [0.0] + [1.0 / i for i in range(1, p + 1)]
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        'void %s_p%i(int p, %s *Y, %s sin_phi, %s cos_phi, %s cos_theta)\n' %
        (funname, p, basetype, type, type, type))
    Y = parameter("Y", array=True)
    sin_phi = parameter("sin_phi")
    cos_phi = parameter("cos_phi")
    cos_theta = parameter("cos_theta")
    pmm = var("pmm")
    pm1 = var("pm1")
    pm2 = var("pm2")
    pml = var("pml")
    c = var("c")
    s = var("s")
    h = var("h")
    alpha = var("alpha")
    beta = var("beta")
    sqrt_1_minus_cos_theta_2 = var("sqrt_1_minus_cos_theta_2",
                                   sqrt(1.0 - cos_theta * cos_theta))
    print
    pmm ^= 1.0
    ## m==0: #############################
    Y[0] ^= B[0] * pmm
    Y[1] ^= 0.0
    pm2 ^= pmm
    pml ^= pmm * cos_theta
    Y[2] ^= pml
    Y[3] ^= 0.0
    # k tracks the linear index of (l, 0); it grows by l per step.
    k = 1
    for l in range(2, p + 1):
        pm1 ^= pml
        pml ^= R[l] * ((2 * l - 1) * cos_theta * pm1 - (l - 1) * pm2)
        pm2 ^= pm1
        k += l
        Y[2 * k] ^= pml
        Y[2 * k + 1] ^= 0.0
    ## m==1: #############################
    m = 1
    pmm *= -sqrt_1_minus_cos_theta_2
    s ^= sin_phi
    c ^= cos_phi
    # alpha/beta drive the (c, s) update used for every later m.
    alpha ^= 1 - c
    beta ^= s
    h ^= B[2] * pmm
    Y[4] ^= c * h
    Y[5] ^= s * h
    pm2 ^= pmm
    pml ^= 3 * pmm * cos_theta
    h ^= B[4] * pml
    Y[8] ^= c * h
    Y[9] ^= s * h
    #k = (m+1)*(m+2)/2 + m
    k = 4
    for l in range(3, p + 1):
        pm1 ^= pml
        pml ^= R[l - 1] * ((2 * l - 1) * cos_theta * pm1 - l * pm2)
        pm2 ^= pm1
        k += l
        h ^= B[k] * pml
        Y[2 * k] ^= c * h
        Y[2 * k + 1] ^= s * h
    ## 2 <= m <= p-1: #############################
    # kk tracks the linear index of (m, 0) across iterations.
    kk = 1
    for m in range(2, p):
        pmm *= (1 - 2 * m) * sqrt_1_minus_cos_theta_2
        # Advance (c, s) by one step using alpha = 1 - cos_phi, beta = sin_phi.
        h ^= (alpha * c + beta * s)
        s ^= s - alpha * s + beta * c  # to simplify transformation to fma, fms, fnms
        c -= h
        kk += m
        k = kk + m
        h ^= B[k] * pmm
        Y[2 * k] ^= c * h
        Y[2 * k + 1] ^= s * h
        pm2 ^= pmm
        pml ^= (2 * m + 1) * pmm * cos_theta
        k += m + 1
        h ^= B[k] * pml
        Y[2 * k] ^= c * h
        Y[2 * k + 1] ^= s * h
        for l in range(m + 2, p + 1):
            pm1 ^= pml
            pml ^= R[l - m] * ((2 * l - 1) * cos_theta * pm1 - (l + m - 1) * pm2)
            pm2 ^= pm1
            k += l
            h ^= B[k] * pml
            Y[2 * k] ^= c * h
            Y[2 * k + 1] ^= s * h
    ## m==p: #############################
    # Only the diagonal entry (p, p) remains; no l-recurrence is needed.
    m = p
    pmm *= (1 - 2 * m) * sqrt_1_minus_cos_theta_2
    h ^= (alpha * c + beta * s)
    s ^= s - alpha * s + beta * c  # to simplify transformation to fma, fms, fnms
    c -= h
    kk += m
    k = kk + m
    h ^= B[k] * pmm
    Y[2 * k] ^= c * h
    Y[2 * k + 1] ^= s * h
    end_block()
def gen_core_eval_L_M_grad_minus(funname, p_max):
    """Generate ``void funname(int p, basetype *scale, basetype *L, basetype *Y, type *x, type *y, type *z)``.

    The emitted body is a ``switch(p)`` with cases from ``p_max`` down to 1
    (no case 0 is generated here), each wrapped in ``#if FMM_P_MAX >= j`` /
    ``#endif``.  Every case builds three per-order sums (cx, cy, cz) from
    L[(j, k)] and Y[(j - 1, k +- 1)] and adds them, weighted by
    ``scale[j]``, to hx/hy/hz, which are stored through the x/y/z pointers.

    NOTE(review): ``^=`` presumably emits a C assignment; ``Re``/``Im`` are
    index helpers defined outside this block -- confirm.

    funname -- name of the generated C function.
    p_max   -- highest order for which a case is generated.
    """
    type = Op.templates["type"]
    basetype = Op.templates["basetype"]
    begin_block(
        'void %s(int p, %s *scale, %s *L, %s *Y, %s *x, %s *y, %s *z)\n' %
        (funname, basetype, basetype, basetype, type, type, type))
    L = parameter("L", array=True, base_array=True)
    Y = parameter("Y", array=True)
    scale = parameter("scale", array=True)
    hx = var("hx")
    hy = var("hy")
    hz = var("hz")
    cx = var("cx")
    cy = var("cy")
    cz = var("cz")
    f1 = var("f1")
    f2 = var("f2")
    print
    # Accumulators start at zero.
    hx ^= 0
    hy ^= 0
    hz ^= 0
    print
    begin_block("switch(p) ")
    # Guards start at order 1; all are opened before the first case.
    for j in range(1, p_max + 1):
        print "#if FMM_P_MAX >= %i" % j
    for j in range(p_max, 0, -1):
        print "\tcase %i:" % j
        # k == 0 terms initialise the per-order sums.
        cx ^= sqrt(float(j * (j - 1))) * L[Re(j, 0)] * Y[Re(j - 1, 1)]
        cy ^= sqrt(float(j * (j - 1))) * L[Re(j, 0)] * Y[Im(j - 1, 1)]
        cz ^= j * L[Re(j, 0)] * Y[Re(j - 1, 0)]
        for k in range(1, j + 1):
            if k <= j - 2:
                # Both neighbours k + 1 and k - 1 exist at order j - 1.
                f1 ^= sqrt(float((j - k) * (j - k - 1)))
                f2 ^= sqrt(float((j + k) * (j + k - 1)))
                cx ^= cx + f1*(L[Re(j,k)]*Y[Re(j-1,k+1)] - L[Im(j,k)]*Y[Im(j-1,k+1)])\
                         + f2*(L[Im(j,k)]*Y[Im(j-1,k-1)] - L[Re(j,k)]*Y[Re(j-1,k-1)])
                cy ^= cy + f1*(L[Im(j,k)]*Y[Re(j-1,k+1)] + L[Re(j,k)]*Y[Im(j-1,k+1)])\
                         + f2*(L[Im(j,k)]*Y[Re(j-1,k-1)] + L[Re(j,k)]*Y[Im(j-1,k-1)])
            else:
                # k >= j - 1: only the k - 1 neighbour is used.
                cx ^= cx - sqrt(float(
                    (j + k) * (j + k - 1))) * (L[Re(j, k)] * Y[Re(j - 1, k - 1)] -
                                               L[Im(j, k)] * Y[Im(j - 1, k - 1)])
                cy ^= cy + sqrt(float(
                    (j + k) * (j + k - 1))) * (L[Im(j, k)] * Y[Re(j - 1, k - 1)] +
                                               L[Re(j, k)] * Y[Im(j - 1, k - 1)])
            # The z term is emitted for every k; its factor is zero at k == j.
            cz ^= cz + 2.0 * sqrt(float(
                (j + k) * (j - k))) * (L[Re(j, k)] * Y[Re(j - 1, k)] -
                                       L[Im(j, k)] * Y[Im(j - 1, k)])
        # Fold this order into the running result, weighted by scale[j].
        hx ^= hx + scale[j] * cx
        hy ^= hy + scale[j] * cy
        hz ^= hz + scale[j] * cz
        print "#endif /* FMM_P_MAX >= %i */" % j
    end_block()
    # The output pointers are written with raw C text, not through the DSL.
    print '\t*x = hx;'
    print '\t*y = hy;'
    print '\t*z = hz;'
    end_block()
    print