-
Notifications
You must be signed in to change notification settings - Fork 0
/
Electromagnetism.py
122 lines (97 loc) · 4.32 KB
/
Electromagnetism.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
from string import Template
class PIC:
def __init__(self, numParts, E, B, step, wx=1, wy=1, wz=1):
self.numParts = numParts
f = open("cudaTools.cu","r")
cuTools = f.read()
f.close()
self.subs = { 'Ex':E.x, 'Ey':E.y, 'Ez':E.z,
'Bx':B.x, 'By':B.y, 'Bz':B.z,
'wx':wx, 'wy':wy, 'wz':wz,
'step':step, 'numParts':numParts, 'cudaTools':cuTools }
self.mod_template = Template("""//CUDA//
${cudaTools}
__device__ void boris(float* v)
{
float vmx = v[0] + (${step}/2) * ${Ex};
float vmy = v[1] + (${step}/2) * ${Ey};
float vmz = v[2] + (${step}/2) * ${Ez};
float tx = (${step}/2) * ${Bx};
float ty = (${step}/2) * ${By};
float tz = (${step}/2) * ${Bz};
float vprx = vmx + (vmy*tz - vmz*ty);
float vpry = vmy + (vmz*tx - vmx*tz);
float vprz = vmz + (vmx*ty - vmy*tx);
float sx = (2/(1+pow(tx,2)+pow(ty,2)+pow(tz,2))) * tx;
float sy = (2/(1+pow(tx,2)+pow(ty,2)+pow(tz,2))) * ty;
float sz = (2/(1+pow(tx,2)+pow(ty,2)+pow(tz,2))) * tz;
float vpx = vmx + (vpry*sz - vprz*sy);
float vpy = vmy + (vprz*sx - vprx*sz);
float vpz = vmz + (vprx*sy - vpry*sx);
v[0] = vpx + (${step}/2) * ${Ex};
v[1] = vpy + (${step}/2) * ${Ey};
v[2] = vpz + (${step}/2) * ${Ez};
}
__device__ void euler(float* x, float* v)
{
x[0] = x[0] + ${step}*v[0];
x[1] = x[1] + ${step}*v[1];
x[2] = x[2] + ${step}*v[2];
}
__device__ void periodic(float* x)
{
x[0] = x[0] - ${wx}*floorf(x[0]/${wx});
x[1] = x[1] - ${wy}*floorf(x[1]/${wy});
x[2] = x[2] - ${wz}*floorf(x[2]/${wz});
}
__global__ void multiBoris(float* x, float* v, int n)
{
int id = getGlobalId();
int tid = getThreadId();
if(id < ${numParts})
{
// Setup cache.
// The first index specifies the thread (within a block, ie 0-1023)
// the second index specifies x or v
// the thrid index is the component.
__shared__ float cache[1024][2][3];
// load cache
cache[tid][0][0] = x[index(id,0)];
cache[tid][0][1] = x[index(id,1)];
cache[tid][0][2] = x[index(id,2)];
cache[tid][1][0] = v[index(id,0)];
cache[tid][1][1] = v[index(id,1)];
cache[tid][1][2] = v[index(id,2)];
for(int i=0;i<n;i++)
{
// Update velocities
boris(cache[tid][1]);
// Move particles
euler(cache[tid][0],cache[tid][1]);
// Periodic boundary conditions
periodic(cache[tid][0]);
}
// unload cache
x[index(id,0)] = cache[tid][0][0];
x[index(id,1)] = cache[tid][0][1];
x[index(id,2)] = cache[tid][0][2];
v[index(id,0)] = cache[tid][0][0];
v[index(id,1)] = cache[tid][0][1];
v[index(id,2)] = cache[tid][0][2];
}
}
""")
self.mod = SourceModule(self.mod_template.substitute(self.subs))
self.cuBoris = self.mod.get_function('multiBoris')
def stepN(self,positions,velocities,n):
x_gpu = cuda.mem_alloc(positions.nbytes)
v_gpu = cuda.mem_alloc(velocities.nbytes)
cuda.memcpy_htod(x_gpu,positions)
cuda.memcpy_htod(v_gpu,velocities)
import numpy as np
self.cuBoris(x_gpu, v_gpu, np.int32(n), block=(1024,1,1), grid=(self.numParts/1024 + 1,1))
cuda.memcpy_dtoh(positions,x_gpu)
cuda.memcpy_dtoh(velocities,v_gpu)