def update_rk4(self, psi_buf):
    """Advance ``psi_buf`` by one four-stage Runge-Kutta step of size ``self.dt``.

    Each stage calls ``self.inside.compute_rhs`` to fill one of the slope
    buffers (``self.k1_buf`` .. ``self.k4_buf``).  Between stages the
    ``prg.add`` kernel is launched with a step fraction (``0.5*dt``,
    ``0.5*dt``, ``dt``), the latest slope buffer, ``psi_buf`` and the scratch
    buffer ``self.psi2_buf``; the scratch buffer is what the next stage
    evaluates.  Finally ``prg.rk4_add`` is launched with all four slope
    buffers and ``psi_buf``.

    NOTE(review): the kernels live in the OpenCL program, not in this file
    section.  Presumably ``add`` writes ``psi + coeff * k`` into ``psi2`` and
    ``rk4_add`` updates ``psi`` in place with the weighted slope sum --
    confirm against the kernel source.

    Parameters
    ----------
    psi_buf
        Buffer holding the current state; passed to every kernel launch.

    Side effects
    ------------
    ``self.tn`` is advanced by ``self.dt``.  The previous code incremented a
    local copy (``tn = self.tn; tn += dt``), which leaves ``self.tn``
    untouched whenever it is an immutable number.  Augmented assignment on
    the attribute also keeps the old in-place behaviour if ``self.tn`` is a
    mutable array.
    """
    dt = self.dt
    half_dt = 0.5 * dt
    k1, k2 = self.k1_buf, self.k2_buf
    k3, k4 = self.k3_buf, self.k4_buf
    psi2 = self.psi2_buf
    psi = psi_buf
    gs = self.inside.gs

    # Stage 1: slope at the current state, then scratch state for stage 2.
    self.inside.compute_rhs(psi, k1)
    prg.add(queue, (gs,), None, i4(gs), f8(half_dt), k1, psi, psi2)

    # Stage 2: slope at the first scratch state.
    self.inside.compute_rhs(psi2, k2)
    prg.add(queue, (gs,), None, i4(gs), f8(half_dt), k2, psi, psi2)

    # Stage 3: slope at the second scratch state; next scratch uses full dt.
    self.inside.compute_rhs(psi2, k3)
    prg.add(queue, (gs,), None, i4(gs), f8(dt), k3, psi, psi2)

    # Stage 4: final slope, then combine all four slopes into psi.
    self.inside.compute_rhs(psi2, k4)
    prg.rk4_add(queue, (gs,), None, i4(gs), f8(dt), k1, k2, k3, k4, psi)

    # Advance the stored time on the instance, not on a throw-away local.
    self.tn += dt
def compute_rhs(self, psi_buf, ret_psi_buf):
    """Launch the ``compute_rhs`` kernel, then apply the inner-element interaction.

    The kernel is enqueued on ``queue`` with global size ``(self.gs,)`` and
    receives, in order: ``i4(self.nelem)``, ``i4(self.ngll)``,
    ``self.dvvT_buf``, ``self.J_buf``, ``self.AI_buf``,
    ``self.state.velocity_buf``, ``psi_buf`` and ``ret_psi_buf``.
    Afterwards ``self.interact.interact_between_elems_inner`` is called on
    ``ret_psi_buf``.

    Parameters
    ----------
    psi_buf
        Buffer passed to the kernel as the second-to-last argument
        (presumably the input state -- confirm against the kernel source).
    ret_psi_buf
        Buffer passed as the last kernel argument and then handed to the
        interaction step (presumably the output -- confirm likewise).
    """
    elem_count = self.nelem
    gll_count = self.ngll
    deriv_buf = self.dvvT_buf
    jacobian_buf = self.J_buf
    ai_buf = self.AI_buf
    vel_buf = self.state.velocity_buf
    global_size = self.gs

    prg.compute_rhs(
        queue, (global_size,), None,
        i4(elem_count), i4(gll_count),
        deriv_buf, jacobian_buf, ai_buf, vel_buf,
        psi_buf, ret_psi_buf)

    # The kernel result is post-processed by the interaction object.
    self.interact.interact_between_elems_inner(ret_psi_buf)
def interact_between_elems_inner(self, var_buf):
    """Run the three ``interact_inner*`` kernels on ``var_buf``, in order.

    ``interact_inner2``, ``interact_inner3`` and ``interact_inner4`` are
    enqueued one after another.  Each launch uses its own global size
    (``self.gs2`` / ``self.gs3`` / ``self.gs4``) and its own buffer
    (``self.mvp_inner2_buf`` / ``self.mvp_inner3_buf`` /
    ``self.mvp_inner4_buf``); all three share ``self.ngll`` and ``var_buf``.
    What the kernels do to ``var_buf`` is defined in the OpenCL program, not
    in this method.

    Parameters
    ----------
    var_buf
        Buffer passed as the last argument to each kernel.
    """
    tables = (self.mvp_inner2_buf, self.mvp_inner3_buf, self.mvp_inner4_buf)
    sizes = (self.gs2, self.gs3, self.gs4)
    n_gll = self.ngll
    kernel_names = ('interact_inner2', 'interact_inner3', 'interact_inner4')

    for kernel_name, size, table in zip(kernel_names, sizes, tables):
        # Look the kernel up just before launching it, one stage at a time.
        launch = getattr(prg, kernel_name)
        launch(queue, (size,), None, i4(size), i4(n_gll), table, var_buf)
# NOTE(review): this first copy uses ``t`` / ``t_buf`` as bound before this
# point.  It looks like the tail of an earlier upload loop whose header is
# not visible in this section -- confirm its placement against the full file.
cl.enqueue_copy(queue, t_buf, t)

# Copy vx, vy, vz into vx_buf, vy_buf, vz_buf.
for t, t_buf in zip([vx, vy, vz], [vx_buf, vy_buf, vz_buf]):
    cl.enqueue_copy(queue, t_buf, t)


#------------------------------------------------------------------------------
# program and kernel
#------------------------------------------------------------------------------
# Read the OpenCL source inside a ``with`` block so the file handle is closed
# as soon as the text is loaded instead of being left to garbage collection.
with open('LJ_10.cl') as kernel_file:
    kernels = kernel_file.read()
prg = cl.Program(context, kernels).build(options='')

# Work-group size multiple reported for the 'force' kernel on ``device``.
preferred_multiple = cl.Kernel(prg, 'force').get_work_group_info(
    cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, device)

# Argument lists are built once and reused on every time step.
force_args = [i4(n), f8(lje), f8(ljs),
              x_buf, y_buf, z_buf,
              fx_buf, fy_buf, fz_buf]
# NOTE(review): ``dt`` is wrapped with i4 while the other scalar parameters
# use f8 -- confirm that the 'solve' kernel really takes an integer here.
solve_args = [i4(n), i4(dt), f8(em),
              x_buf, y_buf, z_buf,
              vx_buf, vy_buf, vz_buf,
              fx_buf, fy_buf, fz_buf]
energy_args = [i4(n), f8(em), f8(lje), f8(ljs),
               x_buf, y_buf, z_buf,
               vx_buf, vy_buf, vz_buf,
               ke_group_buf, pe_group_buf]


#------------------------------------------------------------------------------
# dynamics
#------------------------------------------------------------------------------
# Each of the ``mt`` steps launches 'force' and then 'solve'.
for tstep in xrange(1, mt+1):
    prg.force(queue, Gs, Ls, *force_args)
    prg.solve(queue, Gs, Ls, *solve_args)

    # Energy diagnostics, currently disabled:
    #prg.energy(queue, Gs, Ls, *energy_args)
    #cl.enqueue_copy(queue, ke_group, ke_group_buf)
    #cl.enqueue_copy(queue, pe_group, pe_group_buf)
    #ke, pe = ke_group.sum(), pe_group.sum()