Example #1
    def _test_generateKernel(self, alpha, beta, double, reduced):
        # Translate parameters
        if double:
            dtype = np.float64
        else:
            dtype = np.float32
        # Generate three random matrices
        a_cpu = np.random.randn(self.size, self.size).astype(dtype)
        b_cpu = np.random.randn(self.size, self.size).astype(dtype)
        c_cpu = np.random.randn(self.size, self.size).astype(dtype)
        # Compute reference solution on the CPU to verify
        reference = c_cpu * beta + np.dot(a_cpu * alpha, b_cpu)
        # Get the context and queue
        ctx = cl.create_some_context()
        queue = cl.CommandQueue(ctx)
        # Shorthand for memory flags
        mf = cl.mem_flags
        # Allocate and copy the b array onto the device
        b_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_cpu)
        # Allocate space for the output on the Device
        c_gpu = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=c_cpu)
        # Generate the kernel code
        kernel = gen.generateKernel(a_cpu,
                                    alpha=alpha,
                                    beta=beta,
                                    double=double,
                                    reduced=reduced,
                                    platform=self.platform)
        # Row strides of the b and c matrices, in elements
        typeSize = dtype().itemsize
        bstride = b_cpu.strides[0] // typeSize
        cstride = reference.strides[0] // typeSize

        # Compile and run the kernel
        program = cl.Program(ctx, kernel).build()
        gimmik_mm = program.gimmik_mm
        gimmik_mm(queue,
                  (reference.shape[0],),
                  None,
                  b_gpu,
                  c_gpu,
                  np.int32(self.size),
                  np.int32(bstride),
                  np.int32(cstride))

        # Copy the result back from device memory
        cl.enqueue_copy(queue, c_cpu, c_gpu)

        # Verify the result
        if double:
            rtol, atol = 1.e-4, 1.e-7
        else:
            rtol, atol = 1.e-3, 1.e-6
        self.assertTrue(np.allclose(reference, c_cpu, rtol, atol),
                        "Reference solution differs from the GPU result")
Example #2
    def _test_generateKernel(self, alpha, beta, double, reduced):
        # Translate parameters
        if double:
            dtype = np.float64
        else:
            dtype = np.float32
        # Generate three random matrices
        a_cpu = np.random.randn(self.size, self.size).astype(dtype)
        b_cpu = np.random.randn(self.size, self.size).astype(dtype)
        c_cpu = np.random.randn(self.size, self.size).astype(dtype)
        # Compute reference solution on the CPU to verify
        reference = c_cpu * beta + np.dot(a_cpu * alpha, b_cpu)
        # Get the context and queue
        ctx = cl.create_some_context()
        queue = cl.CommandQueue(ctx)
        # Shorthand for memory flags
        mf = cl.mem_flags
        # Allocate and copy the b array onto the device
        b_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_cpu)
        # Allocate space for the output on the Device
        c_gpu = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=c_cpu)
        # Generate the kernel code
        kernel = gen.generateKernel(a_cpu,
                                    alpha=alpha,
                                    beta=beta,
                                    double=double,
                                    reduced=reduced,
                                    platform=self.platform)
        # Row strides of the b and c matrices, in elements
        typeSize = dtype().itemsize
        bstride = b_cpu.strides[0] // typeSize
        cstride = reference.strides[0] // typeSize

        # Compile and run the kernel
        program = cl.Program(ctx, kernel).build()
        gimmik_mm = program.gimmik_mm
        gimmik_mm(queue, (reference.shape[0], ), None, b_gpu, c_gpu,
                  np.int32(self.size), np.int32(bstride), np.int32(cstride))

        # Copy the result back from device memory
        cl.enqueue_copy(queue, c_cpu, c_gpu)

        # Verify the result
        if double:
            rtol, atol = 1.e-4, 1.e-7
        else:
            rtol, atol = 1.e-3, 1.e-6
        self.assertTrue(np.allclose(reference, c_cpu, rtol, atol),
                        "Reference solution differs from the GPU result")
Example #3
    def _test_generateKernel(self, alpha, beta, double, reduced):
        # Translate parameters
        if double:
            dtype = np.float64
        else:
            dtype = np.float32
        # Generate three random matrices
        a_cpu = np.random.randn(self.size, self.size).astype(dtype)
        b_cpu = np.random.randn(self.size, self.size).astype(dtype)
        c_cpu = np.random.randn(self.size, self.size).astype(dtype)
        # Compute reference solution on the CPU to verify
        reference = c_cpu * beta + np.dot(a_cpu * alpha, b_cpu)
        # Allocate and copy the b and c arrays onto the device
        b_gpu = gpuarray.to_gpu(b_cpu)
        c_gpu = gpuarray.to_gpu(c_cpu)
        # c_cpu has been copied to the device; reuse the name for the reference
        c_cpu = reference
        # Generate the kernel code
        kernel = gen.generateKernel(a_cpu,
                                    alpha=alpha,
                                    beta=beta,
                                    double=double,
                                    reduced=reduced,
                                    platform=self.platform)
        # Row strides of the b and c matrices, in elements
        typeSize = dtype().itemsize
        bstride = b_gpu.strides[0] // typeSize
        cstride = c_gpu.strides[0] // typeSize

        # Compile and run the kernel
        module = compiler.SourceModule(kernel)
        function = module.get_function('gimmik_mm')
        block, grid = (self.size, 1, 1), (1, 1)
        function(b_gpu,
                 c_gpu,
                 np.int32(self.size),
                 np.int32(bstride),
                 np.int32(cstride),
                 block=block,
                 grid=grid)

        # Verify the result
        if double:
            rtol, atol = 1.e-4, 1.e-7
        else:
            rtol, atol = 1.e-3, 1.e-6
        self.assertTrue(np.allclose(c_cpu, c_gpu.get(), rtol, atol),
                        "Reference solution differs from the GPU result")
Example #4
    def _test_generateKernel(self, alpha, beta, double, reduced):
        # Translate parameters
        if double:
            dtype = np.float64
        else:
            dtype = np.float32
        # Generate three random matrices
        a_cpu = np.random.randn(self.size, self.size).astype(dtype)
        b_cpu = np.random.randn(self.size, self.size).astype(dtype)
        c_cpu = np.random.randn(self.size, self.size).astype(dtype)
        # Compute reference solution on the CPU to verify
        reference = c_cpu * beta + np.dot(a_cpu * alpha, b_cpu)
        # Allocate and copy the b and c arrays onto the device
        b_gpu = gpuarray.to_gpu(b_cpu)
        c_gpu = gpuarray.to_gpu(c_cpu)
        # c_cpu has been copied to the device; reuse the name for the reference
        c_cpu = reference
        # Generate the kernel code
        kernel = gen.generateKernel(a_cpu,
                                    alpha=alpha,
                                    beta=beta,
                                    double=double,
                                    reduced=reduced,
                                    platform=self.platform)
        # Row strides of the b and c matrices, in elements
        typeSize = dtype().itemsize
        bstride = b_gpu.strides[0] // typeSize
        cstride = c_gpu.strides[0] // typeSize

        # Compile and run the kernel
        module = compiler.SourceModule(kernel)
        function = module.get_function('gimmik_mm')
        block, grid = (self.size, 1, 1), (1, 1)
        function(b_gpu,
                 c_gpu,
                 np.int32(self.size),
                 np.int32(bstride),
                 np.int32(cstride),
                 block=block,
                 grid=grid)

        # Verify the result
        if double:
            rtol, atol = 1.e-4, 1.e-7
        else:
            rtol, atol = 1.e-3, 1.e-6
        self.assertTrue(np.allclose(c_cpu, c_gpu.get(), rtol, atol),
                        "Reference solution differs from the GPU result")