示例#1
0
def list_from_cuda_array(a, n, release_memory=True):
    """Copy ``n`` C ints from ``a`` into host memory and return them as a list.

    :param a: pointer handed to ``cudart.cuda_memcpy`` as the copy source
              (expected to reference at least ``n`` C ints -- presumably
              device memory, judging by the function name).
    :param n: number of ``int`` elements to copy.
    :param release_memory: when true, ``a`` is passed to ``cudart.cuda_free``
                           once the copy has been issued.
    :return: Python list with the ``n`` copied values.
    """
    # Allocate a host buffer of n ints. (The element type must be c_int, not
    # POINTER(c_int): an array of pointers only worked by being oversized.)
    host_array = (ct.c_int * n)()
    host_ptr = ct.cast(host_array, ct.POINTER(ct.c_int))
    n_bytes = n * ct.sizeof(ct.c_int)
    cudart.cuda_memcpy(host_ptr, a, n_bytes, 'default')
    if release_memory:
        cudart.cuda_free(a)
    # View the ctypes buffer as int32 without copying, then convert to a list.
    host_view = np.ndarray(shape=(n, ), dtype=np.int32, buffer=host_array,
                           order='F')
    return host_view.tolist()
示例#2
0
def list_from_cuda_array(a, n, release_memory=True):
    """Copy ``n`` C ints from ``a`` into host memory and return them as a list.

    :param a: pointer handed to ``cudart.cuda_memcpy`` as the copy source
              (expected to reference at least ``n`` C ints -- presumably
              device memory, judging by the function name).
    :param n: number of ``int`` elements to copy.
    :param release_memory: when true, ``a`` is passed to ``cudart.cuda_free``
                           once the copy has been issued.
    :return: Python list with the ``n`` copied values.
    """
    # Allocate a host buffer of n ints. (The element type must be c_int, not
    # POINTER(c_int): an array of pointers only worked by being oversized.)
    host_array = (ct.c_int * n)()
    host_ptr = ct.cast(host_array, ct.POINTER(ct.c_int))
    n_bytes = n * ct.sizeof(ct.c_int)
    cudart.cuda_memcpy(host_ptr, a, n_bytes, 'default')
    if release_memory:
        cudart.cuda_free(a)
    # View the ctypes buffer as int32 without copying, then convert to a list.
    host_view = np.ndarray(shape=(n, ), dtype=np.int32, buffer=host_array,
                           order='F')
    return host_view.tolist()
示例#3
0
    def test_dependencies(self):
        """Run a layered chain of ``test_dependencies`` launches on 6 contexts.

        Node 0 is launched on context 5 with no blocking nodes. Then, for each
        of the ``N`` rounds, three launches go to contexts 0-2 (blocked by one
        node), two to contexts 3-4 (blocked by three nodes) and one to
        context 5 (blocked by two nodes). The test passes when the entries of
        ``test_results`` and ``execution_checklist`` together sum to
        ``2 * (k * N + 1)``.

        NOTE(review): presumably every launch sets one flag in each array and
        ``wait``/``block`` order the streams between contexts -- confirm
        against the kernel and ``GpuContext``.
        """
        N = 10
        k = 6
        node_count = k * N + 1
        execution_checklist = cuda_array_from_list([0] * node_count)
        test_results = cuda_array_from_list([0] * node_count)
        contexts = [GpuContext() for _ in xrange(k)]
        last_context = contexts[5]

        # Slot 0 is an empty list for the root node; every round then adds
        # three lists: [i*k], [i*k+1 .. i*k+3] and [i*k+4 .. i*k+5].
        blocking_nodes = [cuda_array_from_list([])]
        for i in xrange(N):
            base = i * k
            for node_ids in ([base],
                             range(base + 1, base + 4),
                             range(base + 4, base + 6)):
                blocking_nodes.append(cuda_array_from_list(node_ids))

        # Root node: nothing blocks it.
        test_dependencies(last_context.cuda_stream, 0, blocking_nodes[0], 0,
                          execution_checklist, test_results)
        last_context.block(*contexts[:3])

        for i in xrange(N):
            first_node = i * k + 1
            slot = i * 3

            # First layer: contexts 0..2, each blocked by a single node.
            for context_id, context in enumerate(contexts[:3]):
                test_dependencies(context.cuda_stream,
                                  first_node + context_id,
                                  blocking_nodes[slot + 1], 1,
                                  execution_checklist, test_results)

            # Second layer: contexts 3..4, each blocked by three nodes.
            for context_id, context in enumerate(contexts[3:5], 3):
                context.wait(*contexts[:3])
                test_dependencies(context.cuda_stream,
                                  first_node + context_id,
                                  blocking_nodes[slot + 2], 3,
                                  execution_checklist, test_results)

            # Third layer: context 5, blocked by two nodes.
            last_context.wait(*contexts[3:5])
            test_dependencies(last_context.cuda_stream, first_node + 5,
                              blocking_nodes[slot + 3], 2,
                              execution_checklist, test_results)
            last_context.block(*contexts[:3])

        for nodes in blocking_nodes:
            cudart.cuda_free(nodes)

        results = list_from_cuda_array(test_results, node_count)
        checklist = list_from_cuda_array(execution_checklist, node_count)
        self.assertEqual(sum(results) + sum(checklist), 2 * node_count)
示例#4
0
    def test_dependencies(self):
        """Run a layered chain of ``test_dependencies`` launches on 6 contexts.

        Node 0 is launched on context 5 with no blocking nodes. Then, for each
        of the ``N`` rounds, three launches go to contexts 0-2 (blocked by one
        node), two to contexts 3-4 (blocked by three nodes) and one to
        context 5 (blocked by two nodes). The test passes when the entries of
        ``test_results`` and ``execution_checklist`` together sum to
        ``2 * (k * N + 1)``.

        NOTE(review): presumably every launch sets one flag in each array and
        ``wait``/``block`` order the streams between contexts -- confirm
        against the kernel and ``GpuContext``.
        """
        N = 10
        k = 6
        node_count = k * N + 1
        execution_checklist = cuda_array_from_list([0] * node_count)
        test_results = cuda_array_from_list([0] * node_count)
        contexts = [GpuContext() for _ in xrange(k)]
        last_context = contexts[5]

        # Slot 0 is an empty list for the root node; every round then adds
        # three lists: [i*k], [i*k+1 .. i*k+3] and [i*k+4 .. i*k+5].
        blocking_nodes = [cuda_array_from_list([])]
        for i in xrange(N):
            base = i * k
            for node_ids in ([base],
                             range(base + 1, base + 4),
                             range(base + 4, base + 6)):
                blocking_nodes.append(cuda_array_from_list(node_ids))

        # Root node: nothing blocks it.
        test_dependencies(last_context.cuda_stream, 0, blocking_nodes[0], 0,
                          execution_checklist, test_results)
        last_context.block(*contexts[:3])

        for i in xrange(N):
            first_node = i * k + 1
            slot = i * 3

            # First layer: contexts 0..2, each blocked by a single node.
            for context_id, context in enumerate(contexts[:3]):
                test_dependencies(context.cuda_stream,
                                  first_node + context_id,
                                  blocking_nodes[slot + 1], 1,
                                  execution_checklist, test_results)

            # Second layer: contexts 3..4, each blocked by three nodes.
            for context_id, context in enumerate(contexts[3:5], 3):
                context.wait(*contexts[:3])
                test_dependencies(context.cuda_stream,
                                  first_node + context_id,
                                  blocking_nodes[slot + 2], 3,
                                  execution_checklist, test_results)

            # Third layer: context 5, blocked by two nodes.
            last_context.wait(*contexts[3:5])
            test_dependencies(last_context.cuda_stream, first_node + 5,
                              blocking_nodes[slot + 3], 2,
                              execution_checklist, test_results)
            last_context.block(*contexts[:3])

        for nodes in blocking_nodes:
            cudart.cuda_free(nodes)

        results = list_from_cuda_array(test_results, node_count)
        checklist = list_from_cuda_array(execution_checklist, node_count)
        self.assertEqual(sum(results) + sum(checklist), 2 * node_count)