Пример #1
0
def test_array_multiplication():
    """Multiply two tensors element-wise on the GPU using a pyshader kernel.

    The kernel stores ``data1[i] * data2[i]`` into ``data3[i]``; the output
    tensor is then synced back to the host and compared against
    ``[2.0, 4.0, 6.0]`` through both ``data()`` and ``numpy()``.
    """
    # Manager built with default arguments (device 0 by default).
    manager = kp.Manager()

    # Two input tensors and one zero-filled output tensor.
    lhs = kp.Tensor([2, 2, 2])
    rhs = kp.Tensor([1, 2, 3])
    product = kp.Tensor([0, 0, 0])
    operands = [lhs, rhs, product]

    # Initialise the tensors on the GPU.
    manager.rebuild(operands)

    # Multiplication kernel; it is converted to SPIR-V just below.
    @ps.python2shader
    def compute_shader_multiply(
            index=("input", "GlobalInvocationId", ps.ivec3),
            data1=("buffer", 0, ps.Array(ps.f32)),
            data2=("buffer", 1, ps.Array(ps.f32)),
            data3=("buffer", 2, ps.Array(ps.f32))):
        i = index.x
        data3[i] = data1[i] * data2[i]

    # Run the kernel against the tensors created above.
    spirv = compute_shader_multiply.to_spirv()
    manager.eval_algo_data_def(operands, spirv)

    # Copy the output tensor from the GPU back to local memory.
    manager.eval_tensor_sync_local_def([product])

    expected = [2.0, 4.0, 6.0]
    assert product.data() == expected
    assert np.all(product.numpy() == expected)
Пример #2
0
def test_shader_str():
    """
    Compile a GLSL multiplication shader from a string and run it.

    The source is compiled with ``kp.Shader.compile_source`` and the result
    is handed to ``eval_algo_data_def``; the output tensor must end up as
    ``[2.0, 4.0, 6.0]``.
    """

    glsl_source = """
#version 450
layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

void main()
{
    uint index = gl_GlobalInvocationID.x;
    valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
}
    """

    lhs = kp.Tensor([2, 2, 2])
    rhs = kp.Tensor([1, 2, 3])
    product = kp.Tensor([0, 0, 0])
    operands = [lhs, rhs, product]

    manager = kp.Manager()
    manager.rebuild(operands)

    # Compile the GLSL text, then execute it against the three tensors.
    compiled = kp.Shader.compile_source(glsl_source)
    manager.eval_algo_data_def(operands, compiled)

    # Bring the result back to the host before checking it.
    manager.eval_tensor_sync_local_def([product])

    assert product.data() == [2.0, 4.0, 6.0]
Пример #3
0
def test_sequence():
    """
    Run a shader file asynchronously, then sync tensors through a sequence.

    The algorithm is launched with ``eval_async_algo_file_def`` and awaited;
    a sequence named "op" then records one local sync per tensor and is
    evaluated before the output is checked.
    """
    manager = kp.Manager(0, [2])

    lhs = kp.Tensor([2, 2, 2])
    rhs = kp.Tensor([1, 2, 3])
    product = kp.Tensor([0, 0, 0])
    operands = [lhs, rhs, product]

    manager.eval_tensor_create_def(operands)

    # Launch the shader asynchronously and wait for it to finish.
    shader_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp")
    manager.eval_async_algo_file_def(operands, shader_path)
    manager.eval_await_def()

    # Record one sync-to-local operation per tensor, in the original order.
    sync_seq = manager.create_sequence("op")
    sync_seq.begin()
    for tensor in operands:
        sync_seq.record_tensor_sync_local([tensor])
    sync_seq.end()
    sync_seq.eval()

    assert product.data() == [2.0, 4.0, 6.0]
Пример #4
0
def test_opalgobase_data():
    """
    Run an inline GLSL multiplication shader via ``eval_algo_str_def``.

    The shader text is passed as a list of its characters and the output
    tensor is expected to be ``[2.0, 4.0, 6.0]``.
    """

    lhs = kp.Tensor([2, 2, 2])
    rhs = kp.Tensor([1, 2, 3])
    product = kp.Tensor([0, 0, 0])
    operands = [lhs, rhs, product]

    manager = kp.Manager()

    shader_source = """
        #version 450

        layout (local_size_x = 1) in;

        // The input tensors bind index is relative to index in parameter passed
        layout(set = 0, binding = 0) buffer bina { float tina[]; };
        layout(set = 0, binding = 1) buffer binb { float tinb[]; };
        layout(set = 0, binding = 2) buffer bout { float tout[]; };

        void main() {
            uint index = gl_GlobalInvocationID.x;
            tout[index] = tina[index] * tinb[index];
        }
    """

    manager.eval_tensor_create_def(operands)

    # The call receives the shader as a list of single characters.
    shader_chars = list(shader_source)
    manager.eval_algo_str_def(operands, shader_chars)

    manager.eval_tensor_sync_local_def([product])

    assert product.data() == [2.0, 4.0, 6.0]
Пример #5
0
def test_opmult():
    """
    Multiply two tensors with ``eval_algo_mult_def`` and check the output.
    """

    lhs = kp.Tensor([2, 2, 2])
    rhs = kp.Tensor([1, 2, 3])
    product = kp.Tensor([0, 0, 0])
    operands = [lhs, rhs, product]

    manager = kp.Manager()

    # Create the tensors, run the multiplication, then sync the result back.
    manager.eval_tensor_create_def(operands)
    manager.eval_algo_mult_def(operands)
    manager.eval_tensor_sync_local_def([product])

    assert product.data() == [2.0, 4.0, 6.0]
Пример #6
0
def test_workgroup():
    """Dispatch a pyshader kernel with a (16, 8, 1) workgroup and check ids.

    Each invocation computes a flat index from its workgroup id and writes
    the x / y components of its global invocation id into ``tensor_a`` /
    ``tensor_b``. The test also checks that the sequence and both tensors
    report ``is_init()`` as false after ``mgr.destroy``.
    """
    mgr = kp.Manager(0)

    tensor_a = kp.Tensor(np.zeros([16, 8]))
    tensor_b = kp.Tensor(np.zeros([16, 8]))

    mgr.rebuild([tensor_a, tensor_b])

    @ps.python2shader
    def compute_shader_wg(gl_idx=("input", "GlobalInvocationId", ps.ivec3),
                          gl_wg_id=("input", "WorkgroupId", ps.ivec3),
                          gl_wg_num=("input", "NumWorkgroups", ps.ivec3),
                          data1=("buffer", 0, ps.Array(ps.f32)),
                          data2=("buffer", 1, ps.Array(ps.f32))):
        # Flat index: workgroup x id times the y workgroup count, plus y id.
        i = gl_wg_id.x * gl_wg_num.y + gl_wg_id.y
        data1[i] = f32(gl_idx.x)
        data2[i] = f32(gl_idx.y)

    seq = mgr.sequence("new")
    seq.begin()
    seq.record_algo_data([tensor_a, tensor_b],
                         compute_shader_wg.to_spirv(),
                         workgroup=(16, 8, 1))
    seq.end()
    seq.eval()

    mgr.destroy(seq)

    # Plain truthiness check instead of comparing with `== False`.
    assert not seq.is_init()

    mgr.eval_tensor_sync_local_def([tensor_a, tensor_b])

    print(tensor_a.numpy())
    print(tensor_b.numpy())

    # Expected layout: a flattened 16x8 grid holding the x index in tensor_a
    # and the y index in tensor_b.
    assert np.all(tensor_a.numpy() == np.stack([np.arange(16)] *
                                               8, axis=1).ravel())
    assert np.all(tensor_b.numpy() == np.stack([np.arange(8)] *
                                               16, axis=0).ravel())

    mgr.destroy([tensor_a, tensor_b])

    assert not tensor_a.is_init()
    assert not tensor_b.is_init()
Пример #7
0
def test_opalgobase_file():
    """
    Run the ``opmult.comp.spv`` shader file via ``eval_algo_file_def``.

    The output tensor is synced back and must equal ``[2.0, 4.0, 6.0]``.
    """

    lhs = kp.Tensor([2, 2, 2])
    rhs = kp.Tensor([1, 2, 3])
    product = kp.Tensor([0, 0, 0])
    operands = [lhs, rhs, product]

    manager = kp.Manager()
    manager.rebuild(operands)

    # Shader location, built relative to DIRNAME.
    spv_file = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv")
    manager.eval_algo_file_def(operands, spv_file)

    manager.eval_tensor_sync_local_def([product])

    assert product.data() == [2.0, 4.0, 6.0]
Пример #8
0
def test_tensor_rebuild_backwards_compat():
    """
    Initialise tensors through ``eval_tensor_create_def`` and run a shader.

    The shader file is executed asynchronously and awaited; the output is
    then synced back and checked through both ``data()`` and ``numpy()``.
    """

    lhs = kp.Tensor([2, 2, 2])
    rhs = kp.Tensor([1, 2, 3])
    product = kp.Tensor([0, 0, 0])
    operands = [lhs, rhs, product]

    manager = kp.Manager()

    manager.eval_tensor_create_def(operands)

    # Absolute path to the shader file, resolved from DIRNAME.
    relative_path = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv")
    spv_file = os.path.abspath(relative_path)

    # Asynchronous launch followed by an explicit wait.
    manager.eval_async_algo_file_def(operands, spv_file)
    manager.eval_await_def()

    manager.eval_tensor_sync_local_def([product])

    expected = [2.0, 4.0, 6.0]
    assert product.data() == expected
    assert np.all(product.numpy() == expected)
Пример #9
0
def test_sequence():
    """
    Run a shader asynchronously, sync via a named sequence, then destroy.

    After the asynchronous algorithm is awaited, a sequence named "op"
    records a local sync for each tensor and is evaluated. The sequence is
    destroyed by name and the tensors are destroyed both singly and as a
    list; each must then report ``is_init()`` as false.
    """
    mgr = kp.Manager(0, [2])

    tensor_in_a = kp.Tensor([2, 2, 2])
    tensor_in_b = kp.Tensor([1, 2, 3])
    tensor_out = kp.Tensor([0, 0, 0])

    mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out])

    shader_path = os.path.abspath(
        os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp.spv"))
    mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out],
                                 shader_path)

    mgr.eval_await_def()

    seq = mgr.sequence("op")
    seq.begin()
    seq.record_tensor_sync_local([tensor_in_a])
    seq.record_tensor_sync_local([tensor_in_b])
    seq.record_tensor_sync_local([tensor_out])
    seq.end()
    seq.eval()

    # The sequence is destroyed by the name it was created with.
    mgr.destroy("op")

    # Plain truthiness check instead of comparing with `== False`.
    assert not seq.is_init()

    assert tensor_out.data() == [2.0, 4.0, 6.0]
    assert np.all(tensor_out.numpy() == [2.0, 4.0, 6.0])

    # destroy() is called with a single tensor and with a list of tensors.
    mgr.destroy(tensor_in_a)
    mgr.destroy([tensor_in_b, tensor_out])

    assert not tensor_in_a.is_init()
    assert not tensor_in_b.is_init()
    assert not tensor_out.is_init()
Пример #10
0
def test_workgroup():
    """Record a GLSL kernel with a (16, 8, 1) workgroup and check the ids.

    The shader source is passed to ``record_algo_data`` as UTF-8 bytes. Each
    invocation writes the x / y components of its global invocation id at a
    flat index derived from its workgroup id.
    """
    manager = kp.Manager(0)

    out_x = kp.Tensor(np.zeros([16, 8]))
    out_y = kp.Tensor(np.zeros([16, 8]))
    outputs = [out_x, out_y]
    manager.eval_tensor_create_def(outputs)

    glsl_text = """
        #version 450

        layout (local_size_x = 1) in;

        // The input tensors bind index is relative to index in parameter passed
        layout(set = 0, binding = 0) writeonly buffer bout  { float toutx[]; };
        layout(set = 0, binding = 1) writeonly buffer bout2 { float touty[]; };

        void main() {
            uint index   = gl_WorkGroupID.x*gl_NumWorkGroups.y + gl_WorkGroupID.y;
            
            toutx[index] = gl_GlobalInvocationID.x;
            touty[index] = gl_GlobalInvocationID.y;
        }
    """
    shader_bytes = glsl_text.encode('utf8')

    sequence = manager.create_sequence()
    sequence.begin()
    sequence.record_algo_data(outputs, shader_bytes, (16, 8, 1))
    sequence.end()
    sequence.eval()

    manager.eval_tensor_sync_local_def(outputs)

    # Expected values: a flattened 16x8 grid of x indices and of y indices.
    expected_x = np.stack([np.arange(16)] * 8, axis=1).ravel()
    expected_y = np.stack([np.arange(8)] * 16, axis=0).ravel()
    assert np.all(out_x.numpy() == expected_x)
    assert np.all(out_y.numpy() == expected_y)
Пример #11
0
def render_base(args, folder):
    """Render frames ``args.start``..``args.end`` of a compiled scene shader.

    Every frame is rendered in 16 chunks (the chunk index is uploaded through
    ``tensor_offset``), copied back from the GPU and saved as
    ``output/image<i>.png``. When ``args.vis`` is truthy the frame is also
    shown in a pygame window; after the last frame the function waits for the
    window to be closed and then exits the interpreter.

    Args:
        args: Object providing the ``width``, ``height``, ``vis``,
            ``verbose``, ``device``, ``scene``, ``start`` and ``end``
            attributes read below.
        folder: Prefix joined by plain string concatenation with
            ``args.scene + ".spv"`` to locate the shader binary, so it must
            already end with a path separator if one is needed.
    """
    # Local import: only needed for the exit path at the end of the function.
    import sys

    SIZE = (args.width, args.height)

    # pygame setup if visual enabled
    surf = None
    if args.vis:
        pygame.init()
        surf = pygame.display.set_mode(SIZE)

    # change verbosity level: verbose is clamped to 0..4 and mapped to the
    # logging levels 50 (CRITICAL) down to 10 (DEBUG)
    kp_logger = logging.getLogger("kp")
    kp_logger.setLevel(50 - (max(min(args.verbose, 4), 0) * 10))

    # init manager
    mgr = kp.Manager(args.device)

    # shader inputs
    tensor_size = kp.Tensor(SIZE)
    tensor_frame = kp.Tensor([0])
    tensor_offset = kp.Tensor([0])
    tensor_out = kp.Tensor(np.zeros((SIZE[0] * SIZE[1] * 3)))

    # allocate memory on gpu
    mgr.eval_tensor_create_def([tensor_out, tensor_size, tensor_frame, tensor_offset])

    # read shader; the context manager closes the file even if a later
    # step raises
    with open(folder + args.scene + ".spv", "rb") as shader_file:
        shader_bytes = shader_file.read()

    # create sequences
    # uploads the current frame number
    sq_sdf = mgr.create_sequence()
    sq_sdf.begin()
    sq_sdf.record_tensor_sync_device([tensor_frame])
    sq_sdf.end()

    # uploads the current chunk offset
    sq_sdo = mgr.create_sequence()
    sq_sdo.begin()
    sq_sdo.record_tensor_sync_device([tensor_offset])
    sq_sdo.end()

    # runs the scene shader
    sq_r = mgr.create_sequence()
    sq_r.begin()
    sq_r.record_algo_data([tensor_out, tensor_size, tensor_frame, tensor_offset], shader_bytes)
    sq_r.end()

    # downloads the rendered output
    sq_sl = mgr.create_sequence()
    sq_sl.begin()
    sq_sl.record_tensor_sync_local([tensor_out])
    sq_sl.end()

    # render frames
    for i in range(args.start, args.end + 1):
        if args.verbose > 0:
            print("rendering frame {}".format(i))

        # run program
        tensor_frame[0] = i
        # copy frame to shader
        sq_sdf.eval()
        # split into smaller chunks
        for j in range(16):
            if args.verbose > 1:
                print("- rendering chunk {}".format(j))

            tensor_offset[0] = j
            # copy offset to shader
            sq_sdo.eval()
            # run shader
            sq_r.eval()
        # copy frame from shader
        sq_sl.eval()

        # save frame to output: reshape the flat buffer to (height, width, 3)
        # and flip it vertically
        frame = np.flip(np.array(tensor_out.data()).reshape((SIZE[1], SIZE[0], 3)), axis=0)
        plt.imsave("output/image{}.png".format(i), frame)

        # visualize
        if args.vis:
            # create surface from array
            surf2 = pygame.surfarray.make_surface(np.swapaxes(frame, 0, 1) * 255)

            # weird pygame bug
            surf.blit(surf2, (0, 0))
            pygame.display.update()
            surf.blit(surf2, (0, 0))
            pygame.display.update()

            # stop on last frame
            if i == args.end:
                while True:
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            # sys.exit() instead of the site-provided quit(),
                            # which is meant for the interactive shell
                            sys.exit()
def test_logistic_regression():
    """Train a two-feature logistic regression with a pyshader kernel.

    For every sample the kernel computes the sigmoid prediction, the
    per-sample weight and bias gradients (scaled by ``1 / m``) and the binary
    cross-entropy loss. The host loop applies the gradients with a fixed
    learning rate for 100 iterations, and the learned weights and bias are
    then checked against loose bounds.
    """
    @ps.python2shader
    def compute_shader(index=("input", "GlobalInvocationId", ps.ivec3),
                       x_i=("buffer", 0, ps.Array(ps.f32)),
                       x_j=("buffer", 1, ps.Array(ps.f32)),
                       y=("buffer", 2, ps.Array(ps.f32)),
                       w_in=("buffer", 3, ps.Array(ps.f32)),
                       w_out_i=("buffer", 4, ps.Array(ps.f32)),
                       w_out_j=("buffer", 5, ps.Array(ps.f32)),
                       b_in=("buffer", 6, ps.Array(ps.f32)),
                       b_out=("buffer", 7, ps.Array(ps.f32)),
                       l_out=("buffer", 8, ps.Array(ps.f32)),
                       M=("buffer", 9, ps.Array(ps.f32))):

        i = index.x

        m = M[0]

        w_curr = vec2(w_in[0], w_in[1])
        b_curr = b_in[0]

        x_curr = vec2(x_i[i], x_j[i])
        y_curr = y[i]

        # Forward pass: sigmoid of the linear combination.
        z_dot = w_curr @ x_curr
        z = z_dot + b_curr
        y_hat = 1.0 / (1.0 + exp(-z))

        # Per-sample gradients, scaled by 1 / m.
        d_z = y_hat - y_curr
        d_w = (1.0 / m) * x_curr * d_z
        d_b = (1.0 / m) * d_z

        # Binary cross-entropy: -(y*log(y_hat) + (1 - y)*log(1 - y_hat)).
        # The second term uses (1 - y), not (1 + y).
        loss = -((y_curr * log(y_hat)) + ((1.0 - y_curr) * log(1.0 - y_hat)))

        w_out_i[i] = d_w.x
        w_out_j[i] = d_w.y
        b_out[i] = d_b
        l_out[i] = loss

    mgr = kp.Manager(0)

    # First we create input and output tensors for shader
    tensor_x_i = kp.Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
    tensor_x_j = kp.Tensor([0.0, 0.0, 0.0, 1.0, 1.0])

    tensor_y = kp.Tensor([0.0, 0.0, 0.0, 1.0, 1.0])

    tensor_w_in = kp.Tensor([0.001, 0.001])
    tensor_w_out_i = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
    tensor_w_out_j = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    tensor_b_in = kp.Tensor([0.0])
    tensor_b_out = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    tensor_l_out = kp.Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

    # Number of samples, read by the shader as M[0].
    tensor_m = kp.Tensor([tensor_y.size()])

    # We store them in an array for easier interaction
    params = [
        tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
        tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m
    ]

    mgr.eval_tensor_create_def(params)

    # Create a managed sequence
    sq = mgr.create_sequence()

    # Clear previous operations and begin recording for new operations
    sq.begin()

    # Record operation to sync memory from local to GPU memory
    sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])

    # Record operation to execute GPU shader against all our parameters
    sq.record_algo_data(params, compute_shader.to_spirv())

    # Record operation to sync memory from GPU to local memory
    sq.record_tensor_sync_local(
        [tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])

    # Stop recording operations
    sq.end()

    ITERATIONS = 100
    learning_rate = 0.1

    # Perform machine learning training and inference across all input X and Y
    for _ in range(ITERATIONS):

        # Execute an iteration of the algorithm
        sq.eval()

        # Fetch each gradient buffer once per iteration rather than once per
        # element, then apply the per-sample gradients in order.
        gradients = zip(tensor_w_out_i.data(), tensor_w_out_j.data(),
                        tensor_b_out.data())
        for grad_w_i, grad_w_j, grad_b in gradients:
            tensor_w_in[0] -= learning_rate * grad_w_i
            tensor_w_in[1] -= learning_rate * grad_w_j
            tensor_b_in[0] -= learning_rate * grad_b

    assert tensor_w_in.data()[0] < 0.01
    assert tensor_w_in.data()[0] > 0.0
    assert tensor_w_in.data()[1] > 1.5
    assert tensor_b_in.data()[0] < 0.7