Exemplo n.º 1
0
def cl_reduce(function, output, input, shared, group_size, initial=0.0):
    """Reduce ``input`` with ``function``, one partial result per work-group.

    Each work-item first folds a strided subset of ``input`` into its own
    slot of ``shared`` (starting from ``initial``).  The work-group then
    combines those slots pairwise until ``shared[0]`` holds the group's
    result, which the work-item with local id 0 stores in ``output[gid]``.

    Args:
        function: Binary callable invoked as ``function(accumulated, value)``.
        output: Indexable destination; element ``gid`` (the group id)
            receives this work-group's result.
        input: Indexable source exposing a ``size`` attribute.
        shared: Scratch buffer indexed by local id.  Presumably work-group
            local memory holding at least ``group_size`` elements
            -- TODO confirm against the caller.
        group_size: Number of work-items per group.  The combining stage
            only covers widths 512 down to 2, halving each time, so this is
            assumed to be a power of two no larger than 512 -- TODO confirm.
        initial: Starting value of every work-item's accumulator.  It is
            folded into the result once per work-item, so it should be
            neutral for ``function`` (e.g. ``0.0`` for addition).
    """
    # NOTE(review): presumably pins the index type to unsigned int for the
    # kernel compiler before the real value is assigned -- confirm.
    i = c_uint(0)

    lid = clrt.get_local_id(0)

    gid = clrt.get_group_id(0)
    gsize = clrt.get_num_groups(0)

    # Every work-item consumes two elements per pass: i and i + group_size.
    gs2 = group_size * 2

    # Distance between consecutive passes of the same work-item.
    stride = gs2 * gsize

    i = gid * gs2 + lid

    shared[lid] = initial

    while i < input.size:
        shared[lid] = function(shared[lid], input[i])

        # The second element of the pair may fall past the end of the input
        # on the last pass; reading it unguarded is an out-of-bounds access.
        if i + group_size < input.size:
            shared[lid] = function(shared[lid], input[i + group_size])

        i += stride

    # Synchronise once, after the loop.  The loop only touches this
    # work-item's own slot, so no barrier is needed inside it, and a barrier
    # inside a loop whose trip count differs between work-items would not be
    # reached by all of them.  Placing it here also guarantees that every
    # slot is published before the first combining stage reads it, even for
    # work-items that never entered the loop.
    clrt.barrier(clrt.CLK_LOCAL_MEM_FENCE)

    # The clyther compiler identifies this loop as constant and unrolls it.
    for cgs in [512, 256, 128, 64, 32, 16, 8, 4, 2]:

        # Acts as a preprocessor define: #if (group_size >= 512) etc.
        if group_size >= cgs:

            # The lower half of the current window absorbs the upper half.
            if lid < cgs / 2:
                shared[lid] = function(shared[lid], shared[lid + cgs / 2])

            # All work-items reach this barrier: it is outside the lid test.
            clrt.barrier(clrt.CLK_LOCAL_MEM_FENCE)

    # A single work-item publishes the group's result.
    if lid == 0:
        output[gid] = shared[0]
Exemplo n.º 2
0
def cl_reduce(function, output, input, shared, group_size, initial=0.0):
    """Reduce ``input`` with ``function``, one partial result per work-group.

    Each work-item first folds a strided subset of ``input`` into its own
    slot of ``shared`` (starting from ``initial``).  The work-group then
    combines those slots pairwise until ``shared[0]`` holds the group's
    result, which the work-item with local id 0 stores in ``output[gid]``.

    Args:
        function: Binary callable invoked as ``function(accumulated, value)``.
        output: Indexable destination; element ``gid`` (the group id)
            receives this work-group's result.
        input: Indexable source exposing a ``size`` attribute.
        shared: Scratch buffer indexed by local id.  Presumably work-group
            local memory holding at least ``group_size`` elements
            -- TODO confirm against the caller.
        group_size: Number of work-items per group.  The combining stage
            only covers widths 512 down to 2, halving each time, so this is
            assumed to be a power of two no larger than 512 -- TODO confirm.
        initial: Starting value of every work-item's accumulator.  It is
            folded into the result once per work-item, so it should be
            neutral for ``function`` (e.g. ``0.0`` for addition).
    """
    # NOTE(review): presumably pins the index type to unsigned int for the
    # kernel compiler before the real value is assigned -- confirm.
    i = c_uint(0)

    lid = clrt.get_local_id(0)

    gid = clrt.get_group_id(0)
    gsize = clrt.get_num_groups(0)

    # Every work-item consumes two elements per pass: i and i + group_size.
    gs2 = group_size * 2

    # Distance between consecutive passes of the same work-item.
    stride = gs2 * gsize

    i = gid * gs2 + lid

    shared[lid] = initial

    while i < input.size:
        shared[lid] = function(shared[lid], input[i])

        # The second element of the pair may fall past the end of the input
        # on the last pass; reading it unguarded is an out-of-bounds access.
        if i + group_size < input.size:
            shared[lid] = function(shared[lid], input[i + group_size])

        i += stride

    # Synchronise once, after the loop.  The loop only touches this
    # work-item's own slot, so no barrier is needed inside it, and a barrier
    # inside a loop whose trip count differs between work-items would not be
    # reached by all of them.  Placing it here also guarantees that every
    # slot is published before the first combining stage reads it, even for
    # work-items that never entered the loop.
    clrt.barrier(clrt.CLK_LOCAL_MEM_FENCE)

    # The clyther compiler identifies this loop as constant and unrolls it.
    for cgs in [512, 256, 128, 64, 32, 16, 8, 4, 2]:

        # Acts as a preprocessor define: #if (group_size >= 512) etc.
        if group_size >= cgs:

            # The lower half of the current window absorbs the upper half.
            if lid < cgs / 2:
                shared[lid] = function(shared[lid], shared[lid + cgs / 2])

            # All work-items reach this barrier: it is outside the lid test.
            clrt.barrier(clrt.CLK_LOCAL_MEM_FENCE)

    # A single work-item publishes the group's result.
    if lid == 0:
        output[gid] = shared[0]
Exemplo n.º 3
0
def reduce_kernel(function, output, array, shared, group_size):
    """Fold ``array`` with ``function`` and store one result per work-group.

    Every work-item accumulates a private value from ``array``, parks it in
    ``shared`` at its local id, and the group then merges the slots pairwise
    until ``shared[0]`` holds the combined value.  The work-item with local
    id 0 writes that value to ``output`` at the group id.

    Args:
        function: Binary callable invoked as ``function(accumulated, value)``.
        output: Indexable destination, written at the group id.
        array: Indexable source exposing a ``size`` attribute.
        shared: Scratch buffer indexed by local id.  Presumably work-group
            local memory with at least ``group_size`` elements
            -- TODO confirm against the caller.
        group_size: Number of work-items per group.  The merge stage only
            covers widths 512 down to 2, so this is assumed to be a power of
            two no larger than 512 -- TODO confirm.
    """
    group_id = clrt.get_group_id(0)
    local_id = clrt.get_local_id(0)

    # First element owned by this work-item, and the one a group width later.
    index = c_uint(group_id * group_size + local_id)
    partner = index + group_size

    # NOTE(review): this first read is unguarded, so it assumes the starting
    # index is always inside the array -- confirm against the caller.
    acc = array[index]

    if partner < array.size:
        acc = function(acc, array[partner])

    # Skip past the two elements already consumed, then walk the remainder.
    # NOTE(review): the step is group_size, not the total work-item count, so
    # with more than one work-group the ranges visited by different groups
    # overlap; this looks intended for a single-group launch -- confirm.
    index += group_size * 2

    while index < array.size:
        acc = function(acc, array[index])
        index += group_size

    # Publish the private accumulator before any other work-item reads it.
    shared[local_id] = acc
    clrt.barrier(clrt.CLK_LOCAL_MEM_FENCE)

    # The clyther compiler identifies this loop as constant and unrolls it.
    for width in [512, 256, 128, 64, 32, 16, 8, 4, 2]:

        # Acts as a preprocessor define: #if (group_size >= 512) etc.
        if group_size >= width:

            # The lower half of the current window absorbs the upper half.
            if local_id < width / 2:
                shared[local_id] = function(shared[local_id],
                                            shared[local_id + width / 2])

            clrt.barrier(clrt.CLK_LOCAL_MEM_FENCE)

    # A single work-item publishes the group's result.
    if local_id == 0:
        output[group_id] = shared[0]
Exemplo n.º 4
0
def reduce_kernel(function, output, array, shared, group_size):
    """Fold ``array`` with ``function`` and store one result per work-group.

    Every work-item accumulates a private value from ``array``, parks it in
    ``shared`` at its local id, and the group then merges the slots pairwise
    until ``shared[0]`` holds the combined value.  The work-item with local
    id 0 writes that value to ``output`` at the group id.

    Args:
        function: Binary callable invoked as ``function(accumulated, value)``.
        output: Indexable destination, written at the group id.
        array: Indexable source exposing a ``size`` attribute.
        shared: Scratch buffer indexed by local id.  Presumably work-group
            local memory with at least ``group_size`` elements
            -- TODO confirm against the caller.
        group_size: Number of work-items per group.  The merge stage only
            covers widths 512 down to 2, so this is assumed to be a power of
            two no larger than 512 -- TODO confirm.
    """
    group_id = clrt.get_group_id(0)
    local_id = clrt.get_local_id(0)

    # First element owned by this work-item, and the one a group width later.
    index = c_uint(group_id * group_size + local_id)
    partner = index + group_size

    # NOTE(review): this first read is unguarded, so it assumes the starting
    # index is always inside the array -- confirm against the caller.
    acc = array[index]

    if partner < array.size:
        acc = function(acc, array[partner])

    # Skip past the two elements already consumed, then walk the remainder.
    # NOTE(review): the step is group_size, not the total work-item count, so
    # with more than one work-group the ranges visited by different groups
    # overlap; this looks intended for a single-group launch -- confirm.
    index += group_size * 2

    while index < array.size:
        acc = function(acc, array[index])
        index += group_size

    # Publish the private accumulator before any other work-item reads it.
    shared[local_id] = acc
    clrt.barrier(clrt.CLK_LOCAL_MEM_FENCE)

    # The clyther compiler identifies this loop as constant and unrolls it.
    for width in [512, 256, 128, 64, 32, 16, 8, 4, 2]:

        # Acts as a preprocessor define: #if (group_size >= 512) etc.
        if group_size >= width:

            # The lower half of the current window absorbs the upper half.
            if local_id < width / 2:
                shared[local_id] = function(shared[local_id],
                                            shared[local_id + width / 2])

            clrt.barrier(clrt.CLK_LOCAL_MEM_FENCE)

    # A single work-item publishes the group's result.
    if local_id == 0:
        output[group_id] = shared[0]