Пример #1
0
    def _init_double_scan(self):
        """"generates a double scan on indexes and values in one operation"""
        arguments = "__global int *value", "__global int *index"
        int2 = pyopencl.tools.get_or_register_dtype("int2")
        input_expr = "index[i]>0 ? (int2)(0, 0) : (int2)(value[i], 1)"
        scan_expr = "a+b"
        neutral = "(int2)(0,0)"
        output_statement = "value[i] = item.s0; index[i+1] = item.s1;"

        if self.block_size > 256:
            knl = GenericScanKernel(self.ctx,
                                    dtype=int2,
                                    arguments=arguments,
                                    input_expr=input_expr,
                                    scan_expr=scan_expr,
                                    neutral=neutral,
                                    output_statement=output_statement)
        else:  # MacOS on CPU
            knl = GenericDebugScanKernel(self.ctx,
                                         dtype=int2,
                                         arguments=arguments,
                                         input_expr=input_expr,
                                         scan_expr=scan_expr,
                                         neutral=neutral,
                                         output_statement=output_statement)
        return knl
Пример #2
0
    def _init_compression_scan(self):
        """Initialize CBF compression scan kernels"""
        preamble = """
        int compressed_size(int diff) {
            int abs_diff = abs(diff);

            if (abs_diff < 128) {
                return 1;
            }
            else if (abs_diff < 32768) {
                return 3;
            }
            else {
                return 7;
            }
        }

        void write(const int index,
                   const int diff,
                   global char *output) {
            int abs_diff = abs(diff);

            if (abs_diff < 128) {
                output[index] = (char) diff;
            }
            else if (abs_diff < 32768) {
                output[index] = -128;
                output[index + 1] = (char) (diff >> 0);
                output[index + 2] = (char) (diff >> 8);
            }
            else {
                output[index] = -128;
                output[index + 1] = 0;
                output[index + 2] = -128;
                output[index + 3] = (char) (diff >> 0);
                output[index + 4] = (char) (diff >> 8);
                output[index + 5] = (char) (diff >> 16);
                output[index + 6] = (char) (diff >> 24);
            }
        }
        """
        arguments = "__global const int *data, __global char *compressed, __global int *size"
        input_expr = "compressed_size((i == 0) ? data[0] : (data[i] - data[i - 1]))"
        scan_expr = "a+b"
        neutral = "0"
        output_statement = """
        if (prev_item == 0) { // 1st thread store compressed data size
            size[0] = last_item;
        }
        write(prev_item, (i == 0) ? data[0] : (data[i] - data[i - 1]), compressed);
        """

        if self.block_size >= 64:
            knl = GenericScanKernel(self.ctx,
                                    dtype=numpy.int32,
                                    preamble=preamble,
                                    arguments=arguments,
                                    input_expr=input_expr,
                                    scan_expr=scan_expr,
                                    neutral=neutral,
                                    output_statement=output_statement)
        else:  # MacOS on CPU
            knl = GenericDebugScanKernel(self.ctx,
                                         dtype=numpy.int32,
                                         preamble=preamble,
                                         arguments=arguments,
                                         input_expr=input_expr,
                                         scan_expr=scan_expr,
                                         neutral=neutral,
                                         output_statement=output_statement)
        return knl