示例#1
0
    def _get_remove_kernels(self):
        """Build the pair of kernels used to drop flagged entries from an array.

        Both kernels are created for ``self.backend``.

        Returns
        -------
        tuple
            ``(fill_if_remove_knl, remove_knl)``:

            * ``fill_if_remove_knl`` -- a ``parallel.Elementwise`` wrapping
              ``fill_if_remove``; it writes ``1`` into ``if_remove`` at every
              position listed in ``indices``.
            * ``remove_knl`` -- a ``parallel.Scan`` combining elements with
              ``'a+b'`` over ``if_remove`` (dtype ``np.int32``); its output
              step copies every entry whose flag is zero from ``old_array``
              into ``new_array`` at position ``i - item``.
        """
        # Imported locally rather than at module level.
        import compyle.parallel as parallel

        # Flagging step: mark each index named in `indices` for removal.
        # NOTE(review): `gintp='indices, if_remove'` appears to use the
        # type-as-keyword form of `annotate` (both names typed as gintp) --
        # confirm against the compyle documentation.
        @annotate(i='int', gintp='indices, if_remove')
        def fill_if_remove(i, indices, if_remove):
            if_remove[indices[i]] = 1

        fill_if_remove_knl = parallel.Elementwise(fill_if_remove,
                                                  backend=self.backend)

        # Scan input: the removal flag of element i.
        @annotate(i='int', if_remove='gintp', return_='int')
        def remove_input_expr(i, if_remove):
            return if_remove[i]

        # Argument types for the scan output step; the array pointer type
        # comes from `self.gptr_type`.
        types = {
            'i': 'int',
            'item': 'int',
            'if_remove': 'gintp',
            'new_array': self.gptr_type,
            'old_array': self.gptr_type
        }

        # Scan output: entries that are not flagged are copied to the slot
        # `i - item` of `new_array`.
        # NOTE(review): `item` is presumably the scan result at element i
        # (i.e. the number of flagged entries up to and including i), which
        # would make `i - item` the compacted position -- confirm against the
        # compyle Scan documentation.
        @annotate(**types)
        def remove_output_expr(i, item, if_remove, new_array, old_array):
            if not if_remove[i]:
                new_array[i - item] = old_array[i]

        remove_knl = parallel.Scan(remove_input_expr,
                                   remove_output_expr,
                                   'a+b',
                                   dtype=np.int32,
                                   backend=self.backend)

        return fill_if_remove_knl, remove_knl
示例#2
0
def cumsum(ary, backend=None, out=None):
    """Return the cumulative sum of ``ary`` computed on the given backend.

    Parameters
    ----------
    ary
        Input array. Its ``backend`` attribute is consulted when ``backend``
        is not supplied; on the ``'opencl'``/``'cuda'`` path its ``length``
        and ``dtype`` attributes are read as well.
    backend : str, optional
        ``'opencl'``, ``'cuda'`` or ``'cython'``. Defaults to ``ary.backend``.
    out : optional
        Destination for the result. On ``'opencl'``/``'cuda'`` a new array is
        allocated with ``empty`` when this is ``None``; on ``'cython'`` it is
        forwarded unchanged to ``np.cumsum``.

    Returns
    -------
    The output array for ``'opencl'``/``'cuda'``, the result of
    ``wrap_array`` for ``'cython'``, and ``None`` for any other backend.
    """
    if backend is None:
        backend = ary.backend

    # Host path: delegate to NumPy and wrap the result for the backend.
    if backend == 'cython':
        host_result = np.cumsum(ary, out=out)
        return wrap_array(host_result, backend)

    # Device path: run an 'a+b' scan built from the module's
    # inp_cumsum/out_cumsum expressions.
    if backend in ('opencl', 'cuda'):
        import compyle.parallel as parallel

        target = out
        if target is None:
            target = empty(ary.length, ary.dtype, backend=backend)

        scan_knl = parallel.Scan(
            inp_cumsum,
            out_cumsum,
            'a+b',
            dtype=ary.dtype,
            backend=backend,
        )
        scan_knl(ary=ary, out=target)
        return target

    # Unrecognised backends fall through without computing anything.
    return None