Exemplo n.º 1
0
def count_features_dense_kernel(float_dtype, int_dtype):
    """
    Build the "count_features_dense" kernel for a dense feature matrix.

    The CUDA source below makes each thread derive a (row, col) pair from
    its x/y block and thread indices, and then:

    * return immediately if the pair is outside ``n_rows`` x ``n_cols``;
    * read ``in[row * n_cols + col]`` when ``rowMajor`` is true, otherwise
      ``in[col * n_rows + row]``;
    * return if that value equals zero;
    * square the value when ``square`` is true;
    * atomically add it to ``out[col * n_classes + labels[row]]``.

    Parameters
    ----------
    float_dtype
        First entry of the dtype tuple handed to ``cuda_kernel_factory``;
        presumably substituted for the ``{0}`` placeholder (type of
        ``out``, ``in`` and the accumulated value) -- confirm in the factory.
    int_dtype
        Second entry of the dtype tuple; presumably substituted for the
        ``{1}`` placeholder (type of ``labels``) -- confirm in the factory.

    Returns
    -------
    Whatever ``cuda_kernel_factory`` returns for this source, dtype tuple
    and kernel name.
    """
    # NOTE: the kernel does not range-check labels[row] against n_classes;
    # callers are presumably expected to pass validated labels.
    source = r'''
    ({0} *out,
     {0} *in,
     int n_rows,
     int n_cols,
     {1} *labels,
     int n_classes,
     bool square,
     bool rowMajor) {

      int row = blockIdx.x * blockDim.x + threadIdx.x;
      int col = blockIdx.y * blockDim.y + threadIdx.y;

      if(row >= n_rows || col >= n_cols) return;

      {0} val = !rowMajor ?
            in[col * n_rows + row] : in[row * n_cols + col];

      if(val == 0.0) return;

      if(square) val *= val;
      {1} label = labels[row];

      atomicAdd(out + ((col * n_classes) + label), val);
    }'''

    template_dtypes = (float_dtype, int_dtype)
    return cuda_kernel_factory(source, template_dtypes,
                               "count_features_dense")
Exemplo n.º 2
0
def count_features_coo_kernel(float_dtype, int_dtype):
    """
    Build the "count_features_coo" kernel: a simple reduction over a
    sparse (COO) feature array that accumulates the sum (or sum of
    squares) of each feature per class label.

    In the CUDA source below, each thread derives an index ``i`` from its
    x block and thread indices and returns if ``i >= nnz``. Otherwise it
    reads ``rows[i]``, ``cols[i]`` and ``vals[i]``, squares the value when
    ``square`` is true, and atomically adds it to
    ``out[col * n_classes + labels[row]]``.

    ``n_rows`` and ``n_cols`` are part of the kernel signature but are not
    read anywhere in the kernel body.

    Parameters
    ----------
    float_dtype
        First entry of the dtype tuple handed to ``cuda_kernel_factory``;
        presumably substituted for ``{0}`` (type of ``out`` and ``vals``)
        -- confirm in the factory.
    int_dtype
        Second entry of the dtype tuple; presumably substituted for
        ``{1}`` (type of ``labels``) -- confirm in the factory.

    Returns
    -------
    Whatever ``cuda_kernel_factory`` returns for this source, dtype tuple
    and kernel name.
    """

    source = r'''({0} *out,
                    int *rows, int *cols,
                    {0} *vals, int nnz,
                    int n_rows, int n_cols,
                    {1} *labels,
                    int n_classes,
                    bool square) {

      int i = blockIdx.x * blockDim.x + threadIdx.x;

      if(i >= nnz) return;

      int row = rows[i];
      int col = cols[i];
      {0} val = vals[i];
      if(square) val *= val;
      {1} label = labels[row];
      atomicAdd(out + ((col * n_classes) + label), val);
    }'''

    template_dtypes = (float_dtype, int_dtype)
    return cuda_kernel_factory(source, template_dtypes,
                               "count_features_coo")
Exemplo n.º 3
0
def _map_l2_norm_kernel(dtype):
    """
    Build the "map_l2_norm_kernel" kernel (per the original docstring, a
    cupy RawKernel for the csr_raw_normalize_l2 function).

    In the CUDA source below, each thread derives ``tid`` from its x block
    and thread indices and returns if ``tid >= n_samples``. Otherwise it
    sums the squares of ``data[indptr[tid] .. indptr[tid+1])``; if that sum
    is zero it leaves the entries untouched, else it divides each of those
    entries, in place, by the square root of the sum.

    ``indices`` is part of the kernel signature but is not read in the
    kernel body.

    Parameters
    ----------
    dtype
        Forwarded unchanged to ``cuda_kernel_factory``. The template uses
        three placeholders (``{0}`` for ``data``, ``{1}`` for ``indices``,
        ``{2}`` for ``indptr``), so this is presumably a sequence of three
        dtypes -- confirm against the caller.

    Returns
    -------
    Whatever ``cuda_kernel_factory`` returns for this source, ``dtype``
    and kernel name.
    """

    kernel_source = r'''
    ({0} *data, {1} *indices, {2} *indptr, int n_samples) {

      int tid = blockDim.x * blockIdx.x + threadIdx.x;

      if(tid >= n_samples) return;
      {0} sum = 0.0;

      for(int i = indptr[tid]; i < indptr[tid+1]; i++) {
        sum += (data[i] * data[i]);
      }

      if(sum == 0) return;

      sum = sqrt(sum);

      for(int i = indptr[tid]; i < indptr[tid+1]; i++) {
        data[i] /= sum;
      }
    }
    '''
    kernel_name = "map_l2_norm_kernel"
    return cuda_kernel_factory(kernel_source, dtype, kernel_name)
Exemplo n.º 4
0
def count_classes_kernel(float_dtype, int_dtype):
    """
    Build the "count_classes" kernel, which tallies how often each label
    occurs.

    In the CUDA source below, each thread derives ``row`` from its x block
    and thread indices, returns if ``row >= n_rows``, and otherwise
    atomically adds 1 to ``out[labels[row]]``.

    Parameters
    ----------
    float_dtype
        First entry of the dtype tuple handed to ``cuda_kernel_factory``;
        presumably substituted for ``{0}`` (type of ``out``) -- confirm in
        the factory.
    int_dtype
        Second entry of the dtype tuple; presumably substituted for
        ``{1}`` (type of ``labels``) -- confirm in the factory.

    Returns
    -------
    Whatever ``cuda_kernel_factory`` returns for this source, dtype tuple
    and kernel name.
    """
    source = r'''
    ({0} *out, int n_rows, {1} *labels) {

      int row = blockIdx.x * blockDim.x + threadIdx.x;
      if(row >= n_rows) return;
      {1} label = labels[row];
      atomicAdd(out + label, 1);
    }'''

    template_dtypes = (float_dtype, int_dtype)
    return cuda_kernel_factory(source, template_dtypes,
                               "count_classes")
Exemplo n.º 5
0
def _binarize_kernel(x_dtype):
    """
    Build the "binarize_kernel" kernel, which thresholds an array in place.

    In the CUDA source below, each thread derives ``tid`` from its x block
    and thread indices and returns if ``tid >= x_n``. Otherwise it
    overwrites ``x[tid]`` with 1 when the element is strictly greater than
    ``threshold`` and with 0 otherwise. ``threshold`` is declared as a
    ``float`` regardless of the element type.

    Parameters
    ----------
    x_dtype
        Sole entry of the dtype tuple handed to ``cuda_kernel_factory``;
        presumably substituted for ``{0}`` (element type of ``x``) --
        confirm in the factory.

    Returns
    -------
    Whatever ``cuda_kernel_factory`` returns for this source, dtype tuple
    and kernel name.
    """
    source = r'''({0} *x, float threshold, int x_n) {

    int tid = blockDim.x * blockIdx.x + threadIdx.x;

    if(tid >= x_n) return;

    {0} val = x[tid];
    if(val > threshold)
        val = 1;
    else
        val = 0;

    x[tid] = val;
    }'''
    template_dtypes = (x_dtype,)
    kernel_name = "binarize_kernel"
    return cuda_kernel_factory(source, template_dtypes, kernel_name)
Exemplo n.º 6
0
def _validate_kernel(dtype):
    """
    Build the "validate_labels_kernel" kernel for ``dtype``.

    Passes the module-level ``validate_kernel_str`` template, the
    one-element tuple ``(dtype,)`` and the kernel name to
    ``cuda_kernel_factory`` and returns its result.
    """
    template_dtypes = (dtype, )
    kernel_name = "validate_labels_kernel"
    return cuda_kernel_factory(validate_kernel_str, template_dtypes,
                               kernel_name)
Exemplo n.º 7
0
def _inverse_map_kernel(dtype):
    """
    Build the "inv_map_labels_kernel" kernel for ``dtype``.

    Passes the module-level ``inverse_map_kernel_str`` template, the
    one-element tuple ``(dtype,)`` and the kernel name to
    ``cuda_kernel_factory`` and returns its result.
    """
    template_dtypes = (dtype, )
    kernel_name = "inv_map_labels_kernel"
    return cuda_kernel_factory(inverse_map_kernel_str, template_dtypes,
                               kernel_name)
Exemplo n.º 8
0
def _map_kernel(dtype):
    """
    Build the "map_labels_kernel" kernel for ``dtype``.

    Passes the module-level ``map_kernel_str`` template, the one-element
    tuple ``(dtype,)`` and the kernel name to ``cuda_kernel_factory`` and
    returns its result.
    """
    template_dtypes = (dtype, )
    kernel_name = "map_labels_kernel"
    return cuda_kernel_factory(map_kernel_str, template_dtypes, kernel_name)
Exemplo n.º 9
0
def _cov_kernel(dtype):
    """
    Build the "cov_kernel" kernel for ``dtype``.

    Passes the module-level ``cov_kernel_str`` template, the one-element
    tuple ``(dtype,)`` and the kernel name to ``cuda_kernel_factory`` and
    returns its result.
    """
    template_dtypes = (dtype, )
    kernel_name = "cov_kernel"
    return cuda_kernel_factory(cov_kernel_str, template_dtypes, kernel_name)