Exemplo n.º 1
0
 def backward(self, out_grad, in_data, out_data, in_grad):
     l=in_data[1]
     y=out_data[0]
     dx=in_grad[0]
     if self.bwd_kernel==None:
         self.bwd_kernel=mx.rtc('softmax_grad', [('y',y),('l',l)], [('dx',dx)])
     self.bwd_kernel.push([y,l],[dx], (y.shape[0],1,1), (y.shape[1],1,1))
Exemplo n.º 2
0
    def backward(self, out_grad, in_data, out_data, in_grad):
        l = in_data[1]
        y = out_data[0]
        dx = in_grad[0]
        if self.bwd_kernel is None:
            self.bwd_kernel = mx.rtc('softmax_grad', [('y', y), ('l', l)], [('dx', dx)], """
int i = blockIdx.x;
int j = threadIdx.x;
int k = static_cast<int>(l[i]);
if (j == k) {
    dx[i*dx_dims[1]+j] = y[i*dx_dims[1]+j] - 1.0f;
} else {
    dx[i*dx_dims[1]+j] = y[i*dx_dims[1]+j];
}
""")
        self.bwd_kernel.push([y,l], [dx], (y.shape[0],1,1), (y.shape[1], 1, 1))
Exemplo n.º 3
0
    def backward(self, out_grad, in_data, out_data, in_grad):
        l = in_data[1]
        y = out_data[0]
        dx = in_grad[0]
        if self.bwd_kernel is None:
            self.bwd_kernel = mx.rtc('softmax_grad', [('y', y), ('l', l)], [('dx', dx)], """
int i = blockIdx.x;
int j = threadIdx.x;
int k = static_cast<int>(l[i]);
if (j == k) {
    dx[i*dx_dims[1]+j] = y[i*dx_dims[1]+j] - 1.0f;
} else {
    dx[i*dx_dims[1]+j] = y[i*dx_dims[1]+j];
}
""")
        self.bwd_kernel.push([y,l], [dx], (y.shape[0],1,1), (y.shape[1], 1, 1))
Exemplo n.º 4
0
 def forward(self, in_data, out_data):
     """Scatter-add each source patch into target at its offset position.

     The grid covers every valid patch offset inside target; one thread
     per source channel.  The RTC kernel is compiled once and cached.
     """
     nn, source = in_data
     target = out_data[0]
     if self.fwd_kernel is None:
         # JIT-compile lazily; atomicAdd keeps overlapping patches correct.
         self.fwd_kernel = mx.rtc('assignpatch', [('nn', nn), ('source', source)], [('target', target)], """
             int target_idx = threadIdx.x*target_dims[3]*target_dims[2]+blockIdx.x*target_dims[3]+blockIdx.y;
             int source_idx = nn[blockIdx.x*nn_dims[1]+blockIdx.y]*source_dims[1]*source_dims[2]*source_dims[3]
                                 + threadIdx.x*source_dims[2]*source_dims[3];
             for (int i = 0; i < source_dims[2]; i++){
                 for (int j = 0; j < source_dims[3]; j++){
                     atomicAdd(target+target_idx, source[source_idx]);
                     target_idx++;
                     source_idx++;
                 }
                 target_idx += target_dims[3]-source_dims[3];
             }
         """)
     grid = (target.shape[2] - source.shape[2] + 1, target.shape[3] - source.shape[3] + 1, 1)
     block = (source.shape[1], 1, 1)
     self.fwd_kernel.push([nn, source], [target], grid, block)
Exemplo n.º 5
0
 def forward(self, in_data, out_data):
     """Accumulate source patches into target, indexed by nearest neighbours."""
     nn, source = in_data
     target = out_data[0]
     if self.fwd_kernel is None:
         # Build the RTC kernel on first use and reuse it afterwards.
         self.fwd_kernel = mx.rtc('assignpatch', [('nn', nn), ('source', source)], [('target', target)], """
             int target_idx = threadIdx.x*target_dims[3]*target_dims[2]+blockIdx.x*target_dims[3]+blockIdx.y;
             int source_idx = nn[blockIdx.x*nn_dims[1]+blockIdx.y]*source_dims[1]*source_dims[2]*source_dims[3]
                                 + threadIdx.x*source_dims[2]*source_dims[3];
             for (int i = 0; i < source_dims[2]; i++){
                 for (int j = 0; j < source_dims[3]; j++){
                     atomicAdd(target+target_idx, source[source_idx]);
                     target_idx++;
                     source_idx++;
                 }
                 target_idx += target_dims[3]-source_dims[3];
             }
         """)
     # Number of valid patch offsets along each spatial axis.
     n_offsets_h = target.shape[2] - source.shape[2] + 1
     n_offsets_w = target.shape[3] - source.shape[3] + 1
     self.fwd_kernel.push([nn, source], [target], (n_offsets_h, n_offsets_w, 1), (source.shape[1], 1, 1))
Exemplo n.º 6
0
    def forward(self, in_data, out_data):
        x = in_data[0]
        y = out_data[0]
        if self.fwd_kernel is None:
            self.fwd_kernel = mx.rtc('softmax', [('x', x)], [('y', y)], """
int i = threadIdx.x + blockIdx.x*blockDim.x;
float max_x = x[i*x_dims[1]];
for (int j = 1; j < x_dims[1]; ++j) {
    if (max_x < x[i*x_dims[1]+j]) {
        max_x = x[i*x_dims[1]+j];
    }
}
float sum = 0.0f;
for (int j = 0; j < x_dims[1]; ++j) {
    sum += expf(x[i*x_dims[1]+j]-max_x);
}
for (int j = 0; j < x_dims[1]; ++j) {
    y[i*x_dims[1]+j] = expf(x[i*x_dims[1]+j]-max_x)/sum;
}
""")
        self.fwd_kernel.push([x], [y], (1, 1, 1), (x.shape[0], 1, 1))
Exemplo n.º 7
0
    def forward(self, in_data, out_data):
        x = in_data[0]
        y = out_data[0]
        if self.fwd_kernel is None:
            self.fwd_kernel = mx.rtc('softmax', [('x', x)], [('y', y)], """
int i = threadIdx.x + blockIdx.x*blockDim.x;
float max_x = x[i*x_dims[1]];
for (int j = 1; j < x_dims[1]; ++j) {
    if (max_x < x[i*x_dims[1]+j]) {
        max_x = x[i*x_dims[1]+j];
    }
}
float sum = 0.0f;
for (int j = 0; j < x_dims[1]; ++j) {
    sum += expf(x[i*x_dims[1]+j]-max_x);
}
for (int j = 0; j < x_dims[1]; ++j) {
    y[i*x_dims[1]+j] = expf(x[i*x_dims[1]+j]-max_x)/sum;
}
""")
        self.fwd_kernel.push([x], [y], (1, 1, 1), (x.shape[0], 1, 1))
Exemplo n.º 8
0
# pylint: skip-file
import mxnet as mx
import numpy as np
from numpy.testing import assert_allclose

if __name__ == '__main__':
    # Smoke test for mx.rtc: stage x through shared memory, write y = expf(x*5).
    src = mx.nd.zeros((10,), ctx=mx.gpu(0))
    src[:] = 1
    dst = mx.nd.zeros((10,), ctx=mx.gpu(0))
    dst[:] = 2
    rtc = mx.rtc('abc', [('x', src)], [('y', dst)], """
        __shared__ float s_rec[10];
        s_rec[threadIdx.x] = x[threadIdx.x];
        y[threadIdx.x] = expf(s_rec[threadIdx.x]*5.0);""")
    # One block of ten threads, one element each.
    rtc.push([src], [dst], (1, 1, 1), (10, 1, 1))
    # Compare against the NumPy reference on the host.
    assert_allclose(dst.asnumpy(), np.exp(src.asnumpy() * 5.0))
Exemplo n.º 9
0
# pylint: skip-file
import mxnet as mx
import numpy as np
from numpy.testing import assert_allclose

if __name__ == '__main__':
    # Smoke test for mx.rtc on the HIP backend (hipThreadIdx_x intrinsics).
    src = mx.nd.zeros((10, ), ctx=mx.gpu(0))
    src[:] = 1
    dst = mx.nd.zeros((10, ), ctx=mx.gpu(0))
    dst[:] = 2
    kernel = mx.rtc(
        'abc', [('x', src)], [('y', dst)], """
        __shared__ float s_rec[10];
        s_rec[hipThreadIdx_x] = x[hipThreadIdx_x];
        y[hipThreadIdx_x] = expf(s_rec[hipThreadIdx_x]*5.0);""")
    # Launch one block of ten threads, then verify against NumPy on the host.
    kernel.push([src], [dst], (1, 1, 1), (10, 1, 1))
    assert_allclose(dst.asnumpy(), np.exp(src.asnumpy() * 5.0))