Exemplo n.º 1
0
    def forward_gpu(self, x):
        """Run the bilinear forward computation on the GPU.

        Index letters used in the comments: ``i`` is the row index shared by
        both inputs, ``j`` and ``k`` are the column indices of ``x[0]`` and
        ``x[1]`` (after ``array.as_mat``), and ``l`` is the last axis of
        ``self.W``. Indices wrapped in ``[]`` are flattened (linearized into
        a single 1-D axis).

        Args:
            x: Sequence whose first two items are the input arrays.

        Returns:
            A 1-tuple holding the float32 output array of shape ``(i, l)``.
        """
        n_rows, n_cols1 = array.as_mat(x[0]).shape
        n_cols2 = array.as_mat(x[1]).shape[1]
        n_out = self.W.shape[2]

        # Flattened inputs: ij->[ij] and ik->[ik].
        flat1 = array.as_vec(x[0])
        flat2 = array.as_vec(x[1])

        # Row-wise outer product, '[ij],[ik]->[ijk]': every output element
        # is e1[i, j] * e2[i, k].
        outer_params = 'float* y, float* e1, float* e2, int e1c, int e2c'
        outer_body = '''
            int I = i / e1c / e2c;
            int J = (i - I * e1c * e2c) / e2c;
            int K = i % e2c;
            y[i] = e1[I * e1c + J] * e2[I * e2c + K];
            '''
        outer = cuda.empty(n_rows * n_cols1 * n_cols2, dtype=numpy.float32)
        outer_kernel = cuda.elementwise(
            outer_params, outer_body, 'row_wise_outer_product')
        outer_kernel(outer, flat1, flat2, n_cols1, n_cols2)

        # [ijk]->i[jk]
        outer = outer.reshape(n_rows, n_cols1 * n_cols2)

        # jkl->[jk]l
        w_shape = self.W.shape
        weight_2d = self.W.reshape(w_shape[0] * w_shape[1], w_shape[2])

        y = cuda.empty((n_rows, n_out), dtype=numpy.float32)
        with cuda.using_cumisc():
            # 'i[jk],[jk]l->il'
            cuda.culinalg.dot(outer, weight_2d, out=y)

        if self.nobias:
            return (y,)

        # Linear terms and bias are only applied when nobias is off.
        mat1 = array.as_mat(x[0])
        mat2 = array.as_mat(x[1])
        with cuda.using_cumisc():
            # ij,jl->il
            cuda.culinalg.add_dot(mat1, self.V1, y)
            # ik,kl->il
            cuda.culinalg.add_dot(mat2, self.V2, y)

        # Add b to y, cycling through b every b.size elements of y.
        add_bias = cuda.elementwise(
            'float* y, float* b, int n_channel',
            'y[i] += b[i % n_channel]',
            'linear_bias')
        add_bias(y, self.b, self.b.size)
        return (y,)
Exemplo n.º 2
0
    def forward_gpu(self, x):
        """Run the bilinear forward computation on the GPU.

        Index letters used in the comments: ``i`` is the row index shared by
        both inputs, ``j`` and ``k`` are the column indices of ``x[0]`` and
        ``x[1]`` (after ``array.as_mat``), and ``l`` is the last axis of
        ``self.W``. Indices wrapped in ``[]`` are flattened (linearized into
        a single 1-D axis).

        Args:
            x: Sequence whose first two items are the input arrays.

        Returns:
            A 1-tuple holding the float32 output array of shape ``(i, l)``.
        """
        n_rows, n_cols1 = array.as_mat(x[0]).shape
        n_cols2 = array.as_mat(x[1]).shape[1]
        n_out = self.W.shape[2]

        # Flattened inputs: ij->[ij] and ik->[ik].
        flat1 = array.as_vec(x[0])
        flat2 = array.as_vec(x[1])

        # Row-wise outer product, '[ij],[ik]->[ijk]': every output element
        # is e1[i, j] * e2[i, k].
        outer_params = 'float* y, float* e1, float* e2, int e1c, int e2c'
        outer_body = '''
            int I = i / e1c / e2c;
            int J = (i - I * e1c * e2c) / e2c;
            int K = i % e2c;
            y[i] = e1[I * e1c + J] * e2[I * e2c + K];
            '''
        outer = cuda.empty(n_rows * n_cols1 * n_cols2, dtype=numpy.float32)
        outer_kernel = cuda.elementwise(
            outer_params, outer_body, 'row_wise_outer_product')
        outer_kernel(outer, flat1, flat2, n_cols1, n_cols2)

        # [ijk]->i[jk]
        outer = outer.reshape(n_rows, n_cols1 * n_cols2)

        # jkl->[jk]l
        w_shape = self.W.shape
        weight_2d = self.W.reshape(w_shape[0] * w_shape[1], w_shape[2])

        y = cuda.empty((n_rows, n_out), dtype=numpy.float32)
        with cuda.using_cumisc():
            # 'i[jk],[jk]l->il'
            cuda.culinalg.dot(outer, weight_2d, out=y)

        if self.nobias:
            return (y,)

        # Linear terms and bias are only applied when nobias is off.
        mat1 = array.as_mat(x[0])
        mat2 = array.as_mat(x[1])
        with cuda.using_cumisc():
            # ij,jl->il
            cuda.culinalg.add_dot(mat1, self.V1, y)
            # ik,kl->il
            cuda.culinalg.add_dot(mat2, self.V2, y)

        # Add b to y, cycling through b every b.size elements of y.
        add_bias = cuda.elementwise(
            'float* y, float* b, int n_channel',
            'y[i] += b[i % n_channel]',
            'linear_bias')
        add_bias(y, self.b, self.b.size)
        return (y,)
Exemplo n.º 3
0
    def backward_gpu(self, x, gy):
        """Back-propagate through the bilinear function on the GPU.

        Index letters used in the comments: ``i`` is the row index shared by
        both inputs and ``gy``, ``j`` and ``k`` are the column indices of
        ``x[0]`` and ``x[1]`` (after ``array.as_mat``), and ``l`` is the
        column index of ``gy[0]``. Indices wrapped in ``[]`` are flattened
        into a single axis.

        Side effects: adds the weight gradient to ``self.gW``. Unless
        ``self.nobias`` is set, ``self.gV1`` and ``self.gV2`` are updated via
        ``add_dot`` (presumably accumulating -- confirm against the cuda
        wrapper) and the column sums of ``gy`` are added to ``self.gb``.

        Args:
            x: Sequence whose first two items are the forward inputs.
            gy: Sequence holding exactly one item, the gradient array with
                rows ``i`` and columns ``l``.

        Returns:
            tuple: ``(ge1, ge2)``, the gradients for ``x[0]`` and ``x[1]``,
            reshaped to the shapes of the respective inputs.
        """
        i_len, j_len = array.as_mat(x[0]).shape
        k_len = array.as_mat(x[1]).shape[1]
        l_len = gy[0].shape[1]

        # ij->[ij]
        e1 = array.as_vec(x[0])
        # ik->[ik]
        e2 = array.as_vec(x[1])
        # Unpack the single output gradient (fails if gy has != 1 item).
        gy, = gy
        # il->[il]
        gy_vec = array.as_vec(gy)
        # jkl->[jkl]
        W_vec = array.as_vec(self.W)

        dgW = cuda.empty((j_len * k_len * l_len,), dtype=numpy.float32)
        # '[ij],[ik],[il]->[jkl]'
        # dgW[j, k, l] = sum_i e1[i, j] * e2[i, k] * gy[i, l]
        # (kernel args: r=i_len, e1c=j_len, e2c=k_len, gyc=l_len)
        cuda.elementwise(
            '''
            float* y, float* e1, float* e2, float* gy,
            int r, int e1c, int e2c, int gyc
            ''',
            '''
            int J = i / e2c / gyc;
            int K = (i - J * e2c * gyc) / gyc;
            int L = i % gyc;
            float yval = 0;
            for (int I = 0; I < r; ++I) {
                int e1idx = I * e1c + J;
                int e2idx = I * e2c + K;
                int gyidx = I * gyc + L;
                yval += e1[e1idx] * e2[e2idx] * gy[gyidx];
            }
            y[i] = yval;
            ''',
            'sum_of_three_ary_tensor_product')(
                dgW, e1, e2, gy_vec, i_len, j_len, k_len, l_len)
        # [jkl]->jkl
        self.gW += dgW.reshape((j_len, k_len, l_len))

        if not self.nobias:
            # NOTE: e1/e2 are rebound to their matrix forms here, so when
            # this branch runs the ge kernels below are handed the matrices
            # rather than the flattened vectors. The kernels index them as
            # flat float* buffers; presumably as_mat/as_vec only change the
            # shape -- confirm.
            e1 = array.as_mat(x[0])
            e2 = array.as_mat(x[1])
            with cuda.using_cumisc():
                # ij,il->jl
                cuda.culinalg.add_dot(e1, gy, self.gV1, transa='T')
                # ik,il->kl
                cuda.culinalg.add_dot(e2, gy, self.gV2, transa='T')
                # Bias gradient: sum of gy over axis 0 (the i axis).
                self.gb += cuda.cumisc.sum(gy, 0)

        ge1 = cuda.empty((i_len * j_len,), dtype=numpy.float32)
        # '[ik],[jkl],[il]->[ij]'
        # ge1[i, j] = sum_{k, l} e2[i, k] * W[j, k, l] * gy[i, l]
        # (kernel args: ec=k_len, gyc=l_len, gec=j_len)
        cuda.elementwise(
            '''
            float* y, float* e, float* W, float* gy,
            int ec, int gyc, int gec
            ''',
            '''
            int I = i / gec;
            int J = i % gec;
            float yval = 0;
            for (int K = 0; K < ec; ++K) {
                for (int L = 0; L < gyc; ++L) {
                    int eidx = I * ec + K;
                    int Widx = J * ec * gyc + K * gyc + L;
                    int gyidx = I * gyc + L;
                    yval += e[eidx] * W[Widx] * gy[gyidx];
                }
            }
            y[i] = yval;
            ''',
            'ge_kernel')(ge1, e2, W_vec, gy_vec, k_len, l_len, j_len)
        # [ij]->ij
        ge1 = ge1.reshape(i_len, j_len)

        ge2 = cuda.empty((i_len * k_len,), dtype=numpy.float32)
        # '[ij],[jkl],[il]->[ik]'
        # ge2[i, k] = sum_{j, l} e1[i, j] * W[j, k, l] * gy[i, l]
        # (kernel args: ec=j_len, gyc=l_len, gec=k_len)
        cuda.elementwise(
            '''
            float* y, float* e, float* W, float* gy,
            int ec, int gyc, int gec
            ''',
            '''
            int I = i / gec;
            int K = i % gec;
            float yval = 0;
            for (int J = 0; J < ec; ++J) {
                for (int L = 0; L < gyc; ++L) {
                    int eidx = I * ec + J;
                    int Widx = J * gec * gyc + K * gyc + L;
                    int gyidx = I * gyc + L;
                    yval += e[eidx] * W[Widx] * gy[gyidx];
                }
            }
            y[i] = yval;
            ''',
            'ge_kernel2')(ge2, e1, W_vec, gy_vec, j_len, l_len, k_len)
        # [ik]->ik
        ge2 = ge2.reshape(i_len, k_len)

        if not self.nobias:
            # Contributions of the linear terms V1 and V2 to the input
            # gradients.
            with cuda.using_cumisc():
                # il,jl->ij
                cuda.culinalg.add_dot(gy, self.V1, ge1, transb='T')
                # il,kl->ik
                cuda.culinalg.add_dot(gy, self.V2, ge2, transb='T')
        # Hand the gradients back in the original (possibly non-2-D) shapes.
        return (ge1.reshape(x[0].shape), ge2.reshape(x[1].shape))
Exemplo n.º 4
0
    def backward_gpu(self, x, gy):
        """Back-propagate through the bilinear function on the GPU.

        Index letters used in the comments: ``i`` is the row index shared by
        both inputs and ``gy``, ``j`` and ``k`` are the column indices of
        ``x[0]`` and ``x[1]`` (after ``array.as_mat``), and ``l`` is the
        column index of ``gy[0]``. Indices wrapped in ``[]`` are flattened
        into a single axis.

        Side effects: adds the weight gradient to ``self.gW``. Unless
        ``self.nobias`` is set, ``self.gV1`` and ``self.gV2`` are updated via
        ``add_dot`` (presumably accumulating -- confirm against the cuda
        wrapper) and the column sums of ``gy`` are added to ``self.gb``.

        Args:
            x: Sequence whose first two items are the forward inputs.
            gy: Sequence holding exactly one item, the gradient array with
                rows ``i`` and columns ``l``.

        Returns:
            tuple: ``(ge1, ge2)``, the gradients for ``x[0]`` and ``x[1]``,
            reshaped to the shapes of the respective inputs.
        """
        i_len, j_len = array.as_mat(x[0]).shape
        k_len = array.as_mat(x[1]).shape[1]
        l_len = gy[0].shape[1]

        # ij->[ij]
        e1 = array.as_vec(x[0])
        # ik->[ik]
        e2 = array.as_vec(x[1])
        # Unpack the single output gradient (fails if gy has != 1 item).
        gy, = gy
        # il->[il]
        gy_vec = array.as_vec(gy)
        # jkl->[jkl]
        W_vec = array.as_vec(self.W)

        dgW = cuda.empty((j_len * k_len * l_len, ), dtype=numpy.float32)
        # '[ij],[ik],[il]->[jkl]'
        # dgW[j, k, l] = sum_i e1[i, j] * e2[i, k] * gy[i, l]
        # (kernel args: r=i_len, e1c=j_len, e2c=k_len, gyc=l_len)
        cuda.elementwise(
            '''
            float* y, float* e1, float* e2, float* gy,
            int r, int e1c, int e2c, int gyc
            ''', '''
            int J = i / e2c / gyc;
            int K = (i - J * e2c * gyc) / gyc;
            int L = i % gyc;
            float yval = 0;
            for (int I = 0; I < r; ++I) {
                int e1idx = I * e1c + J;
                int e2idx = I * e2c + K;
                int gyidx = I * gyc + L;
                yval += e1[e1idx] * e2[e2idx] * gy[gyidx];
            }
            y[i] = yval;
            ''', 'sum_of_three_ary_tensor_product')(dgW, e1, e2, gy_vec, i_len,
                                                    j_len, k_len, l_len)
        # [jkl]->jkl
        self.gW += dgW.reshape((j_len, k_len, l_len))

        if not self.nobias:
            # NOTE: e1/e2 are rebound to their matrix forms here, so when
            # this branch runs the ge kernels below are handed the matrices
            # rather than the flattened vectors. The kernels index them as
            # flat float* buffers; presumably as_mat/as_vec only change the
            # shape -- confirm.
            e1 = array.as_mat(x[0])
            e2 = array.as_mat(x[1])
            with cuda.using_cumisc():
                # ij,il->jl
                cuda.culinalg.add_dot(e1, gy, self.gV1, transa='T')
                # ik,il->kl
                cuda.culinalg.add_dot(e2, gy, self.gV2, transa='T')
                # Bias gradient: sum of gy over axis 0 (the i axis).
                self.gb += cuda.cumisc.sum(gy, 0)

        ge1 = cuda.empty((i_len * j_len, ), dtype=numpy.float32)
        # '[ik],[jkl],[il]->[ij]'
        # ge1[i, j] = sum_{k, l} e2[i, k] * W[j, k, l] * gy[i, l]
        # (kernel args: ec=k_len, gyc=l_len, gec=j_len)
        cuda.elementwise(
            '''
            float* y, float* e, float* W, float* gy,
            int ec, int gyc, int gec
            ''', '''
            int I = i / gec;
            int J = i % gec;
            float yval = 0;
            for (int K = 0; K < ec; ++K) {
                for (int L = 0; L < gyc; ++L) {
                    int eidx = I * ec + K;
                    int Widx = J * ec * gyc + K * gyc + L;
                    int gyidx = I * gyc + L;
                    yval += e[eidx] * W[Widx] * gy[gyidx];
                }
            }
            y[i] = yval;
            ''', 'ge_kernel')(ge1, e2, W_vec, gy_vec, k_len, l_len, j_len)
        # [ij]->ij
        ge1 = ge1.reshape(i_len, j_len)

        ge2 = cuda.empty((i_len * k_len, ), dtype=numpy.float32)
        # '[ij],[jkl],[il]->[ik]'
        # ge2[i, k] = sum_{j, l} e1[i, j] * W[j, k, l] * gy[i, l]
        # (kernel args: ec=j_len, gyc=l_len, gec=k_len)
        cuda.elementwise(
            '''
            float* y, float* e, float* W, float* gy,
            int ec, int gyc, int gec
            ''', '''
            int I = i / gec;
            int K = i % gec;
            float yval = 0;
            for (int J = 0; J < ec; ++J) {
                for (int L = 0; L < gyc; ++L) {
                    int eidx = I * ec + J;
                    int Widx = J * gec * gyc + K * gyc + L;
                    int gyidx = I * gyc + L;
                    yval += e[eidx] * W[Widx] * gy[gyidx];
                }
            }
            y[i] = yval;
            ''', 'ge_kernel2')(ge2, e1, W_vec, gy_vec, j_len, l_len, k_len)
        # [ik]->ik
        ge2 = ge2.reshape(i_len, k_len)

        if not self.nobias:
            # Contributions of the linear terms V1 and V2 to the input
            # gradients.
            with cuda.using_cumisc():
                # il,jl->ij
                cuda.culinalg.add_dot(gy, self.V1, ge1, transb='T')
                # il,kl->ik
                cuda.culinalg.add_dot(gy, self.V2, ge2, transb='T')
        # Hand the gradients back in the original (possibly non-2-D) shapes.
        return (ge1.reshape(x[0].shape), ge2.reshape(x[1].shape))