def forward_gpu(self, inputs):
    x, h_tm1 = inputs
    N = x.shape[0]

    # update gate
    u = cuda.empty((N, self.out_size), dtype=np.float32)
    cuk.dot(x, self.Wu, out=u, transb='t')
    cuk.dotAdd(h_tm1, self.Vu, C=u, transb='t')

    # reset gate
    r = cuda.empty((N, self.out_size), dtype=np.float32)
    cuk.dot(x, self.Wr, out=r, transb='t')
    cuk.dotAdd(h_tm1, self.Vr, C=r, transb='t')

    if not self.nobias:
        cuk.addVec2Mat(u, self.bu)
        cuk.addVec2Mat(r, self.br)

    self.u = cuk.sigmoid(x=u, out=u)
    self.r = cuk.sigmoid(x=r, out=r)

    # new memory
    HV = cuda.empty((N, self.out_size), dtype=np.float32)
    self.HV = cuk.dot(h_tm1, self.Vh, out=HV, transb='t')
    h_tilde = cuda.empty((N, self.out_size), dtype=np.float32)
    h_tilde = cuk.hadamard(r, self.HV, out=h_tilde)
    cuk.dotAdd(x, self.Wh, C=h_tilde, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(h_tilde, self.bh)
    self.h_tilde = cuk.tanh(x=h_tilde, out=h_tilde)

    # hidden state
    h = cuda.empty((N, self.out_size), dtype=np.float32)
    self.h = cuk.gru_forward(u=u, h_tilde=h_tilde, h_tm1=h_tm1, out=h)

    return self.h,
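# A minimal NumPy reference sketch of the GRU step computed above, assuming the
# cuk kernels implement the standard GRU equations with the combination
# h = u * h_tm1 + (1 - u) * h_tilde; the exact gating convention of
# cuk.gru_forward may differ. gru_forward_reference is a hypothetical helper
# for cross-checking, not part of the library.
import numpy as np

def gru_forward_reference(x, h_tm1, Wu, Vu, bu, Wr, Vr, br, Wh, Vh, bh):
    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))
    u = sigmoid(x.dot(Wu.T) + h_tm1.dot(Vu.T) + bu)              # update gate
    r = sigmoid(x.dot(Wr.T) + h_tm1.dot(Vr.T) + br)              # reset gate
    h_tilde = np.tanh(x.dot(Wh.T) + r * h_tm1.dot(Vh.T) + bh)    # candidate state
    h = u * h_tm1 + (1.0 - u) * h_tilde                          # assumed gating convention
    return h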
def forward_gpu(self, inputs):
    x, targets = inputs
    N = x.shape[0]

    # Linear function
    z = cuda.empty((N, self.no_labels), dtype=np.float32)
    cuk.dot(x, self.W, out=z, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(z, self.b)

    self.probs = z
    if cudnn.enabled and self.use_cudnn:
        handle = cudnn.get_default_handle()
        desc = cudnn.get_tensor_desc(z, 1, 1)
        libcudnn.cudnnSoftmaxForward(
            handle, _algorithm, _mode, 1, desc.value, cudnn.get_ptr(z),
            0, desc.value, cudnn.get_ptr(self.probs))
    else:
        cuk.softmax(z, self.probs)

    if self.return_probs:
        return self.probs,

    if self.compute_loss:
        correct_probs = cuda.empty((N,), dtype=np.float32)
        cuk.getByIndex_LogAndClip(
            self.probs, targets, out=correct_probs)
        loss = -cuda.cumisc.sum(correct_probs, keepdims=True) / N
    else:
        loss = np.atleast_2d(np.array(np.nan, dtype=np.float32))

    return loss,
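# A minimal NumPy sketch of the softmax cross-entropy loss computed above,
# assuming integer class targets and the same mean over the batch with log
# clipping. softmax_cross_entropy_reference is a hypothetical helper, not a
# library call.
import numpy as np

def softmax_cross_entropy_reference(z, targets, eps=1e-8):
    z = z - z.max(axis=1, keepdims=True)                         # numerical stability
    probs = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)     # softmax over labels
    correct = probs[np.arange(len(targets)), targets]            # probability of the true class
    return -np.mean(np.log(np.clip(correct, eps, 1.0)))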
def backward_gpu(self, inputs, grad_outputs):
    gh = grad_outputs[0]
    x, h_tm1 = inputs
    N = x.shape[0]

    gz = cuda.empty_like(gh)
    if self.act_func_str in ('tanh', 'sigmoid'):
        # backpropagate non-linearities
        gz = self.cu_dact_func(gy=gh, y=self.h, out=gz)
        # compute gradient with respect to the hidden input state
        gh_tm1 = cuk.dot(gz, self.V, out=self.h)
    elif self.act_func_str in ('leakyrelu', 'relu'):
        # backpropagate non-linearities
        gz = self.cu_dact_func(x=self.z, gy=gh, out=gz)
        # compute gradient with respect to the hidden input state
        gh_tm1 = cuk.dot(gz, self.V, out=self.z)
    else:
        raise NotImplementedError('the activation function is not available')

    # backpropagate linear function
    if self.hot:
        gx = None
        cuk.dothot(gz, x, in_size=self.in_size, out=self.gW)
    else:
        gx = cuda.empty_like(x)
        cuk.dot(gz, self.W, out=gx)
        cuk.dotAdd(gz, x, C=self.gW, transa='t')
    cuk.dotAdd(gz, h_tm1, C=self.gV, transa='t')
    if not self.nobias:
        gb_ones = cuda.ones((1, N), dtype=np.float32)
        cuk.dotAdd(gb_ones, gz, C=self.gb)

    return gx, gh_tm1
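# A minimal NumPy sketch of backpropagating through a tanh recurrent step,
# matching the act_func_str == 'tanh' branch above (the tanh derivative is
# expressed through the saved output h rather than the pre-activation z).
# rnn_step_grad_reference is illustrative only, not part of the library.
import numpy as np

def rnn_step_grad_reference(x, h_tm1, h, gh, W, V):
    gz = gh * (1.0 - h ** 2)       # dtanh via the saved output h
    gx = gz.dot(W)                 # gradient w.r.t. the input x
    gh_tm1 = gz.dot(V)             # gradient w.r.t. the previous hidden state
    gW = gz.T.dot(x)               # weight gradients
    gV = gz.T.dot(h_tm1)
    gb = gz.sum(axis=0)            # bias gradient
    return gx, gh_tm1, gW, gV, gb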
def backward_gpu(self, inputs, grad_outputs):
    x, targets = inputs
    gloss = grad_outputs[0]
    N = x.shape[0]

    coeff = gloss * 1.0 / N
    cuda.culinalg.scale(coeff, self.diff, alpha_real=True)
    gy = self.diff
    gtargets = None

    # backpropagate linear function
    gx = cuda.empty_like(x)
    cuk.dot(gy, self.W, out=gx)
    cuk.dotAdd(gy, x, C=self.gW, transa='t')
    if not self.nobias:
        gb_ones = cuda.ones((1, N), dtype=np.float32)
        cuk.dotAdd(gb_ones, gy, C=self.gb)

    return gx, gtargets
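# A minimal NumPy sketch of the gradient seeding done above: the stored
# diff = y - targets is rescaled by gloss / N (matching the 1/(2N) loss
# scaling in the forward pass) and then pushed back through the linear layer.
# mse_grad_reference is a hypothetical helper for illustration.
import numpy as np

def mse_grad_reference(x, diff, W, gloss=1.0):
    N = x.shape[0]
    gy = diff * gloss / N          # gradient w.r.t. the linear output y
    gx = gy.dot(W)                 # gradient w.r.t. the input x
    gW = gy.T.dot(x)               # gradient w.r.t. the weight matrix
    gb = gy.sum(axis=0)            # gradient w.r.t. the bias
    return gx, gW, gb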
def backward_gpu(self, inputs, grad_outputs):
    x, targets = inputs
    gloss = cuda.to_gpu(grad_outputs[0])
    N = x.shape[0]
    gtargets = None  # the function is non-differentiable with respect to the targets

    # backpropagate Softmax Cross Entropy Error
    gz = self.probs
    gz = cuk.dSoftmaxCrossEntropy(gz, targets, gloss)

    # backpropagate linear function
    gx = cuda.empty_like(x)
    cuk.dot(gz, self.W, out=gx)
    cuk.dotAdd(gz, x, C=self.gW, transa='t')
    if not self.nobias:
        gb_ones = cuda.ones((1, N), dtype=np.float32)
        cuk.dotAdd(gb_ones, gz, C=self.gb)

    return gx, gtargets
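# A minimal NumPy sketch of the softmax cross-entropy gradient backpropagated
# above, assuming cuk.dSoftmaxCrossEntropy computes the usual
# (probs - one_hot(targets)) * gloss / N term; the scaling by 1/N is an
# assumption matching the averaged forward loss. Illustrative only.
import numpy as np

def dsoftmax_cross_entropy_reference(probs, targets, gloss=1.0):
    N = probs.shape[0]
    gz = probs.copy()
    gz[np.arange(N), targets] -= 1.0   # probs minus one-hot targets
    return gz * gloss / N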
def backward_gpu(self, inputs, grad_outputs):
    gh, gc = grad_outputs
    x, h_tm1, c_tm1 = inputs

    if gh is None:
        gh = cuda.to_gpu(np.array([[0]], dtype=np.float32))
        gh_is_none = 1
    else:
        gh_is_none = 0
    if gc is None:
        gc = cuda.to_gpu(np.array([[0]], dtype=np.float32))
        gc_is_none = 1
    else:
        gc_is_none = 0

    gc_tm1 = self.c
    cuk.lstm_backward(c=self.c, z=self.z, gh=gh, gc=gc,
                      c_tm1=c_tm1, gc_is_none=gc_is_none,
                      gh_is_none=gh_is_none)
    gz = self.z
    gh_tm1 = cuk.dot(gz, self.U, out=self.h)

    # compute gradient with respect to the input x
    gx = cuda.empty_like(x)
    gx = cuk.dot(gz, self.W, out=gx)

    # compute gradients of weight matrices
    cuk.dotAdd(gz, x, C=self.gW, transa='t')
    cuk.dotAdd(gz, h_tm1, C=self.gU, transa='t')
    if not self.nobias:
        N = x.shape[0]
        gb_ones = cuda.ones((1, N), dtype=np.float32)
        cuk.dotAdd(gb_ones, gz, C=self.gb)

    return gx, gh_tm1, gc_tm1
def forward_gpu(self, inputs):
    x, targets = inputs
    N = x.shape[0]

    # Linear function
    y = cuda.empty((N, 1), dtype=np.float32)
    cuk.dot(x, self.W, out=y, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(y, self.b)

    if self.return_y:
        return y,

    self.diff = cuk.vecAdd(y, -targets)
    if self.compute_loss:
        loss = cuda.culinalg.norm(self.diff) ** 2
        loss = np.atleast_2d(np.array(cuda.to_cpu(loss))) * 1.0 / (2 * N)
    else:
        loss = np.atleast_2d(np.array(np.nan, dtype=np.float32))

    return loss,
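# A minimal NumPy sketch of the mean squared error computed above (note the
# 1/(2N) scaling used in the GPU path); mse_reference is a hypothetical helper,
# not part of the library.
import numpy as np

def mse_reference(x, targets, W, b):
    y = x.dot(W.T) + b                       # linear prediction, shape (N, 1)
    diff = y - targets
    return (diff ** 2).sum() / (2.0 * x.shape[0])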
def backward_gpu(self, inputs, grad_outputs):
    x, h_tm1 = inputs
    gh = grad_outputs[0]

    gu, gh_tilde, gh_tm1, gr, ghr = cuk.gru_backward(
        gu=self.h, h_tm1=h_tm1, h_tilde=self.h_tilde,
        gh_tilde=self.h_tilde, gh=gh, u=self.u, gh_tm1=self.u,
        gr=self.HV, r=self.r, HV=self.HV, ghr=self.r)

    gx = cuda.empty_like(x)
    cuk.dot(gu, self.Wu, out=gx)
    cuk.dotAdd(gr, self.Wr, C=gx)
    cuk.dotAdd(gh_tilde, self.Wh, C=gx)

    cuk.dotAdd(ghr, self.Vh, C=gh_tm1)
    cuk.dotAdd(gr, self.Vr, C=gh_tm1)
    cuk.dotAdd(gu, self.Vu, C=gh_tm1)

    cuk.dotAdd(gu, x, C=self.gWu, transa='t')
    cuk.dotAdd(gu, h_tm1, C=self.gVu, transa='t')
    cuk.dotAdd(gr, x, C=self.gWr, transa='t')
    cuk.dotAdd(gr, h_tm1, C=self.gVr, transa='t')
    cuk.dotAdd(gh_tilde, x, C=self.gWh, transa='t')
    cuk.dotAdd(ghr, h_tm1, C=self.gVh, transa='t')

    if not self.nobias:
        N = x.shape[0]
        gb_ones = cuda.ones((1, N), dtype=np.float32)
        cuk.dotAdd(gb_ones, gu, C=self.gbu)
        cuk.dotAdd(gb_ones, gr, C=self.gbr)
        cuk.dotAdd(gb_ones, gh_tilde, C=self.gbh)

    return gx, gh_tm1
def forward_gpu(self, inputs):
    x, h_tm1 = inputs
    z = cuda.empty_like(h_tm1)

    # Linear function
    if self.hot:
        cuk.hotdot(self.W, x, out=z, dont_add=True)
    else:
        cuk.dot(x, self.W, out=z, transb='t')
    cuk.dotAdd(h_tm1, self.V, C=z, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(z, self.b)

    # apply non-linear activation
    if self.act_func_str in ('tanh', 'sigmoid'):
        h = self.cu_act_func(x=z, out=z)
        self.h = h  # save h for backpropagation
    elif self.act_func_str in ('leakyrelu', 'relu'):
        h = cuda.empty_like(z)
        h = self.cu_act_func(x=z, out=h)
        self.z = z  # save z for backpropagation
    else:
        raise NotImplementedError('the activation function is not available')

    return h,
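# A minimal NumPy sketch of the same recurrent step, assuming tanh activation
# and a dense (non one-hot) input; rnn_step_reference is illustrative only.
import numpy as np

def rnn_step_reference(x, h_tm1, W, V, b):
    z = x.dot(W.T) + h_tm1.dot(V.T) + b   # linear part
    return np.tanh(z)                     # non-linear activation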
def forward_gpu(self, inputs):
    x, h_tm1, c_tm1 = inputs
    N = x.shape[0]

    z = cuda.empty((N, self.out_size * 4), dtype=np.float32)
    z = cuk.dot(x, self.W, out=z, transb='t')
    cuk.dotAdd(h_tm1, self.U, C=z, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(z, self.b)
    self.z = z

    self.c = cuda.empty_like(c_tm1)
    self.h = cuda.empty_like(h_tm1)
    cuk.lstm_forward(z=z, c_tm1=c_tm1, c=self.c, h=self.h,
                     out_size=self.out_size)

    return self.h, self.c
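# A minimal NumPy sketch of the LSTM step above, assuming the fused z buffer
# holds the four gate pre-activations in the order (input, forget, cell, output);
# both that ordering and lstm_step_reference itself are assumptions made for
# illustration, since the actual layout is fixed inside cuk.lstm_forward.
import numpy as np

def lstm_step_reference(x, h_tm1, c_tm1, W, U, b):
    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))
    z = x.dot(W.T) + h_tm1.dot(U.T) + b                # shape (N, 4 * out_size)
    i, f, g, o = np.split(z, 4, axis=1)                # assumed gate ordering
    c = sigmoid(f) * c_tm1 + sigmoid(i) * np.tanh(g)   # new cell state
    h = sigmoid(o) * np.tanh(c)                        # new hidden state
    return h, c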