def forward_gpu(self, inputs):
    x, targets = inputs
    N = x.shape[0]

    # Linear function
    z = cuda.empty((N, self.no_labels), dtype=np.float32)
    cuk.dot(x, self.W, out=z, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(z, self.b)

    # self.probs aliases z, so the softmax below is computed in place
    self.probs = z
    if cudnn.enabled and self.use_cudnn:
        handle = cudnn.get_default_handle()
        desc = cudnn.get_tensor_desc(z, 1, 1)
        libcudnn.cudnnSoftmaxForward(
            handle, _algorithm, _mode, 1, desc.value, cudnn.get_ptr(z),
            0, desc.value, cudnn.get_ptr(self.probs))
    else:
        cuk.softmax(z, self.probs)

    if self.return_probs:
        return self.probs,

    if self.compute_loss:
        correct_probs = cuda.empty((N,), dtype=np.float32)
        cuk.getByIndex_LogAndClip(
            self.probs, targets, out=correct_probs)
        loss = -cuda.cumisc.sum(correct_probs, keepdims=True) / N
    else:
        loss = np.atleast_2d(np.array(np.nan, dtype=np.float32))
    return loss,
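# Reference sketch (assumption, not part of the original GPU path): a NumPy
# version of what the kernels above appear to compute -- a linear layer,
# a row-wise softmax, and the mean negative log-likelihood of the target
# labels. Assumes numpy is imported as np, as elsewhere in this module;
# the helper name and the eps clipping value are illustrative only.
def _softmax_cross_entropy_reference(x, targets, W, b, eps=1e-8):
    z = x.dot(W.T) + b                                    # linear function
    z -= z.max(axis=1, keepdims=True)                     # stabilise the softmax
    probs = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
    correct = np.clip(probs[np.arange(len(targets)), targets], eps, 1.0)
    return -np.log(correct).mean()                        # mean NLL over the batch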
def forward_gpu(self, inputs):
    x, h_tm1, c_tm1 = inputs
    N = x.shape[0]

    # Pre-activations of all four LSTM gates, computed in one matrix product
    z = cuda.empty((N, self.out_size * 4), dtype=np.float32)
    z = cuk.dot(x, self.W, out=z, transb='t')
    cuk.dotAdd(h_tm1, self.U, C=z, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(z, self.b)
    self.z = z

    # New cell and hidden states
    self.c = cuda.empty_like(c_tm1)
    self.h = cuda.empty_like(h_tm1)
    cuk.lstm_forward(z=z, c_tm1=c_tm1, c=self.c, h=self.h,
                     out_size=self.out_size)
    return self.h, self.c
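# Reference sketch (assumption): a NumPy version of the LSTM step that
# cuk.lstm_forward is presumed to implement. The pre-activation z holds the
# four gates concatenated along the feature axis; the gate order used below
# (candidate, input, forget, output) is an assumption, not taken from the
# kernel. Assumes numpy is imported as np.
def _lstm_step_reference(z, c_tm1, out_size):
    def sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))
    a, i, f, o = (z[:, k * out_size:(k + 1) * out_size] for k in range(4))
    c = sigmoid(f) * c_tm1 + sigmoid(i) * np.tanh(a)   # new cell state
    h = sigmoid(o) * np.tanh(c)                        # new hidden state
    return h, c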
def forward_gpu(self, inputs):
    x, targets = inputs
    N = x.shape[0]

    # Linear function
    y = cuda.empty((N, 1), dtype=np.float32)
    cuk.dot(x, self.W, out=y, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(y, self.b)
    if self.return_y:
        return y,

    # Residuals, kept for backpropagation
    self.diff = cuk.vecAdd(y, -targets)
    if self.compute_loss:
        loss = cuda.culinalg.norm(self.diff) ** 2
        loss = np.atleast_2d(np.array(cuda.to_cpu(loss))) * 1.0 / (2 * N)
    else:
        loss = np.atleast_2d(np.array(np.nan, dtype=np.float32))
    return loss,
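# Reference sketch (assumption): the loss above is read as the mean squared
# error of a linear model, i.e. ||x W^T + b - targets||^2 / (2N). The helper
# name is illustrative; assumes numpy is imported as np.
def _mse_reference(x, targets, W, b):
    y = x.dot(W.T) + b               # linear prediction
    diff = y - targets               # residuals
    return (diff ** 2).sum() / (2.0 * len(x))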
def forward_gpu(self, inputs):
    x, h_tm1 = inputs
    N = x.shape[0]

    # update gate
    u = cuda.empty((N, self.out_size), dtype=np.float32)
    cuk.dot(x, self.Wu, out=u, transb='t')
    cuk.dotAdd(h_tm1, self.Vu, C=u, transb='t')

    # reset gate
    r = cuda.empty((N, self.out_size), dtype=np.float32)
    cuk.dot(x, self.Wr, out=r, transb='t')
    cuk.dotAdd(h_tm1, self.Vr, C=r, transb='t')

    if not self.nobias:
        cuk.addVec2Mat(u, self.bu)
        cuk.addVec2Mat(r, self.br)
    self.u = cuk.sigmoid(x=u, out=u)
    self.r = cuk.sigmoid(x=r, out=r)

    # new memory
    HV = cuda.empty((N, self.out_size), dtype=np.float32)
    self.HV = cuk.dot(h_tm1, self.Vh, out=HV, transb='t')
    h_tilde = cuda.empty((N, self.out_size), dtype=np.float32)
    h_tilde = cuk.hadamard(r, self.HV, out=h_tilde)
    cuk.dotAdd(x, self.Wh, C=h_tilde, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(h_tilde, self.bh)
    self.h_tilde = cuk.tanh(x=h_tilde, out=h_tilde)

    # hidden state
    h = cuda.empty((N, self.out_size), dtype=np.float32)
    self.h = cuk.gru_forward(u=u, h_tilde=h_tilde, h_tm1=h_tm1, out=h)
    return self.h,
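# Reference sketch (assumption): a NumPy version of the GRU step built from
# the same quantities as above. The final interpolation follows the common
# h = (1 - u) * h_tm1 + u * h_tilde convention; cuk.gru_forward may use the
# opposite one, so this is only a guess at the kernel. Assumes numpy is
# imported as np; the helper name and signature are illustrative.
def _gru_step_reference(x, h_tm1, Wu, Vu, bu, Wr, Vr, br, Wh, Vh, bh):
    def sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))
    u = sigmoid(x.dot(Wu.T) + h_tm1.dot(Vu.T) + bu)             # update gate
    r = sigmoid(x.dot(Wr.T) + h_tm1.dot(Vr.T) + br)             # reset gate
    h_tilde = np.tanh(x.dot(Wh.T) + r * h_tm1.dot(Vh.T) + bh)   # new memory
    return (1.0 - u) * h_tm1 + u * h_tilde                      # hidden state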
def forward_gpu(self, inputs):
    x, h_tm1 = inputs
    z = cuda.empty_like(h_tm1)

    # Linear function
    if self.hot:
        cuk.hotdot(self.W, x, out=z, dont_add=True)
    else:
        cuk.dot(x, self.W, out=z, transb='t')
    cuk.dotAdd(h_tm1, self.V, C=z, transb='t')
    if not self.nobias:
        cuk.addVec2Mat(z, self.b)

    # apply non-linear activation
    if self.act_func_str in ('tanh', 'sigmoid'):
        h = self.cu_act_func(x=z, out=z)
        self.h = h  # save h for backpropagation
    elif self.act_func_str in ('leakyrelu', 'relu'):
        h = cuda.empty_like(z)
        h = self.cu_act_func(x=z, out=h)
        self.z = z  # save z for backpropagation
    else:
        raise NotImplementedError('the activation function is not available')
    return h,
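# Reference sketch (assumption): the plain recurrent step above amounts to
# h = act(x W^T + h_tm1 V^T + b), where act is tanh, sigmoid, relu or leaky
# relu depending on act_func_str. tanh is used below purely as an
# illustration; assumes numpy is imported as np.
def _rnn_step_reference(x, h_tm1, W, V, b):
    return np.tanh(x.dot(W.T) + h_tm1.dot(V.T) + b)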