def weight_update(self):
    if self.next_layer:
        #batch_size = ((self.out.shape[2]*gpu.gpu_count()) if self.config['parallelism'] == 'data' else self.out.shape[2])
        batch_size = self.out.shape[2]
        if self.has_gradients:
            #x = self.w_grad_next.tocpu()
            #x[np.abs(x) < 0.005] = 0
            #del self.w_grad_next
            #self.w_grad_next = gpu.array(x)
            # scale the gradient with RMSProp; the kernel writes the step into w_grad_next
            lib.funcs.inp_RMSProp(self.m_next.pt, self.w_grad_next.pt,
                                  ct.c_float(self.config['momentum']),
                                  ct.c_float(self.config['learning_rate']),
                                  batch_size)
            gpu.subtract(self.w_next, self.w_grad_next, self.w_next)
            if not self.test_buffer:
                self.test_buffer = gpu.empty_like(self.w_next)
            # hard threshold: zero every weight not greater than 0.005
            # (note this also zeroes all negative weights)
            gpu.fill(self.test_buffer, 0.005)
            gpu.greater(self.w_next, self.test_buffer, self.test_buffer)
            gpu.multiply(self.w_next, self.test_buffer, self.w_next)
        #apply grad only after initializing RMSProp with the first gradient
        if not self.has_gradients:
            self.has_gradients = True
            #TODO: this should work
            #gpu.div(self.w_grad_next, batch_size, self.m_next)
        if self.config['parallelism'] != 'data':
            self.next_layer.weight_update()
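# A minimal NumPy sketch of what an RMSProp-style in-place kernel such as
# lib.funcs.inp_RMSProp typically computes; the exact kernel semantics
# (eps value and placement) are an assumption, not taken from the source.
# It mirrors the call above: the kernel overwrites the gradient buffer with
# the scaled step, and the caller then applies it via gpu.subtract.
import numpy as np

def inp_rmsprop_sketch(m, grad, momentum, learning_rate, batch_size, eps=1e-8):
    g = grad / batch_size                             # average gradient over the batch
    m[:] = momentum * m + (1.0 - momentum) * g * g    # running mean of squared gradients
    grad[:] = learning_rate * g / (np.sqrt(m) + eps)  # scaled step, written back into grad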
def backward_errors(self):
    if self.next_layer:
        # recurse first so the next layer's error is ready below
        self.next_layer.backward_errors()
    else:
        # output layer: error is the residual out - target
        gpu.subtract(self.out, self.target, self.error)
        return

    # the input layer has no error to propagate
    if type(self.funcs) is Input:
        return

    # write the activation derivative into self.out, then compute
    # error = (next_error . w_next^T) * f'(activation)
    self.funcs.grad(self.activation, self.out)
    gpu.dot(self.next_layer.error, self.w_next.T, self.error)
    gpu.multiply(self.error, self.out, self.error)
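# A sketch of the error recursion the method above implements, in NumPy.
# Shapes are assumptions: error/out are (batch, units) and w_next is
# (units, next_units). The output layer uses the residual (out - target);
# hidden layers use the standard backprop rule
# delta_l = (delta_{l+1} . W^T) * f'(a_l), with funcs.grad writing f'(a_l)
# into self.out before the elementwise multiply.
import numpy as np

def hidden_error_sketch(next_error, w_next, activation_grad):
    return np.dot(next_error, w_next.T) * activation_grad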