Example #1
    def weight_update(self):
        if self.next_layer:
            #batch_size = ((self.out.shape[2]*gpu.gpu_count()) if self.config['parallelism'] == 'data' else self.out.shape[2])
            batch_size = self.out.shape[2]

            if self.has_gradients:
                #x = self.w_grad_next.tocpu()
                #x[np.abs(x) < 0.005] = 0
                #del self.w_grad_next
                #self.w_grad_next = gpu.array(x)
                lib.funcs.inp_RMSProp(self.m_next.pt, self.w_grad_next.pt,
                                      ct.c_float(self.config['momentum']),
                                      ct.c_float(self.config['learning_rate']),
                                      batch_size)
                gpu.subtract(self.w_next, self.w_grad_next, self.w_next)

                if not self.test_buffer:
                    self.test_buffer = gpu.empty_like(self.w_next)

                gpu.fill(self.test_buffer, 0.005)
                gpu.greater(self.w_next, self.test_buffer, self.test_buffer)
                gpu.multiply(self.w_next, self.test_buffer, self.w_next)

            #apply grad only after initializing RMSProp with the first gradient
            if not self.has_gradients:
                self.has_gradients = True
                #TODO: this should work
                #gpu.div(self.w_grad_next, batch_size, self.m_next)

            if self.config['parallelism'] != 'data':
                self.next_layer.weight_update()
Example #2
    def weight_update(self):
        if self.next_layer:
            #batch_size = ((self.out.shape[2]*gpu.gpu_count()) if self.config['parallelism'] == 'data' else self.out.shape[2])
            batch_size = self.out.shape[2]

            if self.has_gradients:
                #x = self.w_grad_next.tocpu()
                #x[np.abs(x) < 0.005] = 0
                #del self.w_grad_next
                #self.w_grad_next = gpu.array(x)
                lib.funcs.inp_RMSProp(self.m_next.pt, self.w_grad_next.pt,
                                      ct.c_float(self.config['momentum']),
                                      ct.c_float(self.config['learning_rate']),
                                      batch_size)
                gpu.subtract(self.w_next, self.w_grad_next, self.w_next)

                if not self.test_buffer:
                    self.test_buffer = gpu.empty_like(self.w_next)

                gpu.fill(self.test_buffer, 0.005)
                gpu.greater(self.w_next, self.test_buffer, self.test_buffer)
                gpu.multiply(self.w_next, self.test_buffer, self.w_next)

            #apply grad only after initializing RMSProp with the first gradient
            if not self.has_gradients:
                self.has_gradients = True
                #TODO: this should work
                #gpu.div(self.w_grad_next, batch_size, self.m_next)

            if self.config['parallelism'] != 'data':
                self.next_layer.weight_update()
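For readers without the custom gpu/ctypes stack, here is a minimal NumPy sketch of the update that weight_update appears to perform: an RMSProp step (a running average of squared gradients scales the learning rate) followed by the hard threshold that zeroes every weight not greater than 0.005. The exact semantics of lib.funcs.inp_RMSProp are internal to the library, so the function below is only an illustrative stand-in, not its API; names such as rmsprop_update, eps, and the default hyperparameters are assumptions.

    import numpy as np

    def rmsprop_update(w, grad, m, momentum=0.9, learning_rate=0.001,
                       batch_size=128, eps=1e-8, threshold=0.005):
        # average the accumulated gradient over the batch
        g = grad / batch_size
        # running mean of squared gradients (roughly what m_next tracks on the GPU)
        m[:] = momentum * m + (1.0 - momentum) * g ** 2
        # RMSProp-scaled step, applied in place like gpu.subtract above
        w -= learning_rate * g / (np.sqrt(m) + eps)
        # hard threshold: keep only weights strictly greater than 0.005
        w *= (w > threshold)
        return w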
Example #3
    def backward_errors(self):
        if self.next_layer: self.next_layer.backward_errors()
        else:
            gpu.subtract(self.out, self.target, self.error)
            return

        if type(self.funcs) is Input: return

        self.funcs.grad(self.activation, self.out)
        gpu.dot(self.next_layer.error, self.w_next.T, self.error)
        gpu.multiply(self.error, self.out, self.error)
Example #4
    def backward_errors(self):
        if self.next_layer: self.next_layer.backward_errors()
        else:
            gpu.subtract(self.out, self.target, self.error)
            return

        if type(self.funcs) is Input: return

        self.funcs.grad(self.activation, self.out)
        gpu.dot(self.next_layer.error, self.w_next.T, self.error)
        gpu.multiply(self.error, self.out, self.error)
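The recursion in backward_errors is the standard backpropagation step: the output layer's error is the residual out - target, and each hidden layer's error is the next layer's error pushed back through the weights and gated by the activation derivative. A shape-level NumPy sketch, with placeholder names (next_error, w_next, act_grad) standing in for the gpu buffers:

    import numpy as np

    def output_error(out, target):
        # error at the output layer: residual between prediction and target
        return out - target

    def hidden_error(next_error, w_next, act_grad):
        # error_l = (error_{l+1} @ W_next.T) * f'(activation_l)
        error = next_error @ w_next.T   # propagate the error back through the weights
        error *= act_grad               # elementwise product with the activation gradient
        return error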