def create_weights(self):
    """Allocate the buffers that connect this layer to the next, then recurse.

    ``self.log_network()`` is always called first. If ``self.next_layer`` is
    falsy nothing else happens. Otherwise the following attributes are
    (re)created on ``self``:

    * ``w_next``: ``gpu.array`` of
      ``create_uniform_rdm_weight(self.unitcount, next.unitcount)``
    * ``b_next``, ``b_grad_next``: zeros of shape ``(1, next.unitcount)``
    * ``m_next``, ``w_grad_next``, ``w_next_sync``: zeros of shape
      ``(self.unitcount, next.unitcount)``
    * when the next layer's ``config['compression']`` is ``'1bit'``:
      ``errors``, ``posMask``, ``negMask``, ``w_grad_with_errors`` (zeros like
      ``w_grad_next``) and ``posCount``, ``negCount``, ``posAvg``, ``negAvg``
      (1-D zeros of length ``w_grad_next.shape_tensor[2]``)
    * when it is ``'8bit'``: ``max_value_buffer`` (``gpu.empty_like`` of
      ``w_grad_next``)

    Finally ``create_weights()`` is invoked on the next layer, so one call on
    the first layer walks the whole chain.
    """
    self.log_network()

    successor = self.next_layer
    if not successor:
        # Last layer in the chain: there are no outgoing weights to allocate.
        return

    weight_shape = (self.unitcount, successor.unitcount)
    bias_shape = (1, successor.unitcount)

    self.w_next = gpu.array(create_uniform_rdm_weight(*weight_shape))
    self.b_next = gpu.zeros(bias_shape)
    self.m_next = gpu.zeros(weight_shape)
    self.w_grad_next = gpu.zeros(weight_shape)
    self.b_grad_next = gpu.zeros(bias_shape)
    self.w_next_sync = gpu.zeros(weight_shape)

    compression = successor.config['compression']
    if compression == '1bit':
        # Buffers shaped like the weight gradient.
        for attr in ('errors', 'posMask', 'negMask', 'w_grad_with_errors'):
            setattr(self, attr, gpu.zeros_like(self.w_grad_next))
        # 1-D buffers whose length is taken from the gradient's shape_tensor.
        # NOTE(review): presumably index 2 is the row dimension of the
        # library's padded shape -- confirm against the gpu array type.
        vector_shape = (self.w_grad_next.shape_tensor[2],)
        for attr in ('posCount', 'negCount', 'posAvg', 'negAvg'):
            setattr(self, attr, gpu.zeros(vector_shape))
    elif compression == '8bit':
        self.max_value_buffer = gpu.empty_like(self.w_grad_next)

    successor.create_weights()
def handle_offsize(self, batch_size):
    """Switch the layer's buffers to the set kept for an off-size batch.

    Three cases, decided by the state of ``self.activation_offsize``:

    1. It is ``None``: allocate ``activation_offsize``, ``out_offsize``,
       ``error_offsize`` (shape ``(batch_size, self.unitcount)``) and
       ``bias_ones_offsize`` (shape ``(batch_size, 1)``, filled with ones),
       then swap them with the regular buffers.
    2. Its ``shape[2]`` differs from ``batch_size``: delete ``activation``,
       ``out``, ``error`` and ``bias_ones`` and call
       ``self.create_buffers(batch_size)``. No swap is performed.
    3. Otherwise: swap the regular and off-size buffers.

    Args:
        batch_size: Number of rows the buffers must hold.
    """
    # Identity check: ``== None`` would dispatch to the buffer's own __eq__,
    # which for array-like objects may be elementwise or raise.
    if self.activation_offsize is None:
        split_axis = 2 if self.config['parallelism'] == 'data' else -1
        unit_shape = (batch_size, self.unitcount)
        self.activation_offsize = gpu.empty(unit_shape, split_axis)
        self.out_offsize = gpu.empty(unit_shape, split_axis)
        self.error_offsize = gpu.empty(unit_shape, split_axis)
        self.bias_ones_offsize = gpu.zeros((batch_size, 1), split_axis) + 1
    # NOTE(review): shape[2] is read from a buffer created with a 2-tuple
    # shape; presumably the gpu array reports a padded shape -- confirm.
    elif self.activation_offsize.shape[2] != batch_size:
        del self.activation
        del self.out
        del self.error
        del self.bias_ones
        # NOTE(review): the *_offsize attributes are left untouched here;
        # presumably create_buffers handles them -- verify.
        self.create_buffers(batch_size)
        return

    # Shared tail for the "just allocated" and "already the right size" cases.
    swap(self.activation, self.activation_offsize)
    swap(self.out, self.out_offsize)
    swap(self.error, self.error_offsize)
    swap(self.bias_ones, self.bias_ones_offsize)
def create_weights(self):
    """Allocate the buffers that connect this layer to the next, then recurse.

    ``self.log_network()`` is always called first. If ``self.next_layer`` is
    falsy nothing else happens. Otherwise the following attributes are
    (re)created on ``self``:

    * ``w_next``: ``gpu.array`` of
      ``create_uniform_rdm_weight(self.unitcount, next.unitcount)``
    * ``b_next``, ``b_grad_next``: zeros of shape ``(1, next.unitcount)``
    * ``m_next``, ``w_grad_next``, ``w_next_sync``: zeros of shape
      ``(self.unitcount, next.unitcount)``
    * when the next layer's ``config['compression']`` is ``'1bit'``:
      ``errors``, ``posMask``, ``negMask``, ``w_grad_with_errors`` (zeros like
      ``w_grad_next``) and ``posCount``, ``negCount``, ``posAvg``, ``negAvg``
      (1-D zeros of length ``w_grad_next.shape_tensor[2]``)
    * when it is ``'8bit'``: ``max_value_buffer`` (``gpu.empty_like`` of
      ``w_grad_next``)

    Finally ``create_weights()`` is invoked on the next layer, so one call on
    the first layer walks the whole chain.
    """
    self.log_network()

    successor = self.next_layer
    if not successor:
        # Last layer in the chain: there are no outgoing weights to allocate.
        return

    weight_shape = (self.unitcount, successor.unitcount)
    bias_shape = (1, successor.unitcount)

    self.w_next = gpu.array(create_uniform_rdm_weight(*weight_shape))
    self.b_next = gpu.zeros(bias_shape)
    self.m_next = gpu.zeros(weight_shape)
    self.w_grad_next = gpu.zeros(weight_shape)
    self.b_grad_next = gpu.zeros(bias_shape)
    self.w_next_sync = gpu.zeros(weight_shape)

    compression = successor.config['compression']
    if compression == '1bit':
        # Buffers shaped like the weight gradient.
        for attr in ('errors', 'posMask', 'negMask', 'w_grad_with_errors'):
            setattr(self, attr, gpu.zeros_like(self.w_grad_next))
        # 1-D buffers whose length is taken from the gradient's shape_tensor.
        # NOTE(review): presumably index 2 is the row dimension of the
        # library's padded shape -- confirm against the gpu array type.
        vector_shape = (self.w_grad_next.shape_tensor[2],)
        for attr in ('posCount', 'negCount', 'posAvg', 'negAvg'):
            setattr(self, attr, gpu.zeros(vector_shape))
    elif compression == '8bit':
        self.max_value_buffer = gpu.empty_like(self.w_grad_next)

    successor.create_weights()
def handle_offsize(self, batch_size):
    """Switch the layer's buffers to the set kept for an off-size batch.

    Three cases, decided by the state of ``self.activation_offsize``:

    1. It is ``None``: allocate ``activation_offsize``, ``out_offsize``,
       ``error_offsize`` (shape ``(batch_size, self.unitcount)``) and
       ``bias_ones_offsize`` (shape ``(batch_size, 1)``, filled with ones),
       then swap them with the regular buffers.
    2. Its ``shape[2]`` differs from ``batch_size``: delete ``activation``,
       ``out``, ``error`` and ``bias_ones`` and call
       ``self.create_buffers(batch_size)``. No swap is performed.
    3. Otherwise: swap the regular and off-size buffers.

    Args:
        batch_size: Number of rows the buffers must hold.
    """
    # Identity check: ``== None`` would dispatch to the buffer's own __eq__,
    # which for array-like objects may be elementwise or raise.
    if self.activation_offsize is None:
        split_axis = 2 if self.config['parallelism'] == 'data' else -1
        unit_shape = (batch_size, self.unitcount)
        self.activation_offsize = gpu.empty(unit_shape, split_axis)
        self.out_offsize = gpu.empty(unit_shape, split_axis)
        self.error_offsize = gpu.empty(unit_shape, split_axis)
        self.bias_ones_offsize = gpu.zeros((batch_size, 1), split_axis) + 1
    # NOTE(review): shape[2] is read from a buffer created with a 2-tuple
    # shape; presumably the gpu array reports a padded shape -- confirm.
    elif self.activation_offsize.shape[2] != batch_size:
        del self.activation
        del self.out
        del self.error
        del self.bias_ones
        # NOTE(review): the *_offsize attributes are left untouched here;
        # presumably create_buffers handles them -- verify.
        self.create_buffers(batch_size)
        return

    # Shared tail for the "just allocated" and "already the right size" cases.
    swap(self.activation, self.activation_offsize)
    swap(self.out, self.out_offsize)
    swap(self.error, self.error_offsize)
    swap(self.bias_ones, self.bias_ones_offsize)