def backward_grads(self):
    # output layer (has a target): no outgoing weights, nothing to accumulate here
    if self.target: return
    # weight gradient to the next layer: activation^T x error of the next layer
    gpu.dot(self.activation.T, self.next_layer.error, self.w_grad_next)
    if self.next_layer: self.next_layer.backward_grads()
    # bias gradient: column sum of the next layer's error, computed via a ones vector
    gpu.dot(self.bias_ones.T, self.next_layer.error, self.b_grad_next)
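For reference, a minimal NumPy sketch (not part of the library; sizes and names are illustrative assumptions) of the two products backward_grads accumulates: the weight gradient activation^T x error and the bias gradient as a column sum of the error, obtained through a ones vector exactly as the gpu.dot(self.bias_ones.T, ...) call does.

import numpy as np

batch, n_in, n_out = 4, 3, 2                 # assumed sizes, for illustration only
activation = np.random.randn(batch, n_in)
next_error = np.random.randn(batch, n_out)
bias_ones = np.ones((batch, 1))

w_grad = activation.T.dot(next_error)        # (n_in, n_out), mirrors gpu.dot(activation.T, error, w_grad_next)
b_grad = bias_ones.T.dot(next_error)         # (1, n_out),   mirrors gpu.dot(bias_ones.T, error, b_grad_next)

# the ones-vector trick is just a column sum of the error
assert np.allclose(b_grad, next_error.sum(axis=0, keepdims=True))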
def backward_errors(self):
    # recurse to the output layer first, then propagate errors back through the net
    if self.next_layer: self.next_layer.backward_errors()
    else:
        # output layer: error = prediction - target
        gpu.subtract(self.out, self.target, self.error)
        return

    # the input layer has no error of its own
    if type(self.funcs) is Input: return

    # write the derivative of the unit function into self.out, then
    # delta = (error of next layer x W^T) * f'(activation)
    self.funcs.grad(self.activation, self.out)
    gpu.dot(self.next_layer.error, self.w_next.T, self.error)
    gpu.multiply(self.error, self.out, self.error)
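A minimal NumPy sketch of the hidden-layer step backward_errors performs, assuming funcs.grad writes the elementwise derivative of the unit function into self.out; the logistic unit used here is purely an assumption for illustration.

import numpy as np

def sigmoid(z): return 1.0 / (1.0 + np.exp(-z))

batch, n_hidden, n_out = 4, 3, 2             # assumed sizes
z = np.random.randn(batch, n_hidden)          # this layer's pre-activation buffer
w_next = np.random.randn(n_hidden, n_out)
next_error = np.random.randn(batch, n_out)

deriv = sigmoid(z) * (1.0 - sigmoid(z))       # what funcs.grad would leave in self.out
error = next_error.dot(w_next.T)              # gpu.dot(next_layer.error, w_next.T, error)
error = error * deriv                         # gpu.multiply(error, out, error)
print(error.shape)                            # (batch, n_hidden)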
def forward(self, data=None, target=None, inTrainingMode=True):
    if data is not None:
        # input layer: consume the data directly
        self.unitcount = data.shape[1]
        self.handle_input_size(data.shape[0])
        self.root.target = target
        self.funcs.activation(data, self.activation, self.out, inTrainingMode)
        #if inTrainingMode: self.handle_parallelism()
    else:
        #if inTrainingMode: self.handle_parallelism()
        # hidden/output layer: activation = prev_out x W + b, then apply the unit function
        gpu.dot(self.prev_layer.out, self.prev_layer.w_next, self.activation)
        gpu.add(self.activation, self.prev_layer.b_next, self.activation)
        self.funcs.activation(self.activation, self.activation, self.out, inTrainingMode)
    if self.next_layer: self.next_layer.forward(None, None, inTrainingMode)
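A minimal NumPy sketch (with an assumed ReLU unit, chosen only for illustration) of the per-layer computation forward performs for a non-input layer: activation = prev_out x W + b, followed by the layer's unit function writing into the output buffer.

import numpy as np

batch, n_prev, n_this = 4, 3, 5              # assumed sizes
prev_out = np.random.randn(batch, n_prev)
w_next = np.random.randn(n_prev, n_this)
b_next = np.zeros((1, n_this))

activation = prev_out.dot(w_next)            # gpu.dot(prev_layer.out, prev_layer.w_next, activation)
activation = activation + b_next             # gpu.add(activation, prev_layer.b_next, activation)
out = np.maximum(activation, 0.0)            # funcs.activation(...), here assumed to be ReLU
print(out.shape)                             # (batch, n_this)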
inputT = rdm(input_cols, batch_size)
outputT = rdm(hidden_size, batch_size)
errors_W = rdm(batch_size, hidden_size * 4)
errors_R = rdm(batch_size, hidden_size * 4)
inputs_stackedR = rdm(hidden_size, T * batch_size)
errors_stackedR = rdm(T * batch_size, hidden_size * w_stacking)

iters = 500
mean_time = 0
t0 = time.time()
t.tick("stacking")
for i in range(iters):
    # stacked variant: one large dot over all T time steps at once
    #for step in range(T):
    #gpu.dot(inputs_stackedW, errors_stackedW, W)
    gpu.dot(inputs_stackedR, errors_stackedR, R)
print "{0} ms".format(t.tock("stacking") / iters)
print (time.time() - t0) / iters * 1000

'''
iters = 5
mean_time = 0
t0 = time.time()
for i in range(iters):
    t.tick("no stacking")
    # unstacked variant: one pair of dots per time step
    for step in range(T):
        gpu.dot(inputT, errors_W, W)
        gpu.dot(outputT, errors_R, R)
    t.tick("no stacking")
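A minimal NumPy check (independent of the benchmark above; all sizes are assumptions) that the stacked variant is mathematically equivalent to the per-step variant: one large dot over the T stacked time steps equals the sum of T per-step dots, so the two only differ in speed.

import numpy as np

T, batch_size, hidden_size, w_stacking = 5, 8, 16, 4   # assumed sizes
outputs = np.random.randn(T, hidden_size, batch_size)
errors = np.random.randn(T, batch_size, hidden_size * w_stacking)

# per-step: accumulate T small products
R_loop = np.zeros((hidden_size, hidden_size * w_stacking))
for step in range(T):
    R_loop += outputs[step].dot(errors[step])

# stacked: one large product over all time steps at once
inputs_stackedR = np.concatenate([outputs[step] for step in range(T)], axis=1)   # (hidden, T*batch)
errors_stackedR = np.concatenate([errors[step] for step in range(T)], axis=0)    # (T*batch, hidden*w_stacking)
R_stacked = inputs_stackedR.dot(errors_stackedR)

assert np.allclose(R_loop, R_stacked)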
# benchmark gpu.dot for both operand orders (batch-first vs feature-first layout)
mean_time = 0
for i in range(5):
    iters = 100
    #warmup
    for j in range(1000):
        if batch_first_mode: gpu.dot(input, W, output)
        else: gpu.dot(W, input, output)
    t.tick(str(dim_inner))
    for j in range(iters):
        if batch_first_mode: gpu.dot(input, W, output)
        else: gpu.dot(W, input, output)
    t.tick(str(dim_inner))
print t.tock(str(dim_inner)) / 5 / iters

# repeat the measurement with the second handle (gpu2) and its buffers
mean_time = 0
for i in range(5):
    iters = 100
    #warmup
    for j in range(1000):
        if batch_first_mode: gpu2.dot(input2, W2, output2)
        else: gpu2.dot(W2, input2, output2)
    t.tick(str(dim_inner))
    for j in range(iters):
        if batch_first_mode: gpu2.dot(input2, W2, output2)
        else: gpu2.dot(W2, input2, output2)
    t.tick(str(dim_inner))
print t.tock(str(dim_inner)) / 5 / iters
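A minimal NumPy illustration (array names and sizes are assumptions) of what the batch_first_mode switch compares: with a batch-first layout the product is input x W, with a feature-first layout it is W x input, and the two results are transposes of each other, so the benchmark is only about which memory layout the dot kernel handles faster.

import numpy as np

batch_size, dim_in, dim_out = 128, 1024, 1024          # assumed sizes
X_batch_first = np.random.randn(batch_size, dim_in)     # rows = samples
W = np.random.randn(dim_in, dim_out)

out_batch_first = X_batch_first.dot(W)                  # gpu.dot(input, W, output)
out_feature_first = W.T.dot(X_batch_first.T)            # gpu.dot(W, input, output) with transposed layout

# same numbers, transposed layout
assert np.allclose(out_batch_first, out_feature_first.T)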