def updateGradientNeg(self, layer1, layer2, batchsize):
    """Fold the negative-phase statistics into the gradient buffers.

    Issues one ``cp.prod`` into ``self.w_tmp`` and one ``cp.reduce_to_col``
    each into ``self.blo_tmp`` and ``self.bhi_tmp``, reading ``layer1.act``
    and ``layer2.act``.  Every call receives the same scalar pair
    ``(-1/batchsize, 1/batchsize)``.

    NOTE(review): presumably the first scalar weights the newly computed
    term and the second weights the buffer's previous content -- confirm
    against the CUV documentation.
    """
    new_factor = -1. / batchsize
    old_factor = 1. / batchsize
    add = cp.reduce_functor.ADD

    cp.prod(self.w_tmp, layer1.act, layer2.act, 'n', 't',
            new_factor, old_factor)
    cp.reduce_to_col(self.blo_tmp, layer1.act, add, new_factor, old_factor)
    cp.reduce_to_col(self.bhi_tmp, layer2.act, add, new_factor, old_factor)
def updateLayer(self, layernum, sample=True):
    """Refresh the activations of the layer at index ``layernum``.

    Three independent checks are made, in this order:

    * index 0: ``self.downPass(layernum + 1, sample=sample)`` is called;
    * last index: ``self.upPass(layernum - 1, sample)`` is called;
    * strictly between the two: the layer's ``act`` is rebuilt here from
      both neighbouring layers, followed by the layer's ``nonlinearity()``
      and, when ``sample`` is true, its ``sample()``.
    """
    L = self.layers[layernum]
    if layernum == 0:
        self.downPass(layernum + 1, sample=sample)
    if layernum == len(self.layers) - 1:
        self.upPass(layernum - 1, sample)

    # Everything below only applies to layers with two neighbours.
    if not 0 < layernum < len(self.layers) - 1:
        return

    above = self.layers[layernum + 1]
    below = self.layers[layernum - 1]
    w_below = self.weights[layernum - 1]
    w_above = self.weights[layernum]

    # Input from the layer above.  Keep a copy because L.act is reused as
    # the destination of the next product.
    cp.prod(L.act, w_above.mat, above.act, 'n', 'n')
    cp.matrix_plus_col(L.act, w_above.bias_lo)
    from_above = L.act.copy()

    # Input from the layer below.
    cp.prod(L.act, w_below.mat, below.act, 't', 'n')
    cp.matrix_plus_col(L.act, w_below.bias_hi)

    # Combine the parts from above/below (AXPBY with coefficients 0.5, 0.5).
    cp.apply_binary_functor(L.act, from_above,
                            cp.binary_functor.AXPBY, 0.5, 0.5)
    from_above.dealloc()

    L.nonlinearity()
    if sample:
        L.sample()
def updateLayer(self, layernum, sample=True):
    """Recompute ``act`` for the layer with index ``layernum``.

    The bottom layer (index 0) is handled by ``self.downPass`` and the top
    layer (last index) by ``self.upPass``; these two checks and the
    interior-layer check are evaluated independently of each other.  An
    interior layer is updated in this method from the layer above and the
    layer below, then passed through its ``nonlinearity()`` and, if
    ``sample`` is true, its ``sample()``.
    """
    L = self.layers[layernum]
    if layernum == 0:
        self.downPass(layernum + 1, sample=sample)
    if layernum == len(self.layers) - 1:
        self.upPass(layernum - 1, sample)

    # Nothing more to do unless the layer has a neighbour on both sides.
    if not 0 < layernum < len(self.layers) - 1:
        return

    upper = self.layers[layernum + 1]
    lower = self.layers[layernum - 1]
    w_lower = self.weights[layernum - 1]
    w_upper = self.weights[layernum]

    # Contribution of the upper neighbour, saved in a copy since L.act is
    # overwritten by the following product.
    cp.prod(L.act, w_upper.mat, upper.act, 'n', 'n')
    cp.matrix_plus_col(L.act, w_upper.bias_lo)
    upper_part = L.act.copy()

    # Contribution of the lower neighbour.
    cp.prod(L.act, w_lower.mat, lower.act, 't', 'n')
    cp.matrix_plus_col(L.act, w_lower.bias_hi)

    # Add parts from above/below (AXPBY with coefficients 0.5 and 0.5).
    cp.apply_binary_functor(L.act, upper_part,
                            cp.binary_functor.AXPBY, 0.5, 0.5)
    upper_part.dealloc()

    L.nonlinearity()
    if sample:
        L.sample()
def forward(self, input, weight, bias, linear=False):
    """Compute one layer's output on the device and return it.

    A new column-major float tensor of shape
    ``[weight.shape[1], input.shape[1]]`` is allocated, zero-filled, set to
    the product of ``weight`` (flag ``"t"``) and ``input`` (flag ``"n"``),
    and ``bias`` is added with ``cp.matrix_plus_col``.

    @param linear  when true the result is returned as is; otherwise the
                   ``SIGM`` scalar functor is applied first
    @return the newly allocated tensor
    """
    out_shape = [weight.shape[1], input.shape[1]]
    result = cp.dev_tensor_float_cm(out_shape)
    cp.fill(result, 0)
    cp.prod(result, weight, input, "t", "n")
    cp.matrix_plus_col(result, bias)
    if linear:
        return result
    cp.apply_scalar_functor(result, cp.scalar_functor.SIGM)
    return result
def forward(self, input, weight, bias, linear=False):
    """Return ``weight`` ("t") times ``input`` ("n") plus ``bias``.

    The result lives in a freshly allocated column-major float tensor with
    ``weight.shape[1]`` rows and ``input.shape[1]`` columns.  Unless
    ``linear`` is true, the ``SIGM`` scalar functor is applied to it before
    it is returned.
    """
    n_rows = weight.shape[1]
    n_cols = input.shape[1]
    result = cp.dev_tensor_float_cm([n_rows, n_cols])
    cp.fill(result, 0)
    cp.prod(result, weight, input, "t", "n")
    cp.matrix_plus_col(result, bias)
    if linear:
        return result
    cp.apply_scalar_functor(result, cp.scalar_functor.SIGM)
    return result
def delta_hidden(self, weight, knownDerivative, netInput):
    """Compute the deltas of a lower layer from the deltas above it.

    Allocates a column-major float tensor of shape
    ``[weight.shape[0], netInput.shape[1]]``, fills it with the product of
    ``weight`` and ``knownDerivative`` (both flagged ``'n'``), and
    multiplies it element-wise by ``DSIGM`` applied to a copy of
    ``netInput``.  ``netInput`` itself is not modified.

    @return the newly allocated delta tensor
    """
    n_rows = weight.shape[0]
    n_cols = netInput.shape[1]
    deltaLo = cp.dev_tensor_float_cm([n_rows, n_cols])
    cp.prod(deltaLo, weight, knownDerivative, 'n', 'n')

    # Work on a scratch copy so the caller's netInput stays intact.
    dsigm = netInput.copy()
    cp.apply_scalar_functor(dsigm, cp.scalar_functor.DSIGM)
    cp.apply_binary_functor(deltaLo, dsigm, cp.binary_functor.MULT)
    dsigm.dealloc()

    return deltaLo
def get_distance_matrix(self, test):
    """Return a ``[self.data.shape[0], test rows]`` device matrix ``p``.

    ``test`` is uploaded as a column-major float tensor ``t``; it must have
    as many columns as ``self.data`` (checked with an ``assert``).  ``p`` is
    built as ``-2 * data * t^T``, then ``self.dsq`` is added along columns
    and the per-row sums of squares of ``t`` are added along rows.

    NOTE(review): this yields squared Euclidean distances provided
    ``self.dsq`` holds the per-row sums of squares of ``self.data`` --
    confirm where ``dsq`` is set.
    """
    t = cp.dev_tensor_float_cm(test)
    assert t.shape[1] == self.data.shape[1]
    n_test = t.shape[0]

    # Sum of squares of every test row.
    tsq = cp.dev_tensor_float(n_test)
    cp.reduce_to_col(tsq, t, cp.reduce_functor.ADD_SQUARED)

    # Cross term, scaled by -2.
    p = cp.dev_tensor_float_cm([self.data.shape[0], n_test])
    cp.prod(p, self.data, t, "n", "t", -2, 0)

    cp.matrix_plus_col(p, self.dsq)
    cp.matrix_plus_row(p, tsq)
    return p
def get_distance_matrix(self, test):
    """Build the device matrix relating ``self.data`` rows to ``test`` rows.

    The result has ``self.data.shape[0]`` rows and one column per row of
    ``test``.  It is computed as ``-2 * data * t^T`` plus ``self.dsq``
    (added column-wise) plus the sum of squares of each test row (added
    row-wise).  An ``assert`` checks that ``test`` and ``self.data`` have
    the same number of columns.

    NOTE(review): presumably ``self.dsq`` is the matching sum of squares of
    the data rows, which would make this a squared-distance matrix --
    verify against the code that fills ``dsq``.
    """
    t = cp.dev_tensor_float_cm(test)
    assert t.shape[1] == self.data.shape[1]
    test_rows = t.shape[0]

    # Per-row sum of squares of the test data.
    tsq = cp.dev_tensor_float(test_rows)
    cp.reduce_to_col(tsq, t, cp.reduce_functor.ADD_SQUARED)

    # -2 * data * t^T
    p = cp.dev_tensor_float_cm([self.data.shape[0], test_rows])
    cp.prod(p, self.data, t, 'n', 't', -2, 0)

    cp.matrix_plus_col(p, self.dsq)
    cp.matrix_plus_row(p, tsq)
    return p
def backward(self):
    """Backward pass: compute the source layer's deltas, then update weights.

    ``self.source.deltas`` is set to the product of ``self.weight`` (flag
    ``'t'``) and ``self.target.deltas``, and then multiplied element-wise by
    the result of ``self.source.d_nonlinearity`` applied to a scratch copy
    of the source activations.  Finally ``self.weight_update()`` is called
    with its default arguments.
    """
    src = self.source
    cp.prod(src.deltas, self.weight, self.target.deltas, 't', 'n')

    # Scratch matrix with the same size as the source activations.
    acts = src.activations
    h = cp.dev_matrix_cmf(acts.h, acts.w)
    cp.apply_binary_functor(h, acts, cp.binary_functor.COPY)
    src.d_nonlinearity(h)
    cp.apply_binary_functor(src.deltas, h, cp.binary_functor.MULT)
    h.dealloc()

    self.weight_update()
def weight_update(self, learnrate=0.01, decay=0.0):
    """Updates the weights and the bias using source activations and
    target deltas.

    Both updates go through ``cp.learn_step_weight_decay`` with a step
    size of ``learnrate`` divided by the batch size, where the batch size
    is read from ``self.source.activations.w``.

    @param learnrate how strongly the gradient influences the weights
    @param decay large values result in a regularization with respect to
                 the squared weight value
    """
    batch_size = self.source.activations.w

    # Weight gradient: target deltas times source activations ('t').
    grad_w = cp.dev_matrix_cmf(self.weight.h, self.weight.w)
    cp.prod(grad_w, self.target.deltas, self.source.activations, 'n', 't')
    step = learnrate / batch_size
    cp.learn_step_weight_decay(self.weight, grad_w, step, decay)
    grad_w.dealloc()

    # Bias gradient: target deltas reduced to a single column.
    grad_b = cp.get_filled_matrix(self.target.activations.h, 1, 0)
    cp.reduce_to_col(grad_b.vec, self.target.deltas)
    cp.learn_step_weight_decay(self.bias, grad_b, step, decay)
    grad_b.dealloc()
def backward(self, learnrate=0.01, decay=0.0):
    """Backward pass, calculates the deltas of the lower layer and updates
    the weights.

    The source deltas are the product of ``self.weight`` (flag ``'t'``) and
    the target deltas, multiplied element-wise by
    ``self.source.d_nonlinearity`` applied to a copy of the source
    activations.  Weights and bias are then updated through
    ``cp.learn_step_weight_decay`` with step size ``learnrate`` divided by
    ``self.source.activations.shape[1]``.

    @param learnrate how strongly the gradient influences the weights
    @param decay large values result in a regularization with respect to
                 the squared weight value
    """
    # --- deltas of the source layer -------------------------------------
    cp.prod(self.source.deltas, self.weight, self.target.deltas, 't', 'n')
    deriv = self.source.activations.copy()
    self.source.d_nonlinearity(deriv)
    self.source.deltas *= deriv
    deriv.dealloc()

    # --- parameter update -----------------------------------------------
    batch_size = self.source.activations.shape[1]

    dw = cp.prod(self.target.deltas, self.source.activations, 'n', 't')
    step = learnrate / batch_size
    cp.learn_step_weight_decay(self.weight, dw, step, decay)
    dw.dealloc()

    db = cp.sum(self.target.deltas, 1)
    cp.learn_step_weight_decay(self.bias, db, step, decay)
    db.dealloc()
def partialsumV(self, actv, acth, row):
    """Sum out the hidden variables for the given visible states.

    ``row`` is filled with

        log(exp(bh + W^T v) + 1).sum(axis=0) + (v * bv).sum(axis=0)

    and the function returns the sum of all entries of ``row`` divided by
    ``actv.shape[1]``.  No exponential is applied to ``row`` before summing.

    Side effects: ``acth`` and ``row`` are overwritten, and ``actv`` is
    multiplied in place by ``self.bv`` (``cp.matrix_times_col``).
    """
    # acth = bh + W^T * actv
    cp.prod(acth, self.weight, actv, 't', 'n')
    cp.matrix_plus_col(acth, self.bh)

    # acth = log(exp(acth) + 1)
    cp.apply_scalar_functor(acth, cp.scalar_functor.RECT, 1.0)

    # row = acth.sum(axis=0)
    cp.reduce_to_row(row, acth, cp.reduce_functor.ADD)

    # row += (actv * bv).sum(axis=0)
    cp.matrix_times_col(actv, self.bv)
    cp.reduce_to_row(row, actv, cp.reduce_functor.ADD, 1.0, 1.0)

    # Sum on the host in double precision, then divide by the column count.
    host_row = row.np.astype("float64")
    total = math.fsum(host_row.flatten())
    return total / actv.shape[1]
def partialsumV(self, actv, acth, row):
    """Sum out the hidden variables for the given visible states.

    The device code accumulates into ``row``

        log(exp(bh + W^T v) + 1).sum(axis=0) + (v * bv).sum(axis=0)

    The return value is ``math.fsum`` over all entries of ``row`` divided
    by ``actv.shape[1]``; ``row`` is not exponentiated.

    Note that the arguments are used as work buffers: ``acth`` and ``row``
    are overwritten and ``actv`` is scaled in place by ``self.bv``.
    """
    add = cp.reduce_functor.ADD

    # acth = bh + W^T * actv
    cp.prod(acth, self.weight, actv, "t", "n")
    cp.matrix_plus_col(acth, self.bh)

    # acth = log(exp(acth) + 1)
    cp.apply_scalar_functor(acth, cp.scalar_functor.RECT, 1.0)

    # row = acth.sum(axis=0)
    cp.reduce_to_row(row, acth, add)

    # row += (actv * bv).sum(axis=0)
    cp.matrix_times_col(actv, self.bv)
    cp.reduce_to_row(row, actv, add, 1.0, 1.0)

    # Accurate host-side sum in float64, divided by the column count.
    values = row.np.astype("float64").flatten()
    return math.fsum(values) / actv.shape[1]
def partialsum(self, acth, actv, row):
    """Sum out the visible variables for the given hidden states.

    Returns the sum over all entries of

        exp( log(exp(bv + W h) + 1).sum(axis=0) + (h * bh).sum(axis=0) )

    where the exponent is accumulated in ``row`` on the device and the
    exponential and final sum are done on the host in float64.

    Side effects: ``actv`` and ``row`` are overwritten, and ``acth`` is
    multiplied in place by ``self.bh`` (``cp.matrix_times_col``).
    """
    # actv = bv + W * acth
    cp.prod(actv, self.weight, acth, "n", "n")
    cp.matrix_plus_col(actv, self.bv)

    # actv = log(exp(actv) + 1)
    cp.apply_scalar_functor(actv, cp.scalar_functor.RECT, 1.0)

    # row = actv.sum(axis=0)
    cp.reduce_to_row(row, actv, cp.reduce_functor.ADD)

    # row += (acth * bh).sum(axis=0)
    cp.matrix_times_col(acth, self.bh)
    cp.reduce_to_row(row, acth, cp.reduce_functor.ADD, 1.0, 1.0)

    # exp(row), summed accurately on the host
    exponent = row.np.astype("float64")
    terms = np.exp(exponent).flatten()
    return math.fsum(terms)
def sample_markov_chains(self, beta, step):
    """Run one v -> h -> v sampling step of the chains at weight ``beta``.

    Hidden units: ``self.h`` becomes a binarized sample of
    ``SIGM(beta * (w^T v + bias_hi))``.

    Visible units: ``self.v`` becomes a binarized sample of
    ``SIGM(beta * (w h + bias_lo) + (1 - beta) * baserate_bias)``.

    ``self.baserate_bias`` is left untouched.  The previous implementation
    scaled it in place by ``(1 - beta)`` and back by ``1.0 / (1 - beta)``,
    which raised ``ZeroDivisionError`` for ``beta == 1`` (after the bias
    had already been zeroed) and let rounding errors accumulate in the
    bias over many steps.

    @param beta  interpolation weight between the base-rate model
                 (``beta == 0``) and the full model (``beta == 1``)
    @param step  index of the current step; currently unused, kept for
                 interface compatibility
    """
    mult = cp.scalar_functor.MULT
    sigm = cp.scalar_functor.SIGM

    # --- sample hidden given visible ------------------------------------
    cp.prod(self.h, self.w, self.v, 't', 'n')
    cp.matrix_plus_col(self.h, self.bias_hi)
    cp.apply_scalar_functor(self.h, mult, beta)
    cp.apply_scalar_functor(self.h, sigm)
    cp.rnd_binarize(self.h)

    # --- sample visible given hidden ------------------------------------
    cp.prod(self.v, self.w, self.h, 'n', 'n')
    cp.matrix_plus_col(self.v, self.bias_lo)
    cp.apply_scalar_functor(self.v, mult, beta)

    # Add (1 - beta) * baserate_bias using a scratch copy, so the stored
    # bias is never modified.  With a zero weight the term vanishes and is
    # skipped entirely.
    base_weight = 1 - beta
    if base_weight != 0:
        scaled_base = self.baserate_bias.copy()
        cp.apply_scalar_functor(scaled_base, mult, base_weight)
        cp.matrix_plus_col(self.v, scaled_base)
        scaled_base.dealloc()

    cp.apply_scalar_functor(self.v, sigm)
    cp.rnd_binarize(self.v)
def partialsum(self, acth, actv, row):
    """Sum out the visible variables for the given hidden states.

    Computes, per column, the exponent

        log(exp(bv + W h) + 1).sum(axis=0) + (h * bh).sum(axis=0)

    into ``row`` on the device, then returns the float64 sum of
    ``exp(row)`` over all entries.

    The arguments double as work buffers: ``actv`` and ``row`` are
    overwritten and ``acth`` is scaled in place by ``self.bh``.
    """
    add = cp.reduce_functor.ADD

    # actv = bv + W * acth
    cp.prod(actv, self.weight, acth, 'n', 'n')
    cp.matrix_plus_col(actv, self.bv)

    # actv = log(exp(actv) + 1)
    cp.apply_scalar_functor(actv, cp.scalar_functor.RECT, 1.0)

    # row = actv.sum(axis=0)
    cp.reduce_to_row(row, actv, add)

    # row += (acth * bh).sum(axis=0)
    cp.matrix_times_col(acth, self.bh)
    cp.reduce_to_row(row, acth, add, 1.0, 1.0)

    # exp(row), summed accurately on the host
    host_row = row.np.astype("float64")
    return math.fsum(np.exp(host_row).flatten())
def p_k(self, beta, tmp, tmp2, collect):
    """Compute three terms from ``self.v`` and hand each to ``collect``.

    ``tmp`` and ``tmp2`` are caller-provided work buffers.  ``collect`` is
    called three times with ``tmp``, which holds in turn:

    1. ``(1 - beta) * v^T * baserate_bias``
    2. the row-wise reduction (sum) of
       ``log(1 + exp(beta * (w^T v + bias_hi)))``
    3. ``beta * v^T * bias_lo``
    """
    mult = cp.scalar_functor.MULT

    # Term 1: base-rate contribution.
    cp.prod(tmp, self.v, self.baserate_bias, 't', 'n')
    cp.apply_scalar_functor(tmp, mult, (1 - beta))
    collect(tmp)

    # Term 2: hidden units summed out.
    cp.prod(tmp2, self.w, self.v, 't', 'n')
    cp.matrix_plus_col(tmp2, self.bias_hi)
    cp.apply_scalar_functor(tmp2, mult, beta)
    # RECT computes log(1+exp(x))
    cp.apply_scalar_functor(tmp2, cp.scalar_functor.RECT, 1)
    # HACK: tmp.T makes tmp row major.  That changes nothing because tmp is
    # a vector anyway, but vectors are always assumed to be row major.
    cp.reduce_to_row(tmp.T, tmp2, cp.reduce_functor.ADD)
    collect(tmp)

    # Term 3: visible-bias contribution.
    cp.prod(tmp, self.v, self.bias_lo.T, 't', 'n')
    cp.apply_scalar_functor(tmp, mult, beta)
    collect(tmp)
def calculateDeltaWeights(self, derivative, input, oldWeights):
    """Return the product of ``input`` and ``derivative`` (flag ``'t'``).

    The result is written to a newly allocated column-major float tensor
    that has the shape of ``oldWeights``; ``oldWeights`` is only used for
    its shape.
    """
    delta = cp.dev_tensor_float_cm(oldWeights.shape)
    cp.prod(delta, input, derivative, 'n', 't')
    return delta
def forward(self):
    """Forward pass: write ``weight * source.activations`` into
    ``target.activations``.

    Only the matrix product is computed here; no bias or nonlinearity is
    applied in this method.
    """
    dst = self.target.activations
    src = self.source.activations
    cp.prod(dst, self.weight, src)
def updateGradientNeg(self, layer1, layer2, batchsize):
    """Combine the negative-phase statistics with the gradient buffers.

    ``self.w_tmp`` is updated with ``cp.prod`` of ``layer1.act`` and
    ``layer2.act`` (flag ``'t'``); ``self.blo_tmp`` and ``self.bhi_tmp``
    are updated with ``ADD`` column reductions of ``layer1.act`` and
    ``layer2.act``.  All three calls pass the scalars
    ``-1/batchsize`` and ``1/batchsize``.

    NOTE(review): presumably these are the factor for the new term and the
    factor for the buffer's old content, in that order -- confirm against
    the CUV documentation.
    """
    factor_new = -1. / batchsize
    factor_old = 1. / batchsize
    reduce_add = cp.reduce_functor.ADD

    cp.prod(self.w_tmp, layer1.act, layer2.act, 'n', 't',
            factor_new, factor_old)
    cp.reduce_to_col(self.blo_tmp, layer1.act, reduce_add,
                     factor_new, factor_old)
    cp.reduce_to_col(self.bhi_tmp, layer2.act, reduce_add,
                     factor_new, factor_old)
def updateGradientPos(self, layer1, layer2):
    """Store the positive-phase statistics in the gradient buffers.

    ``self.w_tmp`` receives the product of ``layer1.act`` and
    ``layer2.act`` (flag ``'t'``); ``self.blo_tmp`` and ``self.bhi_tmp``
    receive the column reductions of ``layer1.act`` and ``layer2.act``
    using ``cp.reduce_to_col``'s default functor and factors.
    """
    lo_act = layer1.act
    hi_act = layer2.act
    cp.prod(self.w_tmp, lo_act, hi_act, 'n', 't')
    cp.reduce_to_col(self.blo_tmp, lo_act)
    cp.reduce_to_col(self.bhi_tmp, hi_act)
def downPass(self, layer1, layer2, sample):
    """Update ``layer1`` from ``layer2`` through this weight matrix.

    ``layer1.act`` is set to ``self.mat * layer2.act``; the rest of the
    update is delegated to ``layer1.postUpdateFromAbove``, which receives
    ``sample`` and ``self.bias_lo`` as its bias.
    """
    lower, upper = layer1, layer2
    cp.prod(lower.act, self.mat, upper.act, 'n', 'n')
    lower.postUpdateFromAbove(sample, bias=self.bias_lo)
def upPass(self, layer1, layer2, sample):
    """Update ``layer2`` from ``layer1`` through this weight matrix.

    ``layer2.act`` is set to the product of ``self.mat`` (flag ``'t'``) and
    ``layer1.act``; the rest of the update is delegated to
    ``layer2.postUpdateFromBelow``, which receives ``sample`` and
    ``self.bias_hi`` as its bias.
    """
    lower, upper = layer1, layer2
    cp.prod(upper.act, self.mat, lower.act, 't', 'n')
    upper.postUpdateFromBelow(sample, bias=self.bias_hi)
# Minimal cuv_python example: allocate three 2048x2048 device tensors,
# initialise them, and multiply them element-wise and as matrices.
import cuv_python as cp
C = cp.dev_tensor_float_cm([2048,2048])  # column major tensor
A = cp.dev_tensor_float_cm([2048,2048])
B = cp.dev_tensor_float_cm([2048,2048])
cp.fill(C,0)  # fill with some defined values, not really necessary here
cp.sequence(A)
cp.sequence(B)
cp.apply_binary_functor(B,A,cp.binary_functor.MULT)  # elementwise multiplication
B *= A  # operators also work (elementwise)
cp.prod(C,A,B,'n','t')  # matrix multiplication into the existing tensor C
C = cp.prod(A, B.T)  # numpy-like form, allocates new matrix for result (C is rebound to it)
def downPass(self, layer1, layer2, sample):
    """Propagate from ``layer2`` to ``layer1``.

    Writes ``self.mat * layer2.act`` into ``layer1.act`` and then calls
    ``layer1.postUpdateFromAbove(sample, bias=self.bias_lo)`` to finish the
    update.
    """
    destination = layer1
    cp.prod(destination.act, self.mat, layer2.act, 'n', 'n')
    destination.postUpdateFromAbove(sample, bias=self.bias_lo)
def upPass(self, layer1, layer2, sample):
    """Propagate from ``layer1`` to ``layer2``.

    Writes the product of ``self.mat`` (flag ``'t'``) and ``layer1.act``
    into ``layer2.act`` and then calls
    ``layer2.postUpdateFromBelow(sample, bias=self.bias_hi)`` to finish the
    update.
    """
    destination = layer2
    cp.prod(destination.act, self.mat, layer1.act, 't', 'n')
    destination.postUpdateFromBelow(sample, bias=self.bias_hi)
def forward(self):
    """Forward pass, calculates the activations of the next neuron layer.

    ``self.target.activations`` is set to
    ``self.weight * self.source.activations``, ``self.bias`` is added with
    ``cp.matrix_plus_col``, and the target layer's ``nonlinearity`` is then
    applied to the activations.
    """
    target = self.target
    cp.prod(target.activations, self.weight, self.source.activations)
    cp.matrix_plus_col(target.activations, self.bias)
    target.nonlinearity(target.activations)
def calculateDeltaWeights(self, derivative, input, oldWeights):
    """Compute the weight delta as ``input`` times ``derivative`` (``'t'``).

    A new column-major float tensor shaped like ``oldWeights`` is allocated
    for the result and returned; ``oldWeights`` contributes only its shape.
    """
    target_shape = oldWeights.shape
    result = cp.dev_tensor_float_cm(target_shape)
    cp.prod(result, input, derivative, 'n', 't')
    return result
# Minimal cuv_python example: allocate three 2048x2048 device tensors,
# initialise them, and multiply them element-wise and as matrices.
import cuv_python as cp
C = cp.dev_tensor_float_cm([2048, 2048])  # column major tensor
A = cp.dev_tensor_float_cm([2048, 2048])
B = cp.dev_tensor_float_cm([2048, 2048])
cp.fill(C, 0)  # fill with some defined values, not really necessary here
cp.sequence(A)
cp.sequence(B)
cp.apply_binary_functor(B, A, cp.binary_functor.MULT)  # elementwise multiplication
B *= A  # operators also work (elementwise)
cp.prod(C, A, B, 'n', 't')  # matrix multiplication into the existing tensor C
C = cp.prod(A, B.T)  # numpy-like form, allocates new matrix for result (C is rebound to it)