def d_nonlinearity(self, input_):
    """Apply the DTANH scalar functor to every element of the input.

    The functor is applied through cp.apply_scalar_functor directly on
    input_; nothing is returned.

    @param input_ -- input vector/matrix, overwritten with the result
    """
    derivative_functor = cp.scalar_functor.DTANH
    cp.apply_scalar_functor(input_, derivative_functor)
def test(self, input_matrix, teacher_matrix): """Function to test the network @param input_matrix -- matrix consisting of input data to the network. @param teacher_matrix -- matrix consisting of labels of input data . """ number_of_pictures = input_matrix.shape[-1] mse = 0 squared_errors = cp.dev_matrix_cmf(self.neuron_layer[-1].deltas.h, self.neuron_layer[-1].deltas.w) for batch in xrange(number_of_pictures/self.batch_size): index_begin = self.batch_size * batch index_end = index_begin + self.batch_size self.neuron_layer[0].activations = cp.push( input_matrix[:, index_begin:index_end].astype('float32').copy('F')) teachbatch = cp.push(teacher_matrix[:, index_begin:index_end].astype('float32').copy('F')) for i in xrange(self.number_of_layers): self.weight_layer[i].forward() cp.apply_binary_functor(squared_errors, self.neuron_layer[-1].deltas, cp.binary_functor.COPY) cp.apply_scalar_functor(squared_errors, cp.scalar_functor.SQUARE) mse += cp.sum(squared_errors) teachbatch.dealloc() print "MSE: ", (mse/number_of_pictures) squared_errors.dealloc()
def forward(self, input, weight, bias,linear=False):
    """Compute the output of one layer for a batch of inputs.

    A fresh tensor of shape [weight.shape[1], input.shape[1]] is allocated,
    filled by cp.prod(weight, input) with the flags "t", "n" (presumably
    weight transposed times input), and bias is added via
    cp.matrix_plus_col. Unless linear is set, the SIGM functor is applied.

    @param input -- input activations, one sample per column
    @param weight -- weight matrix of the layer
    @param bias -- bias column added to the product
    @param linear -- if True, skip the sigmoid
    @return the newly allocated result tensor (caller owns it)
    """
    n_outputs = weight.shape[1]
    n_samples = input.shape[1]
    result = cp.dev_tensor_float_cm([n_outputs, n_samples])
    cp.fill(result, 0)
    cp.prod(result, weight, input, "t", "n")
    cp.matrix_plus_col(result, bias)
    if linear:
        return result
    cp.apply_scalar_functor(result, cp.scalar_functor.SIGM)
    return result
def forward(self, input, weight, bias, linear=False):
    """Compute the output of one layer for a batch of inputs.

    A fresh tensor of shape [weight.shape[1], input.shape[1]] is allocated,
    filled by cp.prod(weight, input) with the flags "t", "n" (presumably
    weight transposed times input), and bias is added via
    cp.matrix_plus_col. Unless linear is set, the SIGM functor is applied.

    @param input -- input activations, one sample per column
    @param weight -- weight matrix of the layer
    @param bias -- bias column added to the product
    @param linear -- if True, skip the sigmoid
    @return the newly allocated result tensor (caller owns it)
    """
    out_shape = [weight.shape[1], input.shape[1]]
    activation = cp.dev_tensor_float_cm(out_shape)
    cp.fill(activation, 0)
    cp.prod(activation, weight, input, "t", "n")
    cp.matrix_plus_col(activation, bias)
    if linear:
        return activation
    cp.apply_scalar_functor(activation, cp.scalar_functor.SIGM)
    return activation
def prepare_dbg(self,mbatch_provider,Npoint,nsteps,eval_start,save_callback):
    """ Prepare the data for visualization

    Loads a minibatch into layer 0, optionally replaces the starting state
    by scaled uniform noise (depending on eval_start), runs layer updates
    scheduled by an UpdateQ until every layer was updated at least nsteps
    times with sampling and then up to nsteps+2 times without sampling,
    saves the resulting visible activations and finally pulls layer
    activations and (if small enough) weights to the host.

    @param mbatch_provider -- supplies the minibatch (getMiniBatch) and the
                              sample set stored in self.dbg_sampleset
    @param Npoint -- passed to save_fantasy; also the number of columns of
                     sampleset_ kept in self.dbg_sampleset
    @param nsteps -- minimum number of sampled updates per layer
    @param eval_start -- one of the EvalStartType values
    @param save_callback -- forwarded to save_fantasy
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # flattened source line -- confirm against the original file.
    print "Preparing data for visualization..."
    mbatch_provider.getMiniBatch(self.cfg.batchsize, self.layers[0].act)
    if eval_start == EvalStartType.trainingset:
        # keep the minibatch that was just loaded
        print "Starting Eval from Trainingset"
        pass
    elif eval_start == EvalStartType.vnoise:
        # overwrite the visible layer with uniform noise scaled by 0.3
        print "Starting Eval from VNoise"
        cp.fill_rnd_uniform(self.layers[0].act)
        cp.apply_scalar_functor(self.layers[0].act,cp.scalar_functor.MULT,0.3)
    elif eval_start == EvalStartType.h1noise:
        # put scaled noise into layer 1 and sample layer 0 from it
        print "Starting Eval from H1Noise"
        cp.fill_rnd_uniform(self.layers[1].act)
        cp.apply_scalar_functor(self.layers[1].act,cp.scalar_functor.MULT,0.3)
        self.downPass(1,sample=True)
    self.dbg_datout = []
    video = self.cfg.video
    # initial bottom-up pass through all layers, without sampling
    for layer_num,layer in enumerate(self.layers[0:-1]):
        self.upPass(layer_num, sample=False)
    uq = UpdateQ(len(self.layers))
    uq.push([1]) # start with some layer in between
    step = 0
    # sampled updates until uq.minupdates reports nsteps
    while uq.minupdates([]) < nsteps:
        layernum = uq.pop(firstlayer=0)
        if video and layernum == 0:
            # save an unsampled version of the visible layer as a frame
            self.updateLayer(layernum,sample=False)
            self.save_fantasy(step, Npoint,save_callback, self.layers[0].act)
        self.updateLayer(layernum,sample=True)
        step+=1
    # a few more updates without sampling
    while uq.minupdates([]) < nsteps+2:
        layernum = uq.pop(firstlayer=0)
        self.updateLayer(layernum,sample=False)
    self.updateLayer(0,sample=False)
    # pass up again before we save fantasies -- assures that we see bottom-up activities!
    for layer_num,layer in enumerate(self.layers[0:-1]):
        self.upPass(layer_num, sample=False)
    self.save_fantasy(nsteps+1,Npoint,save_callback, self.layers[0].act)
    self.dbg_sampleset = mbatch_provider.sampleset_[:, 0:Npoint].T
    print "Pulling Layer-Activations..."
    self.act = {}
    self.act_info = {}
    for l in xrange(1, self.cfg.num_layers):
        L = self.layers[l]
        # the top layer (l == num_layers-1) is not stored
        if l<self.cfg.num_layers-1:
            self.act_info["%d-subs"%l] = dict(px=np.sqrt(L.size), py=np.sqrt(L.size))
            self.act["%d-subs"%l] = L.act.np
    # only pull weights to the host when the first matrix is small enough
    if self.weights[0].mat.shape[0] < 800*6:
        print "Trying to pull W0..."
        try:
            self.W=self.weights[0].mat.np
            if len(self.weights)>1:
                self.W1=self.weights[1].mat.np
        except MemoryError:
            print("weights too big!")
    print "done."
def __init__(self, cfg, weights,biases):
    """Set up bookkeeping for a network built from given weights and biases.

    For every given weight matrix, companion matrices (previous deltas,
    learn rates and -- depending on the configuration -- gradient
    accumulators) are created. Afterwards a dense last layer with
    cfg.num_classes outputs is either loaded or randomly initialised.

    Note that self.Weights and self.Bias are the lists passed in, so the
    caller's lists are extended by the last layer.

    @param cfg -- configuration object (num_layers, num_classes, load,
                  finetune_online_learning, finetune_rprop are read here)
    @param weights -- list of weight matrices of the lower layers
    @param biases -- list of bias vectors of the lower layers
    """
    # NOTE(review): the block nesting below (in particular the extent of
    # the else-branch) was reconstructed from a flattened source line --
    # confirm against the original file.
    self.cfg=cfg
    self.NumCorrect = 0
    self.Errorrate=[]
    self.testError=[]
    self.NumberOfLayers = cfg.num_layers+1
    # default hook: does nothing but return the mlp
    self.preEpochHook = lambda mlp,epoch: mlp
    self.Weights = weights
    self.DeltaWeightsOld = []
    self.WeightsLearnRate = []
    self.dWeights = []
    self.dBias = []
    self.Bias = biases
    self.DeltaBiasOld = []
    self.BiasLearnRate = []
    # initial value for all per-weight / per-bias learn rates
    l = 0.001
    self.NumberOfNeuronsPerLayer = []
    for i in xrange(self.NumberOfLayers-2):
        #self.Weights.append(newWeights)
        dim1, dim2 = self.Weights[i].shape
        self.createCopyFilled(self.DeltaWeightsOld,self.Weights[i] , 0)
        self.createCopyFilled(self.WeightsLearnRate,self.Weights[i] , l)
        # gradient accumulators are created unless plain online learning
        # (online learning without rprop) is configured
        if not self.cfg.finetune_online_learning or (self.cfg.finetune_online_learning and self.cfg.finetune_rprop):
            self.createCopyFilled(self.dWeights,self.Weights[i] , 0)
            self.createCopyFilled(self.dBias,self.Bias[i] , 0)
        self.createFilled(self.DeltaBiasOld, dim2, 1, 0)
        self.createFilled(self.BiasLearnRate, dim2, 1, l)
        self.NumberOfNeuronsPerLayer.append(dim1)
    # create dense matrix for last layer
    dim1,dim2 = self.Weights[-1].shape[1], self.cfg.num_classes
    if self.cfg.load and self.loadLastLayer(dim1,dim2):
        pass
    else:
        self.Weights.append(cp.dev_tensor_float_cm([dim1,dim2]))
        # uniform random numbers, shifted by -0.5 ...
        cp.fill_rnd_uniform(self.Weights[-1])
        #print "Initializing weights with rnd(%2.5f)",
        cp.apply_scalar_functor(self.Weights[-1],cp.scalar_functor.SUBTRACT, 0.5)
        #cp.apply_scalar_functor(self.Weights[-1],cp.scalar_functor.MULT, 1./math.sqrt(self.Weights[-2].w))
        # ... and scaled by 1/shape[1] of the layer below (Weights[-2] is
        # the former last matrix now that the new one was appended)
        cp.apply_scalar_functor(self.Weights[-1],cp.scalar_functor.MULT, 1./self.Weights[-2].shape[1])
        self.createFilled(self.Bias, dim2, 1, 0)
    self.createFilled(self.DeltaBiasOld, dim2, 1, 0)
    self.createFilled(self.BiasLearnRate, dim2, 1, l)
    self.createFilled(self.DeltaWeightsOld,dim1,dim2,0)
    self.createFilled(self.WeightsLearnRate,dim1,dim2,l)
    if not self.cfg.finetune_online_learning or (self.cfg.finetune_online_learning and self.cfg.finetune_rprop):
        self.createCopyFilled(self.dWeights,self.Weights[-1] , 0)
        self.createCopyFilled(self.dBias,self.Bias[-1] , 0)
    self.NumberOfNeuronsPerLayer.append(dim1)
    self.NumberOfNeuronsPerLayer.append(dim2)
    self.reconstruction_error = []
def delta_hidden(self, weight, knownDerivative, netInput):
    """Back-propagate a known derivative through one weight matrix.

    Computes cp.prod(weight, knownDerivative) into a new tensor of shape
    [weight.shape[0], netInput.shape[1]] and multiplies it element-wise
    with the DSIGM functor applied to a copy of netInput.

    @param weight -- weight matrix between this layer and the next one
    @param knownDerivative -- deltas of the next layer
    @param netInput -- values the DSIGM functor is applied to (not modified)
    @return newly allocated tensor holding the deltas (caller owns it)
    """
    delta_shape = [weight.shape[0], netInput.shape[1]]
    delta = cp.dev_tensor_float_cm(delta_shape)
    cp.prod(delta, weight, knownDerivative, 'n', 'n')

    # temporary holding the sigmoid derivative; released before returning
    sigmoid_derivative = netInput.copy()
    cp.apply_scalar_functor(sigmoid_derivative, cp.scalar_functor.DSIGM)
    cp.apply_binary_functor(delta, sigmoid_derivative, cp.binary_functor.MULT)
    sigmoid_derivative.dealloc()

    return delta
def delta_output(self, calculated, correct):
    """Compute the output-layer deltas.

    The result is DSIGM(calculated) multiplied element-wise with
    (correct - calculated).

    @param calculated -- output of the network
    @param correct -- teacher values
    @return newly allocated tensor with the deltas (caller owns it)
    """
    delta_shape = [calculated.shape[0], correct.shape[1]]
    derivative = cp.dev_tensor_float_cm(delta_shape)
    error = cp.dev_tensor_float_cm(derivative.shape)

    # derivative = DSIGM(calculated)
    cp.copy(derivative, calculated)
    cp.apply_scalar_functor(derivative, cp.scalar_functor.DSIGM)

    # error = correct - calculated
    cp.copy(error, correct)
    cp.apply_binary_functor(error, calculated, cp.binary_functor.SUBTRACT)

    cp.apply_binary_functor(derivative, error, cp.binary_functor.MULT)
    error.dealloc()
    return derivative
def delta_output(self, calculated, correct):
    """Compute the output-layer deltas.

    The result is DSIGM(calculated) multiplied element-wise with
    (correct - calculated).

    @param calculated -- output of the network
    @param correct -- teacher values
    @return newly allocated tensor with the deltas (caller owns it)
    """
    n_rows = calculated.shape[0]
    n_cols = correct.shape[1]
    derivative = cp.dev_tensor_float_cm([n_rows, n_cols])
    difference = cp.dev_tensor_float_cm(derivative.shape)

    # derivative = DSIGM(calculated)
    cp.copy(derivative, calculated)
    cp.apply_scalar_functor(derivative, cp.scalar_functor.DSIGM)

    # difference = correct - calculated
    cp.copy(difference, correct)
    cp.apply_binary_functor(difference, calculated,
                            cp.binary_functor.SUBTRACT)

    cp.apply_binary_functor(derivative, difference, cp.binary_functor.MULT)
    difference.dealloc()
    return derivative
def fit(self, input_matrix, teacher_matrix, n_epochs=100, learnrate = 0.10): """ Function to train the network @param input_matrix -- matrix consisting of input data to the network. @param teacher_matrix -- matrix consisting of labels of input data. @param n_epochs -- number of epochs the network is to be trained. """ n_samples = input_matrix.shape[-1] squared_errors = cp.dev_tensor_float_cm(self.neuron_layers[-1].deltas.shape) for r in xrange(n_epochs): print "Epoch ", r + 1, "/", n_epochs mse = 0.0 ce = 0.0 for batch in xrange(n_samples / self.batch_size): index_begin = self.batch_size * batch index_end = self.batch_size + index_begin # Push input and teacher to GPU memory # .copy("F") is needed since memory is non-contiguous self.neuron_layers[0].activations = cp.dev_tensor_float_cm( input_matrix[:, index_begin:index_end].copy('F')) teacher_batch_host = teacher_matrix[:, index_begin:index_end] teacher_batch = cp.dev_tensor_float_cm(teacher_batch_host.copy('F')) # Forward-Pass for i in xrange(self.n_layers): self.weight_layers[i].forward() # calculate error at output layer cp.copy(self.neuron_layers[-1].deltas, teacher_batch) self.neuron_layers[-1].deltas -= self.neuron_layers[-1].activations cp.copy(squared_errors, self.neuron_layers[-1].deltas) cp.apply_scalar_functor(squared_errors, cp.scalar_functor.SQUARE) mse += cp.sum(squared_errors) ce += float(np.sum(np.argmax(teacher_batch_host, axis=0) != np.argmax(self.neuron_layers[-1].activations.np, axis=0))) # Backward-Pass for i in xrange(self.n_layers): self.weight_layers[self.n_layers - i - 1].backward(learnrate, decay=.01) # Don't wait for garbage collector teacher_batch.dealloc() self.neuron_layers[0].activations.dealloc() print "MSE: ", (mse / n_samples) print "Classification Error Training: ", (ce / n_samples) squared_errors.dealloc()
def delta_outputSoftMax(self, calculated, correct):
    """Compute output deltas for a soft-max output layer.

    A copy of calculated is exponentiated and divided by its per-column
    sums (plus a small constant 0.1/number_of_rows); the result r is then
    replaced by -1*r + 1*correct via the AXPBY functor.

    @param calculated -- output of the network before the soft-max
    @param correct -- teacher values
    @return newly allocated tensor with the deltas (caller owns it)
    """
    derivative = calculated.copy()
    cp.apply_scalar_functor(derivative, cp.scalar_functor.EXP)

    # one normaliser per column of the batch
    normalizer = cp.dev_tensor_float(calculated.shape[1])
    cp.fill(normalizer, 0)
    cp.reduce_to_row(normalizer, derivative, cp.reduce_functor.ADD)
    epsilon = 0.1/derivative.shape[0]
    cp.apply_scalar_functor(normalizer, cp.scalar_functor.ADD, epsilon)

    # divide through a transposed view of the same data
    transposed = cp.transposed_view(derivative)
    cp.matrix_divide_col(transposed, normalizer)

    cp.apply_binary_functor(derivative, correct, cp.binary_functor.AXPBY, -1.,1.)
    normalizer.dealloc()
    return derivative
def __init__(self, source_layer, target_layer):
    """Constructor

    Creates a weight matrix with target.activations.h rows and
    source.activations.h columns holding uniform random numbers that were
    shifted by -0.5 and divided by 10, plus a zero bias column.

    @param source_layer pointer to the previous neuron layer.
    @param target_layer pointer to the next neuron layer.
    """
    self.source = source_layer
    self.target = target_layer
    n_target = self.target.activations.h
    n_source = self.source.activations.h

    weight = cp.get_filled_matrix(n_target, n_source, 0.0)
    self.weight = weight
    cp.fill_rnd_uniform(weight)
    cp.apply_scalar_functor(weight, cp.scalar_functor.SUBTRACT, 0.5)
    cp.apply_scalar_functor(weight, cp.scalar_functor.DIV, 10)

    self.bias = cp.get_filled_matrix(n_target, 1, 0)
def delta_outputSoftMax(self, calculated, correct):
    """Compute output deltas for a soft-max output layer.

    A copy of calculated is exponentiated and divided by its per-column
    sums (plus a small constant 0.1/number_of_rows); the result r is then
    replaced by -1*r + 1*correct via the AXPBY functor.

    @param calculated -- output of the network before the soft-max
    @param correct -- teacher values
    @return newly allocated tensor with the deltas (caller owns it)
    """
    softmax = calculated.copy()
    cp.apply_scalar_functor(softmax, cp.scalar_functor.EXP)

    # one normaliser per column of the batch
    column_sums = cp.dev_tensor_float(calculated.shape[1])
    cp.fill(column_sums, 0)
    cp.reduce_to_row(column_sums, softmax, cp.reduce_functor.ADD)
    smoothing = 0.1 / softmax.shape[0]
    cp.apply_scalar_functor(column_sums, cp.scalar_functor.ADD, smoothing)

    # divide through a transposed view of the same data
    softmax_t = cp.transposed_view(softmax)
    cp.matrix_divide_col(softmax_t, column_sums)

    cp.apply_binary_functor(softmax, correct, cp.binary_functor.AXPBY,
                            -1., 1.)
    column_sums.dealloc()
    return softmax
def partialsumV(self, actv, acth, row):
    """Sum out the hidden variables for the given visible states.

    Per column of actv the quantity
        RECT(bh + W^T v).sum(axis=0) + (v*bv).sum(axis=0)
    is accumulated in row (the original comments describe RECT with
    parameter 1.0 as log(exp(x)+1)). No exponential is taken here: the
    values are summed over all columns and divided by the number of
    columns.

    Note that acth, row and also actv (multiplied by bv) are overwritten.

    @param actv -- visible states, one per column; modified in place
    @param acth -- scratch space for the hidden activations
    @param row -- scratch row with one entry per column of actv
    @return the mean over columns as a Python float
    """
    # acth = bh + W^T * actv
    cp.prod(acth, self.weight, actv, 't', 'n')
    cp.matrix_plus_col(acth, self.bh)

    # acth = RECT(acth)
    cp.apply_scalar_functor(acth, cp.scalar_functor.RECT, 1.0)

    # row = acth.sum(axis=0)
    cp.reduce_to_row(row, acth, cp.reduce_functor.ADD)

    # row += (v*bv).sum(axis=0)
    cp.matrix_times_col(actv, self.bv)
    cp.reduce_to_row(row, actv, cp.reduce_functor.ADD, 1.0, 1.0)

    # accumulate on the host in double precision
    host_row = row.np.astype("float64")
    total = math.fsum(host_row.flatten())
    return total / actv.shape[1]
def partialsumV(self, actv, acth, row):
    """Sum out the hidden variables for the given visible states.

    Per column of actv the quantity
        RECT(bh + W^T v).sum(axis=0) + (v*bv).sum(axis=0)
    is accumulated in row (the original comments describe RECT with
    parameter 1.0 as log(exp(x)+1)). No exponential is taken here: the
    values are summed over all columns and divided by the number of
    columns.

    Note that acth, row and also actv (multiplied by bv) are overwritten.

    @param actv -- visible states, one per column; modified in place
    @param acth -- scratch space for the hidden activations
    @param row -- scratch row with one entry per column of actv
    @return the mean over columns as a Python float
    """
    add = cp.reduce_functor.ADD

    # acth = RECT(bh + W^T * actv)
    cp.prod(acth, self.weight, actv, "t", "n")
    cp.matrix_plus_col(acth, self.bh)
    cp.apply_scalar_functor(acth, cp.scalar_functor.RECT, 1.0)

    # row = acth.sum(axis=0) + (v*bv).sum(axis=0)
    cp.reduce_to_row(row, acth, add)
    cp.matrix_times_col(actv, self.bv)
    cp.reduce_to_row(row, actv, add, 1.0, 1.0)

    # accumulate on the host in double precision
    values = row.np.astype("float64").flatten()
    return math.fsum(values) / actv.shape[1]
def partialsum(self, acth, actv, row):
    """Sum out the visible variables for the given hidden states.

    Per column of acth the exponent
        RECT(bv + W h).sum(axis=0) + (h*bh).sum(axis=0)
    is accumulated in row (the original comments describe RECT with
    parameter 1.0 as log(exp(x)+1)); the exponentials of all entries are
    then summed on the host.

    Note that actv, row and also acth (multiplied by bh) are overwritten.

    @param acth -- hidden states, one per column; modified in place
    @param actv -- scratch space for the visible activations
    @param row -- scratch row with one entry per column of acth
    @return sum of exp(row) as a Python float
    """
    # actv = bv + W * acth
    cp.prod(actv, self.weight, acth, 'n', 'n')
    cp.matrix_plus_col(actv, self.bv)

    # actv = RECT(actv)
    cp.apply_scalar_functor(actv, cp.scalar_functor.RECT, 1.0)

    # row = actv.sum(axis=0)
    cp.reduce_to_row(row, actv, cp.reduce_functor.ADD)

    # row += (h*bh).sum(axis=0)
    cp.matrix_times_col(acth, self.bh)
    cp.reduce_to_row(row, acth, cp.reduce_functor.ADD, 1.0, 1.0)

    # exponentiate and accumulate on the host in double precision
    exponents = row.np.astype("float64")
    terms = np.exp(exponents).flatten()
    return math.fsum(terms)
def partialsum(self, acth, actv, row):
    """Sum out the visible variables for the given hidden states.

    Per column of acth the exponent
        RECT(bv + W h).sum(axis=0) + (h*bh).sum(axis=0)
    is accumulated in row (the original comments describe RECT with
    parameter 1.0 as log(exp(x)+1)); the exponentials of all entries are
    then summed on the host.

    Note that actv, row and also acth (multiplied by bh) are overwritten.

    @param acth -- hidden states, one per column; modified in place
    @param actv -- scratch space for the visible activations
    @param row -- scratch row with one entry per column of acth
    @return sum of exp(row) as a Python float
    """
    add = cp.reduce_functor.ADD

    # actv = RECT(bv + W * acth)
    cp.prod(actv, self.weight, acth, "n", "n")
    cp.matrix_plus_col(actv, self.bv)
    cp.apply_scalar_functor(actv, cp.scalar_functor.RECT, 1.0)

    # row = actv.sum(axis=0) + (h*bh).sum(axis=0)
    cp.reduce_to_row(row, actv, add)
    cp.matrix_times_col(acth, self.bh)
    cp.reduce_to_row(row, acth, add, 1.0, 1.0)

    # exponentiate and accumulate on the host in double precision
    host_row = row.np.astype("float64")
    return math.fsum(np.exp(host_row).flatten())
def p_k(self,beta,tmp,tmp2,collect):
    """Hand the three terms evaluated on self.v at temperature beta to collect.

    collect is called three times with tmp, holding in turn
      1. (1-beta) * prod(v, baserate_bias),
      2. the column sums of RECT(beta * (w^T v + bias_hi)),
      3. beta * prod(v, bias_lo).

    @param beta -- interpolation parameter
    @param tmp -- scratch vector, overwritten three times
    @param tmp2 -- scratch matrix for the hidden activations
    @param collect -- callable invoked with tmp after each term
    """
    scale = cp.scalar_functor.MULT

    # term 1: base-rate contribution
    cp.prod(tmp, self.v, self.baserate_bias, 't', 'n')
    cp.apply_scalar_functor(tmp, scale, (1-beta))
    collect(tmp)

    # term 2: hidden units summed out
    cp.prod(tmp2, self.w, self.v, 't', 'n')
    cp.matrix_plus_col(tmp2, self.bias_hi)
    cp.apply_scalar_functor(tmp2, scale, beta)
    # RECT computes log(1+exp(x))
    cp.apply_scalar_functor(tmp2, cp.scalar_functor.RECT, 1)
    # tmp.T is an evil hack. it makes tmp into row major, which doesn't
    # change anything since it's a vector any way. But vectors are always
    # assumed to be row major.
    cp.reduce_to_row(tmp.T, tmp2, cp.reduce_functor.ADD)
    collect(tmp)

    # term 3: visible bias contribution
    cp.prod(tmp, self.v, self.bias_lo.T, 't', 'n')
    cp.apply_scalar_functor(tmp, scale, beta)
    collect(tmp)
def normalize_255(self, batch):
    """ normalize by dividing every element of batch by 255 (in place) """
    divisor = 255.
    cp.apply_scalar_functor(batch, cp.scalar_functor.DIV, divisor)
def nonlinearity(self):
    """Apply the SIGM functor to self.act unless the units are gaussian."""
    if self.unit_type == UnitType.gaussian:
        # gaussian units keep their activation unchanged
        return
    cp.apply_scalar_functor(self.act, cp.scalar_functor.SIGM)
def prepare_dbg(self, mbatch_provider, Npoint, nsteps, eval_start, save_callback):
    """ Prepare the data for visualization

    Loads a minibatch into layer 0, optionally replaces the starting state
    by scaled uniform noise (depending on eval_start), alternates up- and
    down-passes at the top of the network for nsteps+100 iterations
    (sampling is switched off once step exceeds nsteps), propagates the
    result down to the visible layer, saves it and finally pulls layer
    activations and (if small enough) weights to the host.

    @param mbatch_provider -- supplies the minibatch (getMiniBatch) and the
                              sample set stored in self.dbg_sampleset
    @param Npoint -- passed to save_fantasy; also the number of columns of
                     sampleset_ kept in self.dbg_sampleset
    @param nsteps -- number of steps during which sampling is enabled
    @param eval_start -- one of the EvalStartType values
    @param save_callback -- forwarded to save_fantasy
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # flattened source line -- confirm against the original file.
    print "Preparing data for visualization..."
    mbatch_provider.getMiniBatch(self.cfg.batchsize, self.layers[0].act)
    if eval_start == EvalStartType.trainingset:
        # keep the minibatch that was just loaded
        print "Starting Eval from Trainingset"
        pass
    elif eval_start == EvalStartType.vnoise:
        # overwrite the visible layer with uniform noise scaled by 0.3
        print "Starting Eval from VNoise"
        cp.fill_rnd_uniform(self.layers[0].act)
        cp.apply_scalar_functor(self.layers[0].act, cp.scalar_functor.MULT, 0.3)
    elif eval_start == EvalStartType.h1noise:
        # put scaled noise into layer 1 and sample layer 0 from it
        print "Starting Eval from H1Noise"
        cp.fill_rnd_uniform(self.layers[1].act)
        cp.apply_scalar_functor(self.layers[1].act, cp.scalar_functor.MULT, 0.3)
        self.downPass(1, sample = True)
    self.dbg_datout = []
    video = self.cfg.video
    # initial bottom-up pass through all layers, without sampling
    for layer_num, layer in enumerate(self.layers[0:-1]):
        self.upPass(layer_num, sample = False)
        #if layer_num+2 < len(self.layers):
            #assert(False)
    # number of additional iterations that run without sampling
    num_meanfield = 100
    for step in xrange(nsteps+num_meanfield):
        # sample while step <= nsteps, afterwards use mean-field updates
        sample = not step>nsteps
        self.upPass(self.cfg.num_layers-2, sample = sample)
        if video:
            # project the current state down to layer 0 and save a frame
            for lay_num in reversed(xrange(1, self.cfg.num_layers)):
                self.downPass(lay_num, sample = False)
            self.save_fantasy(step, Npoint, save_callback, self.layers[0].act)
        self.downPass(self.cfg.num_layers-1, sample = sample)
    # final top-down pass without sampling
    for layer_num in reversed(xrange(1, self.cfg.num_layers)):
        self.downPass(layer_num, sample = False)
        layer = self.layers[layer_num-1]
    #for bla in xrange(1):
    #    self.downPass(1, sample = False)
    #    self.upPass(0, sample = False)
    # pass up again before we save fantasies -- assures that we see bottom-up activities!
    for layer_num, layer in enumerate(self.layers[0:-1]):
        self.upPass(layer_num, sample = False)
        #if layer_num+2 < len(self.layers):
            #assert(False)
    self.save_fantasy(nsteps+1, Npoint, save_callback, self.layers[0].act)
    self.dbg_sampleset = mbatch_provider.sampleset_[:, 0:Npoint].T
    print "Pulling Layer-Activations..."
    self.act = {}
    self.act_info = {}
    for l in xrange(1, self.cfg.num_layers):
        L = self.layers[l]
        # the top layer (l == num_layers-1) is not stored
        if l<self.cfg.num_layers-1:
            self.act_info["%d-subs"%l] = dict(px = np.sqrt(L.size), py = np.sqrt(L.size))
            self.act["%d-subs"%l] = L.act.np
    # only pull weights to the host when the first matrix is small enough
    if self.weights[0].mat.shape[0] < 800*6:
        print "Trying to pull W0..."
        try:
            self.W = self.weights[0].mat.np
            if len(self.weights)>1:
                self.W1 = self.weights[1].mat.np
        except MemoryError:
            print("weights too big!")
    print "done."
def prepare_dbg(self, mbatch_provider, Npoint, nsteps, eval_start, save_callback):
    """ Prepare the data for visualization

    Loads a minibatch into layer 0, optionally replaces the starting state
    by scaled uniform noise (depending on eval_start), runs layer updates
    scheduled by an UpdateQ until every layer was updated at least nsteps
    times with sampling and then up to nsteps+2 times without sampling,
    saves the resulting visible activations and finally pulls layer
    activations and (if small enough) weights to the host.

    @param mbatch_provider -- supplies the minibatch (getMiniBatch) and the
                              sample set stored in self.dbg_sampleset
    @param Npoint -- passed to save_fantasy; also the number of columns of
                     sampleset_ kept in self.dbg_sampleset
    @param nsteps -- minimum number of sampled updates per layer
    @param eval_start -- one of the EvalStartType values
    @param save_callback -- forwarded to save_fantasy
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # flattened source line -- confirm against the original file.
    print "Preparing data for visualization..."
    mbatch_provider.getMiniBatch(self.cfg.batchsize, self.layers[0].act)
    if eval_start == EvalStartType.trainingset:
        # keep the minibatch that was just loaded
        print "Starting Eval from Trainingset"
        pass
    elif eval_start == EvalStartType.vnoise:
        # overwrite the visible layer with uniform noise scaled by 0.3
        print "Starting Eval from VNoise"
        cp.fill_rnd_uniform(self.layers[0].act)
        cp.apply_scalar_functor(self.layers[0].act, cp.scalar_functor.MULT, 0.3)
    elif eval_start == EvalStartType.h1noise:
        # put scaled noise into layer 1 and sample layer 0 from it
        print "Starting Eval from H1Noise"
        cp.fill_rnd_uniform(self.layers[1].act)
        cp.apply_scalar_functor(self.layers[1].act, cp.scalar_functor.MULT, 0.3)
        self.downPass(1, sample=True)
    self.dbg_datout = []
    video = self.cfg.video
    # initial bottom-up pass through all layers, without sampling
    for layer_num, layer in enumerate(self.layers[0:-1]):
        self.upPass(layer_num, sample=False)
    uq = UpdateQ(len(self.layers))
    uq.push([1])  # start with some layer in between
    step = 0
    # sampled updates until uq.minupdates reports nsteps
    while uq.minupdates([]) < nsteps:
        layernum = uq.pop(firstlayer=0)
        if video and layernum == 0:
            # save an unsampled version of the visible layer as a frame
            self.updateLayer(layernum, sample=False)
            self.save_fantasy(step, Npoint, save_callback, self.layers[0].act)
        self.updateLayer(layernum, sample=True)
        step += 1
    # a few more updates without sampling
    while uq.minupdates([]) < nsteps + 2:
        layernum = uq.pop(firstlayer=0)
        self.updateLayer(layernum, sample=False)
    self.updateLayer(0, sample=False)
    # pass up again before we save fantasies -- assures that we see bottom-up activities!
    for layer_num, layer in enumerate(self.layers[0:-1]):
        self.upPass(layer_num, sample=False)
    self.save_fantasy(nsteps + 1, Npoint, save_callback, self.layers[0].act)
    self.dbg_sampleset = mbatch_provider.sampleset_[:, 0:Npoint].T
    print "Pulling Layer-Activations..."
    self.act = {}
    self.act_info = {}
    for l in xrange(1, self.cfg.num_layers):
        L = self.layers[l]
        # the top layer (l == num_layers-1) is not stored
        if l < self.cfg.num_layers - 1:
            self.act_info["%d-subs" % l] = dict(px=np.sqrt(L.size), py=np.sqrt(L.size))
            self.act["%d-subs" % l] = L.act.np
    # only pull weights to the host when the first matrix is small enough
    if self.weights[0].mat.shape[0] < 800 * 6:
        print "Trying to pull W0..."
        try:
            self.W = self.weights[0].mat.np
            if len(self.weights) > 1:
                self.W1 = self.weights[1].mat.np
        except MemoryError:
            print("weights too big!")
    print "done."
def finalize_stats(self):
    """ use N, mean and mean2 to generate data for normalization

    On return the following attributes are set (each one aliases the
    accumulator it was computed in, no copies are made):
      negative_mean -- -(mean / N)            (same object as self.mean)
      std           -- sqrt(mean2 / N - (mean / N)^2 + 0.01)
                                              (same object as self.mean2)
      range         -- MAX functor with 1. applied to (max - min),
                       plus 0.01              (same object as self.max)
      negative_min  -- -min                   (same object as self.min)
    """
    scalar_op = cp.apply_scalar_functor
    sf = cp.scalar_functor
    inv_n = 1. / self.N

    # mean := mean / N, and keep its square in a temporary
    scalar_op(self.mean, sf.MULT, inv_n)
    squared_mean = self.mean.copy()
    scalar_op(squared_mean, sf.SQUARE)

    # mean2 := mean2 / N - squared_mean
    scalar_op(self.mean2, sf.MULT, inv_n)
    cp.apply_binary_functor(self.mean2, squared_mean, cp.binary_functor.SUBTRACT)

    # std is sqrt of difference
    scalar_op(self.mean2, sf.ADD, 0.01)  # numerical stability
    scalar_op(self.mean2, sf.SQRT)
    self.std = self.mean2
    squared_mean.dealloc()

    # negate mean (so we can add it to normalize a matrix)
    scalar_op(self.mean, sf.MULT, -1.)
    self.negative_mean = self.mean

    # calculate range
    cp.apply_binary_functor(self.max, self.min, cp.binary_functor.SUBTRACT)
    scalar_op(self.max, sf.MAX, 1.)
    self.range = self.max
    scalar_op(self.range, sf.ADD, 0.01)  # numerical stability

    # calculate negative min
    scalar_op(self.min, sf.MULT, -1.)
    self.negative_min = self.min

    # sanity checks on the results
    assert not cp.has_nan(self.negative_mean)
    assert not cp.has_inf(self.negative_mean)
    assert not cp.has_nan(self.std)
    assert not cp.has_inf(self.std)
    assert not cp.has_nan(self.negative_min)
    assert not cp.has_inf(self.range)
class MLP: """ A Multi-Layer Perceptron """ def __init__(self, neurons, batch_size): """Constructor @param neurons -- array of sizes of layers. @param batch_size -- size of batch being used for training. """ self.number_of_layers = len(neurons) - 1 self.batch_size = batch_size self.neuron_layer = [] self.weight_layer = [] for i in xrange(self.number_of_layers+1): dim1 = neurons[i] self.neuron_layer.append(neuron_layer(dim1, self.batch_size )) for i in xrange(self.number_of_layers): self.weight_layer.append(weight_layer(self.neuron_layer[i], self.neuron_layer[i+1])) def train(self, input_matrix, teacher_matrix, number_of_epochs): """Function to train the network @param input_matrix -- matrix consisting of input data to the network. @param teacher_matrix -- matrix consisting of labels of input data. @param number_of_epochs -- number of rounds the network is to be trained. """ number_of_pictures = input_matrix.shape[-1] squared_errors = cp.dev_matrix_cmf(self.neuron_layer[-1].deltas.h, self.neuron_layer[-1].deltas.w) for r in xrange(number_of_epochs): print "Epoch ", r+1, "/", number_of_epochs mse = 0 for batch in xrange(number_of_pictures/self.batch_size): index_begin = self.batch_size * batch index_end = self.batch_size + index_begin # Push input and teacher to GPU memory self.neuron_layer[0].activations = cp.push( input_matrix[:,index_begin:index_end].astype('float32').copy('F')) teachbatch = cp.push( teacher_matrix[:,index_begin:index_end].astype('float32').copy('F')) # Forward-Pass for i in xrange(self.number_of_layers): self.weight_layer[i].forward() # calculate error at output layer cp.apply_binary_functor(self.neuron_layer[-1].deltas, teachbatch, cp.binary_functor.COPY) cp.apply_binary_functor(self.neuron_layer[-1].deltas, self.neuron_layer[-1].activations, cp.binary_functor.SUBTRACT) cp.apply_binary_functor(squared_errors, self.neuron_layer[-1].deltas, cp.binary_functor.COPY) cp.apply_scalar_functor(squared_errors, cp.scalar_functor.SQUARE) mse += 
cp.sum(squared_errors) # Backward-Pass for i in xrange(self.number_of_layers): self.weight_layer[self.number_of_layers-i-1].backward() # Don't wait for garbage collector teachbatch.dealloc() self.neuron_layer[0].activations.dealloc() print "MSE: ", (mse/number_of_pictures) squared_errors.dealloc()
def __init__(self, cfg, weights, biases):
    """Set up bookkeeping for a network built from given weights and biases.

    For every given weight matrix, companion matrices (previous deltas,
    learn rates and -- depending on the configuration -- gradient
    accumulators) are created. Afterwards a dense last layer with
    cfg.num_classes outputs is either loaded or randomly initialised.

    Note that self.Weights and self.Bias are the lists passed in, so the
    caller's lists are extended by the last layer.

    @param cfg -- configuration object (num_layers, num_classes, load,
                  finetune_online_learning, finetune_rprop are read here)
    @param weights -- list of weight matrices of the lower layers
    @param biases -- list of bias vectors of the lower layers
    """
    # NOTE(review): the block nesting below (in particular the extent of
    # the else-branch) was reconstructed from a flattened source line --
    # confirm against the original file.
    self.cfg = cfg
    self.NumCorrect = 0
    self.Errorrate = []
    self.testError = []
    self.NumberOfLayers = cfg.num_layers + 1
    # default hook: does nothing but return the mlp
    self.preEpochHook = lambda mlp, epoch: mlp
    self.Weights = weights
    self.DeltaWeightsOld = []
    self.WeightsLearnRate = []
    self.dWeights = []
    self.dBias = []
    self.Bias = biases
    self.DeltaBiasOld = []
    self.BiasLearnRate = []
    # initial value for all per-weight / per-bias learn rates
    l = 0.001
    self.NumberOfNeuronsPerLayer = []
    for i in xrange(self.NumberOfLayers - 2):
        #self.Weights.append(newWeights)
        dim1, dim2 = self.Weights[i].shape
        self.createCopyFilled(self.DeltaWeightsOld, self.Weights[i], 0)
        self.createCopyFilled(self.WeightsLearnRate, self.Weights[i], l)
        # gradient accumulators are created unless plain online learning
        # (online learning without rprop) is configured
        if not self.cfg.finetune_online_learning or (
                self.cfg.finetune_online_learning and self.cfg.finetune_rprop):
            self.createCopyFilled(self.dWeights, self.Weights[i], 0)
            self.createCopyFilled(self.dBias, self.Bias[i], 0)
        self.createFilled(self.DeltaBiasOld, dim2, 1, 0)
        self.createFilled(self.BiasLearnRate, dim2, 1, l)
        self.NumberOfNeuronsPerLayer.append(dim1)
    # create dense matrix for last layer
    dim1, dim2 = self.Weights[-1].shape[1], self.cfg.num_classes
    if self.cfg.load and self.loadLastLayer(dim1, dim2):
        pass
    else:
        self.Weights.append(cp.dev_tensor_float_cm([dim1, dim2]))
        # uniform random numbers, shifted by -0.5 ...
        cp.fill_rnd_uniform(self.Weights[-1])
        #print "Initializing weights with rnd(%2.5f)",
        cp.apply_scalar_functor(self.Weights[-1], cp.scalar_functor.SUBTRACT, 0.5)
        #cp.apply_scalar_functor(self.Weights[-1],cp.scalar_functor.MULT, 1./math.sqrt(self.Weights[-2].w))
        # ... and scaled by 1/shape[1] of the layer below (Weights[-2] is
        # the former last matrix now that the new one was appended)
        cp.apply_scalar_functor(self.Weights[-1], cp.scalar_functor.MULT, 1.
                                / self.Weights[-2].shape[1])
        self.createFilled(self.Bias, dim2, 1, 0)
    self.createFilled(self.DeltaBiasOld, dim2, 1, 0)
    self.createFilled(self.BiasLearnRate, dim2, 1, l)
    self.createFilled(self.DeltaWeightsOld, dim1, dim2, 0)
    self.createFilled(self.WeightsLearnRate, dim1, dim2, l)
    if not self.cfg.finetune_online_learning or (
            self.cfg.finetune_online_learning and self.cfg.finetune_rprop):
        self.createCopyFilled(self.dWeights, self.Weights[-1], 0)
        self.createCopyFilled(self.dBias, self.Bias[-1], 0)
    self.NumberOfNeuronsPerLayer.append(dim1)
    self.NumberOfNeuronsPerLayer.append(dim2)
    self.reconstruction_error = []
def normalize_255(self,batch):
    """ normalize by dividing every element of batch by 255 (in place) """
    div = cp.scalar_functor.DIV
    cp.apply_scalar_functor(batch, div, 255.)
def finalize_stats(self):
    """ use N, mean and mean2 to generate data for normalization

    On return the following attributes are set (each one aliases the
    accumulator it was computed in, no copies are made):
      negative_mean -- -(mean / N)            (same object as self.mean)
      std           -- sqrt(mean2 / N - (mean / N)^2 + 0.01)
                                              (same object as self.mean2)
      range         -- MAX functor with 1. applied to (max - min),
                       plus 0.01              (same object as self.max)
      negative_min  -- -min                   (same object as self.min)
    """
    apply_scalar = cp.apply_scalar_functor
    apply_binary = cp.apply_binary_functor
    functor = cp.scalar_functor
    subtract = cp.binary_functor.SUBTRACT

    # mean := mean / N, and keep its square in a temporary
    apply_scalar(self.mean, functor.MULT, 1./self.N)
    sqmean = self.mean.copy()
    apply_scalar(sqmean, functor.SQUARE)

    # mean2 := mean2 / N - squared mean
    apply_scalar(self.mean2, functor.MULT, 1./self.N)
    apply_binary(self.mean2, sqmean, subtract)

    # std is sqrt of difference
    apply_scalar(self.mean2, functor.ADD, 0.01)  # numerical stability
    apply_scalar(self.mean2, functor.SQRT)
    self.std = self.mean2
    sqmean.dealloc()

    # negate mean (so we can add it to normalize a matrix)
    apply_scalar(self.mean, functor.MULT, -1.)
    self.negative_mean = self.mean

    # calculate range
    apply_binary(self.max, self.min, subtract)
    apply_scalar(self.max, functor.MAX, 1.)
    self.range = self.max
    apply_scalar(self.range, functor.ADD, 0.01)  # numerical stability

    # calculate negative min
    apply_scalar(self.min, functor.MULT, -1.)
    self.negative_min = self.min

    # sanity checks on the results
    assert not cp.has_nan(self.negative_mean)
    assert not cp.has_inf(self.negative_mean)
    assert not cp.has_nan(self.std)
    assert not cp.has_inf(self.std)
    assert not cp.has_nan(self.negative_min)
    assert not cp.has_inf(self.range)
def prepare_dbg(self, mbatch_provider, Npoint, nsteps, eval_start, save_callback):
    """ Prepare the data for visualization

    Loads a minibatch into layer 0, optionally replaces the starting state
    by scaled uniform noise (depending on eval_start), alternates up- and
    down-passes at the top of the network for nsteps+100 iterations
    (sampling is switched off once step exceeds nsteps), propagates the
    result down to the visible layer, saves it and finally pulls layer
    activations and (if small enough) weights to the host.

    @param mbatch_provider -- supplies the minibatch (getMiniBatch) and the
                              sample set stored in self.dbg_sampleset
    @param Npoint -- passed to save_fantasy; also the number of columns of
                     sampleset_ kept in self.dbg_sampleset
    @param nsteps -- number of steps during which sampling is enabled
    @param eval_start -- one of the EvalStartType values
    @param save_callback -- forwarded to save_fantasy
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # flattened source line -- confirm against the original file.
    print "Preparing data for visualization..."
    mbatch_provider.getMiniBatch(self.cfg.batchsize, self.layers[0].act)
    if eval_start == EvalStartType.trainingset:
        # keep the minibatch that was just loaded
        print "Starting Eval from Trainingset"
        pass
    elif eval_start == EvalStartType.vnoise:
        # overwrite the visible layer with uniform noise scaled by 0.3
        print "Starting Eval from VNoise"
        cp.fill_rnd_uniform(self.layers[0].act)
        cp.apply_scalar_functor(self.layers[0].act, cp.scalar_functor.MULT, 0.3)
    elif eval_start == EvalStartType.h1noise:
        # put scaled noise into layer 1 and sample layer 0 from it
        print "Starting Eval from H1Noise"
        cp.fill_rnd_uniform(self.layers[1].act)
        cp.apply_scalar_functor(self.layers[1].act, cp.scalar_functor.MULT, 0.3)
        self.downPass(1, sample=True)
    self.dbg_datout = []
    video = self.cfg.video
    # initial bottom-up pass through all layers, without sampling
    for layer_num, layer in enumerate(self.layers[0:-1]):
        self.upPass(layer_num, sample=False)
        #if layer_num+2 < len(self.layers):
            #assert(False)
    # number of additional iterations that run without sampling
    num_meanfield = 100
    for step in xrange(nsteps + num_meanfield):
        # sample while step <= nsteps, afterwards use mean-field updates
        sample = not step > nsteps
        self.upPass(self.cfg.num_layers - 2, sample=sample)
        if video:
            # project the current state down to layer 0 and save a frame
            for lay_num in reversed(xrange(1, self.cfg.num_layers)):
                self.downPass(lay_num, sample=False)
            self.save_fantasy(step, Npoint, save_callback, self.layers[0].act)
        self.downPass(self.cfg.num_layers - 1, sample=sample)
    # final top-down pass without sampling
    for layer_num in reversed(xrange(1, self.cfg.num_layers)):
        self.downPass(layer_num, sample=False)
        layer = self.layers[layer_num - 1]
    #for bla in xrange(1):
    #    self.downPass(1, sample = False)
    #    self.upPass(0, sample = False)
    # pass up again before we save fantasies -- assures that we see bottom-up activities!
    for layer_num, layer in enumerate(self.layers[0:-1]):
        self.upPass(layer_num, sample=False)
        #if layer_num+2 < len(self.layers):
            #assert(False)
    self.save_fantasy(nsteps + 1, Npoint, save_callback, self.layers[0].act)
    self.dbg_sampleset = mbatch_provider.sampleset_[:, 0:Npoint].T
    print "Pulling Layer-Activations..."
    self.act = {}
    self.act_info = {}
    for l in xrange(1, self.cfg.num_layers):
        L = self.layers[l]
        # the top layer (l == num_layers-1) is not stored
        if l < self.cfg.num_layers - 1:
            self.act_info["%d-subs" % l] = dict(px=np.sqrt(L.size), py=np.sqrt(L.size))
            self.act["%d-subs" % l] = L.act.np
    # only pull weights to the host when the first matrix is small enough
    if self.weights[0].mat.shape[0] < 800 * 6:
        print "Trying to pull W0..."
        try:
            self.W = self.weights[0].mat.np
            if len(self.weights) > 1:
                self.W1 = self.weights[1].mat.np
        except MemoryError:
            print("weights too big!")
    print "done."
def sample_markov_chains(self,beta,step):
    """Advance the chains by one hidden/visible sampling step at temperature beta.

    Hidden units are sampled from SIGM(beta * (w^T v + bias_hi)), then the
    visible units from SIGM(beta * (w h + bias_lo) + (1-beta) * baserate_bias).
    self.h and self.v are overwritten with the binarized samples.

    The base-rate bias is scaled in a temporary copy. It used to be
    multiplied by (1-beta) in place and restored by 1.0/(1-beta), which
    raised ZeroDivisionError and left the bias zeroed for beta == 1 and
    accumulated rounding errors over many steps.

    @param beta -- interpolation parameter between the base-rate model
                   and the model defined by w, bias_lo and bias_hi
    @param step -- index of the current step (currently unused)
    """
    # h ~ SIGM(beta * (w^T v + bias_hi))
    cp.prod(self.h,self.w,self.v,'t','n')
    cp.matrix_plus_col(self.h,self.bias_hi)
    cp.apply_scalar_functor(self.h,cp.scalar_functor.MULT,beta)
    cp.apply_scalar_functor(self.h,cp.scalar_functor.SIGM)
    cp.rnd_binarize(self.h)

    # v ~ SIGM(beta * (w h + bias_lo) + (1-beta) * baserate_bias)
    cp.prod(self.v,self.w,self.h,'n','n')
    cp.matrix_plus_col(self.v,self.bias_lo)
    cp.apply_scalar_functor(self.v,cp.scalar_functor.MULT,beta)

    # scale a copy so that self.baserate_bias itself is never modified
    scaled_baserate = self.baserate_bias.copy()
    cp.apply_scalar_functor(scaled_baserate,cp.scalar_functor.MULT,1-beta)
    cp.matrix_plus_col(self.v,scaled_baserate)
    scaled_baserate.dealloc()

    cp.apply_scalar_functor(self.v,cp.scalar_functor.SIGM)
    cp.rnd_binarize(self.v)