def setup(shape):
    """Creates two matrices and corresponding row/column vectors"""
    mat = cmt.empty(shape).fill_with_randn()
    mat2 = cmt.empty(shape).fill_with_randn()
    col = cmt.empty((shape[0], 1)).assign(0)
    row = cmt.empty((1, shape[1])).assign(0)
    return mat, mat2, col, row
def dream(self, k=10):
    """Generate a pattern from this network.

    Returns
    -------
    A new CUDAMatrix with the shape of the first layer
    """
    last_layer = self.layers[-1]
    v = cm.empty(last_layer.visible_bias.shape)
    h = cm.empty(last_layer.hidden_bias.shape)
    v_mean = cm.empty(last_layer.visible_bias.shape)
    h_mean = cm.empty(last_layer.hidden_bias.shape)
    h.fill_with_rand()
    for _ in xrange(k):
        last_layer.sample_visible(h, v_mean, v)
        last_layer.sample_hidden(v, h_mean, h)
    v.free_device_memory()
    v_mean.free_device_memory()
    h_mean.free_device_memory()
    return self.reverse_transform(h)
def __init__(self):
    self.m = 10
    self.sizes = [3000, 50, 3]
    #self.thetas = map(lambda x: cm.CUDAMatrix(x), neur.create_initial_thetas(self.sizes, 0.12))
    self.thetas = map(lambda x: cm.empty((self.sizes[x + 1], self.sizes[x] + 1)).fill_with_rand(),
                      range(len(self.sizes) - 1))
    self.activ_layers = map(lambda x: cm.CUDAMatrix(np.zeros((self.m, x + 1))),
                            self.sizes[0:-1])
    self.activ_layers.append(cm.CUDAMatrix(np.zeros((self.m, self.sizes[-1]))))
    for i in range(len(self.sizes)):
        self.activ_layers[i].set_col_slice(0, 1, 1)
    self.activ_layers_temp = map(lambda x: cm.empty((self.m, x + 1)), self.sizes[0:-1])
    self.activ_layers_temp.append(cm.empty((self.m, self.sizes[-1])))
    self.layer_expand_mask = map(lambda x: cm.CUDAMatrix(np.hstack([np.zeros((x, 1)), np.eye(x)])),
                                 self.sizes[0:-1])
    size1 = self.sizes[0] + 1
    clear = np.zeros((size1, size1))
    clear[0, 0] = 1
    self.clear_vec = cm.CUDAMatrix(clear[0:size1, 0:size1])
    #print self.clear_vec.shape
    self.clear_vec2 = cm.CUDAMatrix(clear[0:51, 0:51])
    self.z = [0] * (len(self.thetas) + 1)
    self.z[1] = cm.empty((self.m, 50))
    self.z[2] = cm.empty((self.m, 3))
def AllocateBatchsizeDependentMemory(self, batchsize):
    self.batchsize = batchsize
    if self.data:
        self.data.free_device_memory()
    if self.deriv:
        self.deriv.free_device_memory()
    dimensions = self.dimensions
    numlabels = self.numlabels
    if self.is_input or self.is_initialized or self.is_output:
        if self.activation == deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
            self.data = cm.CUDAMatrix(np.zeros((numlabels * dimensions, batchsize)))
        else:
            self.data = cm.CUDAMatrix(np.zeros((dimensions, batchsize)))
    self.deriv = cm.CUDAMatrix(np.zeros((numlabels * dimensions, batchsize)))
    self.state = cm.CUDAMatrix(np.zeros((numlabels * dimensions, batchsize)))
    self.temp = cm.CUDAMatrix(np.zeros((dimensions, batchsize)))
    if self.activation == deepnet_pb2.Hyperparams.SOFTMAX or\
       self.activation == deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
        self.expanded_batch = cm.CUDAMatrix(np.zeros((numlabels * dimensions, batchsize)))
    if self.loss_function == deepnet_pb2.Layer.CROSS_ENTROPY:
        if self.activation == deepnet_pb2.Hyperparams.SOFTMAX:
            self.temp2 = cm.CUDAMatrix(np.zeros((dimensions, batchsize)))
            self.indices = cm.CUDAMatrix(np.zeros((1, dimensions * batchsize)))
            self.rowshift = cm.CUDAMatrix(
                numlabels * np.arange(dimensions * batchsize).reshape(1, -1))
    if self.activation == deepnet_pb2.Hyperparams.REPLICATED_SOFTMAX:
        #norm_to = self.hyperparams.normalize_to
        self.NN = cm.CUDAMatrix(np.ones((1, batchsize)))
        self.counter = cm.empty(self.NN.shape)  # Used for sampling softmaxes.
        self.count_filter = cm.empty(self.NN.shape)  # Used for sampling softmaxes.
    if self.hyperparams.dropout:
        self.mask = cm.CUDAMatrix(np.zeros(self.state.shape))
def calc_hidden_probs(data, vh, hb, batchsize):
    """ Calculate the probs in the next layer up given data, and weights """
    if data.shape[0] % batchsize != 0:
        print "WARNING! Batchsize for calc_hidden_probs is not an even divisor of example cnt."
    print "Calculating probs. of hidden layer, " + str(data.shape[0]) + " examples."
    dev_data = cm.CUDAMatrix(data)
    lrp_data = np.empty((data.shape[0], vh.shape[1]))
    cur_data = cm.empty((batchsize, vh.shape[0]))
    nex_data = cm.empty((batchsize, vh.shape[1]))
    vishid = cm.CUDAMatrix(vh)
    hid_bias = cm.CUDAMatrix(hb)
    num_batches = data.shape[0] / batchsize
    for batch in range(0, num_batches):
        cur_data = dev_data.get_row_slice(batch * batchsize, (batch + 1) * batchsize)
        cm.dot(cur_data, vishid, target=nex_data)
        nex_data.add_row_vec(hid_bias)
        nex_data.apply_sigmoid()
        nex_data.copy_to_host()
        lrp_data[batch * batchsize:(batch + 1) * batchsize, :] = nex_data.asarray().copy()
    return lrp_data
def __init__(self, input_dim, output_dim, spectral_radius=.9, leak_rate=1,
             input_scaling=1, bias_scaling=0):
    super(CUDAReservoirNode, self).__init__(
        input_dim=input_dim, output_dim=output_dim,
    )
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.leak_rate = leak_rate
    w = mdp.numx.random.normal(0, 1, (output_dim, output_dim))
    w_in = mdp.numx.random.uniform(-1, 1, (output_dim, input_dim)) * input_scaling
    if output_dim < 1500:
        l = mdp.numx.linalg.eigvals(w)
        r = mdp.numx.amax(mdp.numx.absolute(l))
        w = w * (spectral_radius / r)
        self.w = cm.CUDAMatrix(w)
    else:
        self.w = cm.CUDAMatrix(w)
        r = get_specrad(self.w)
        self.w.mult(spectral_radius / r)
    bias = mdp.numx.random.normal(0, 1, (output_dim, 1)) * bias_scaling
    self.w_in = cm.CUDAMatrix(w_in)
    self.bias = cm.CUDAMatrix(bias)
    self.current_state = cm.empty((self.output_dim, 1))
    self.new_state = cm.empty((self.output_dim, 1))
def _sample_h(self, v, x, sample=False, x_is_bias=False):
    # updates self.h
    # self.h = cm.empty((v.shape[0], self.output_dim))
    if x_is_bias:
        # Bias is precalculated
        self.h.assign(x)
    else:
        cm.dot(x, self.bg, self.h)
    self.h.add_dot(v, self.wg)
    # This is about 100 times faster than calling 'add_row_vec' to add biases.
    ones_cut = self._ones.get_col_slice(0, v.shape[0])
    self.h.add_dot(ones_cut.T, self.bhg)
    self.h.apply_sigmoid2(self.h)
    if sample:
        # Sample random values
        sampled = cm.empty((v.shape[0], self.output_dim))
        sampled.fill_with_rand()
        # Sample values of hiddens
        sampled.less_than(self.h, self.h)
def test_allfinite():
    a = cm.empty((10, 20)).assign(1).divide(0)       # NaN
    b = cm.empty((10, 20)).assign(1e20).mult(1e20)   # Inf
    c = cm.empty((10, 20)).assign(1)                 # 1.0
    assert (not a.allfinite()) and (not b.allfinite()) and c.allfinite(), \
        "CUDAMatrix.allfinite does not work"
def AllocateBatchsizeDependentMemory(self, batchsize):
    if self.data:
        self.data.free_device_memory()
    if self.deriv:
        self.deriv.free_device_memory()
    self.batchsize = batchsize
    dimensions = self.dimensions
    numlabels = self.numlabels
    numdims = dimensions * numlabels
    self.statesize = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
    self.batchsize_temp = cm.CUDAMatrix(np.zeros((1, batchsize)))
    if self.t_op:
        if self.t_op.optimizer == deepnet_pb2.Operation.PCD:
            self.pos_state = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
            self.pos_sample = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
            self.neg_state = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
            self.neg_sample = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
            self.state = self.pos_state
            self.sample = self.pos_sample
            self.suff_stats = cm.empty((numdims, 1))
        elif self.t_op.optimizer == deepnet_pb2.Operation.CD:
            self.state = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
            self.sample = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
            self.suff_stats = cm.empty((numdims, 1))
        else:
            self.state = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
            self.deriv = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
    else:
        self.state = cm.CUDAMatrix(np.zeros((numdims, batchsize)))
    if self.is_input or self.is_initialized or self.is_output:
        self.data = cm.CUDAMatrix(np.zeros((dimensions, batchsize)))
    if self.hyperparams.dropout:
        self.mask = cm.CUDAMatrix(np.zeros(self.state.shape))
def calc_cost(self, X1, X2):
    P1 = cm.dot(self.Wgpu.T, X1)
    P2 = cm.dot(self.Wgpu.T, X2)
    Y1 = cm.dot(self.Wgpu, P1)
    Y2 = cm.dot(self.Wgpu, P2)
    Y1.subtract(X1)
    Y2.subtract(X2)
    PD = cm.empty(P1.shape)
    XD = cm.empty(X1.shape)
    P1.subtract(P2, target=PD)
    X1.subtract(X2, target=XD)
    grad = cm.dot(XD, PD.T)
    grad.mult(self.lam)
    grad.add(cm.dot(Y1, P1.T))
    grad.add(cm.dot(Y2, P2.T))
    grad.add(cm.dot(X1, cm.dot(Y1.T, self.Wgpu)))
    grad.add(cm.dot(X2, cm.dot(Y2.T, self.Wgpu)))
    grad.divide(X1.shape[1])
    PD.mult(PD)
    Y1.mult(Y1)
    Y2.mult(Y2)
    cost = PD.sum(axis=0)
    cost.mult(self.lam)
    cost.add(Y1.sum(axis=0))
    cost.add(Y2.sum(axis=0))
    cost = cost.sum(axis=1)
    cost.divide(X1.shape[1] * 2)
    return [cost, grad]
def train(self, X1, X2, max_epoch):
    self.train_init()
    datasz = X1.shape[1]
    X1gpu = cm.CUDAMatrix(X1)
    X2gpu = cm.CUDAMatrix(X2)
    X1sub = cm.empty((self.data_dim, self.batchsz))
    X2sub = cm.empty((self.data_dim, self.batchsz))
    tic = time.time()
    for epoch in range(max_epoch):
        if epoch == 5:
            self.moment = 0.9
        for n in range(datasz / self.batchsz):
            X1gpu.get_col_slice(self.batchsz * n, self.batchsz * (n + 1), target=X1sub)
            X2gpu.get_col_slice(self.batchsz * n, self.batchsz * (n + 1), target=X2sub)
            cost = self.train_step(X1sub, X2sub)
        print "epoch %d cost %f" % (epoch, cost.asarray()[0, 0])
    toc = time.time()
    print "time %s" % (toc - tic)
    self.train_finalize()
def __init__(self, layer_type, input_size, output_size, learning_rate, activation):
    self.layer_type = layer_type
    self.input_size = input_size
    self.output_size = output_size
    self.learning_rate = learning_rate
    self.act = self.act_func(activation)
    self.d_act = self.d_act_func(activation)
    self.output = None
    self.W = None
    self.b = None
    self.z = None
    self.forward = None
    self.backward = None
    if self.layer_type == "fc":
        self.W = np.random.rand(self.input_size, self.output_size) * 1 - 0.5
        self.b = np.random.rand(1, self.output_size) * 1 - 0.5
        self.W = cm.CUDAMatrix(self.W)
        self.b = cm.CUDAMatrix(self.b)
        self.z = cm.empty((1, self.output_size))
        self.d_act_z = cm.empty((1, self.output_size))
        self.output = cm.empty((1, self.output_size))
        self.delta_b = cm.empty((1, self.output_size))
        self.delta_W = cm.empty((self.input_size, self.output_size))
        self.output_bp = None  # cm.empty((1, self.input_size))
        self.forward = self.dense_fw
        self.backward = self.dense_bw
def _calculate_moments_ns(self, x, ws, quick=False):
    """Calculate moments based on the weights and samples. We also
    calculate and save MI, TC, additivity, and the value of the objective.
    Note it is assumed that <X_i^2> = 1!"""
    m = {}  # Dictionary of moments
    if self.gpu:
        y = cm.empty((self.n_samples, self.m))
        wc = cm.CUDAMatrix(ws)
        cm.dot(x, wc.T, target=y)  # + noise, but it is included analytically
        del wc
        tmp_sum = np.einsum('lj,lj->j', y.asarray(), y.asarray())  # TODO: Should be able to do on gpu...
    else:
        y = x.dot(ws.T)
        tmp_sum = np.einsum('lj,lj->j', y, y)
    m["uj"] = (1 - self.eps**2) * tmp_sum / self.n_samples + self.eps**2 * np.sum(ws**2, axis=1)
    if quick and np.max(m["uj"]) >= 1.:
        return False
    if self.gpu:
        tmp = cm.empty((self.nv, self.m))
        cm.dot(x.T, y, target=tmp)
        tmp_dot = tmp.asarray()
        del tmp
        del y
    else:
        tmp_dot = x.T.dot(y)
    m["rho"] = (1 - self.eps**2) * tmp_dot.T / self.n_samples + self.eps**2 * ws  # m by nv
    m["ry"] = ws.dot(m["rho"].T)  # normalized covariance of Y
    m["Y_j^2"] = self.yscale**2 / (1. - m["uj"])
    np.fill_diagonal(m["ry"], 1)
    m["invrho"] = 1. / (1. - m["rho"]**2)
    m["rhoinvrho"] = m["rho"] * m["invrho"]
    m["Qij"] = np.dot(m['ry'], m["rhoinvrho"])
    m["Qi"] = np.einsum('ki,ki->i', m["rhoinvrho"], m["Qij"])
    #m["Qi-Si^2"] = np.einsum('ki,ki->i', m["rhoinvrho"], m["Qij"])
    m["Si"] = np.sum(m["rho"] * m["rhoinvrho"], axis=0)
    # This is the objective, a lower bound for TC
    m["TC"] = np.sum(np.log(1 + m["Si"])) \
        - 0.5 * np.sum(np.log(1 - m["Si"]**2 + m["Qi"])) \
        + 0.5 * np.sum(np.log(1 - m["uj"]))
    if not quick:
        m["MI"] = -0.5 * np.log1p(-m["rho"]**2)
        m["X_i Y_j"] = m["rho"].T * np.sqrt(m["Y_j^2"])
        m["X_i Z_j"] = np.linalg.solve(m["ry"], m["rho"]).T
        m["X_i^2 | Y"] = (1. - np.einsum('ij,ji->i', m["X_i Z_j"], m["rho"])).clip(1e-6)
        m['I(Y_j ; X)'] = 0.5 * np.log(m["Y_j^2"]) - 0.5 * np.log(self.yscale**2)
        m['I(X_i ; Y)'] = -0.5 * np.log(m["X_i^2 | Y"])
        m["TCs"] = m["MI"].sum(axis=1) - m['I(Y_j ; X)']
        m["additivity"] = (m["MI"].sum(axis=0) - m['I(X_i ; Y)']).sum()
    return m
def _init_params(self, batch_size):
    self.next_z = cm.empty((batch_size, self.next_size))
    self.next_single_z = cm.empty((1, self.next_size))
    if self.level != 1:
        self.my_delta = cm.empty((batch_size, self.size))
    else:
        self.my_delta = None
def initParams(self):
    """
    Initialize parameters using sqrt(6)/sqrt(fanin+fanout)
    """
    sizes = [self.inputDim] + self.layerSizes + [self.outputDim]
    scales = [np.sqrt(6) / np.sqrt(n + m) for n, m in zip(sizes[:-1], sizes[1:])]
    self.stack = [[np.random.rand(m, n) * 2 * s - s, np.zeros((m, 1))]
                  for n, m, s in zip(sizes[:-1], sizes[1:], scales)]
    self.hActs_M = [cm.empty((s, self.maxBatch)) for s in sizes]

    if self.train:
        # Now assuming that all layers are the same size
        self.grad = [[cm.empty(w.shape), cm.empty(b.shape)] for w, b in self.stack]
        self.deltasC_M = cm.empty((self.outputDim, self.maxBatch))
        self.deltasOut_M = cm.empty((sizes[1], self.maxBatch))
        self.deltasIn_M = cm.empty((sizes[1], self.maxBatch))
        self.tmpGrad_M = cm.empty((self.layerSizes[0], self.maxBatch))

    # Allocate memory once here and reuse
    # Store probs
    self.probs_M = cm.empty((self.outputDim, self.maxBatch))
    # Store col max
    self.rowVec_M = cm.empty((1, self.maxBatch))

    self.stack = [[cm.CUDAMatrix(w), cm.CUDAMatrix(b)] for w, b in self.stack]
def LoadParams(self, tied_to=None):
    """Load the parameters for this edge.

    Load the parameters if present in self.proto. Otherwise initialize them
    appropriately.
    """
    node1 = self.node1
    node2 = self.node2
    proto = self.proto
    self.hyperparams = proto.hyperparams
    param_names = [param.name for param in proto.param]
    for param in proto.param:
        if param.conv or param.local:
            n_locs = self.AllocateMemoryForConvolutions(param, node1, node2)
        if not param.dimensions:
            if param.conv:
                cv = param.conv_params
                dims = [cv.num_filters, cv.size**2 * cv.num_colors]
            elif param.local:
                dims = [cv.num_filters, n_locs**2 * cv.size**2 * cv.num_colors]
            else:
                dims = [node1.numlabels * node1.dimensions,
                        node2.numlabels * node2.dimensions]
            param.dimensions.extend(dims)
        if tied_to:
            if self.transpose:
                self.params[param.name] = tied_to.params[param.name].T
            else:
                self.params[param.name] = tied_to.params[param.name]
            mat = self.params[param.name]
        else:
            if param.mat:  # and 'grad' not in param.name:
                mat = util.ParameterAsNumpy(param)
            else:
                mat = self.InitializeParameter(param)
            self.params[param.name] = cm.CUDAMatrix(mat)
        if param.name == 'weight':
            self.temp = cm.empty(mat.shape)
            #self.temp2 = cm.empty(mat.shape)
            self.gradient = cm.empty(mat.shape)
            self.grad_weight = cm.empty(mat.shape)
            self.gradient.assign(0)
            self.grad_weight.assign(0)
    if self.t_op and (self.t_op.optimizer == deepnet_pb2.Operation.PCD or \
                      self.t_op.optimizer == deepnet_pb2.Operation.CD):
        self.suff_stats = cm.empty((self.node1.numlabels * self.node1.dimensions,
                                    self.node2.numlabels * self.node2.dimensions))
def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
                     numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
                     log=False):
    p = 0.5
    epsilon = 1e-3
    Nfin = len(b)

    indices_labels = []
    classes = np.unique(labels_a)
    for c in classes:
        idxc, = np.where(labels_a == c)
        indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))

    Mreg_GPU = cudamat.empty(M_GPU.shape)
    W_GPU = cudamat.empty(M_GPU.shape).assign(0)

    for cpt in range(numItermax):
        Mreg_GPU.assign(M_GPU)
        Mreg_GPU.add_mult(W_GPU, eta)
        transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
                              stopThr=stopInnerThr, returnAsGPU=True)
        # the transport has been computed. Check if classes are really
        # separated
        W_GPU.assign(1)
        W_GPU = W_GPU.transpose()
        for (i, c) in enumerate(classes):
            (_, nbRow) = indices_labels[i].shape
            tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
            transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU)
            majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
            cudamat.pow(majs_GPU, (p - 1))
            majs_GPU.mult(p)

            tmpC_GPU.assign(0)
            tmpC_GPU.add_col_vec(majs_GPU)
            W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
        W_GPU = W_GPU.transpose()

    return transp_GPU.asarray()
def setVariables(self):
    n, m, r = self.n, self.m, self.rank
    self.H_gpu = cm.CUDAMatrix(self.H)
    self.W_gpu = cm.CUDAMatrix(self.W)
    self.X_gpu = cm.CUDAMatrix(self.X)
    self.Wrowsum_gpu = cm.empty([r, 1])
    self.WH_gpu = cm.empty([n, m])
    self.XWH_gpu = self.WH_gpu
    self.WTXWH_gpu = cm.empty([r, m])
    self.Hcolsum_gpu = cm.empty([1, r])
    self.XWHHT_gpu = cm.empty([n, r])
def TranslateData(self, batch, i):
    """Applies translations to data at index i in batch."""
    sizeX = self.sizeX
    sizex = self.sizex
    batchsize = batch[i].shape[1]
    shift = (sizeX - sizex) / 2
    offset_x = np.array([random.choice(self.translate_range_x) + shift
                         for k in range(batchsize)]).reshape(1, -1)
    offset_y = np.array([random.choice(self.translate_range_y) + shift
                         for k in range(batchsize)]).reshape(1, -1)
    num_channels = self.num_channels
    d = batch[i]
    if self.offset_x is None:
        self.offset_x = cm.CUDAMatrix(offset_x)
    else:
        self.offset_x.overwrite(offset_x)
    if self.offset_y is None:
        self.offset_y = cm.CUDAMatrix(offset_y)
    else:
        self.offset_y.overwrite(offset_y)
    if self.translated_d is None or self.translated_d.shape[1] != batchsize:
        self.translated_d = cm.empty((sizex**2 * num_channels, batchsize))
    d.generate_translations(sizeX, sizex, self.offset_x, self.offset_y,
                            target=self.translated_d)
    batch[i] = self.translated_d
def get_specrad(Ac):
    """Get spectral radius of A using the power method."""
    m_size = Ac.shape[0]
    x = np.random.normal(0, 1, (m_size, 1))
    x = x / np.linalg.norm(x)
    x = cm.CUDAMatrix(x)
    y = cm.empty((m_size, 1))
    diff = 200
    eps = 1e-3
    b = 1e10
    c = 1e9
    max_its = 1e6
    n_its = 0
    while diff > eps and n_its < max_its:
        cm.dot(Ac, x, target=y)
        norm = y.euclid_norm()
        y.divide(norm, target=x)
        a = cm.dot(y.T, x).asarray()
        c = cm.dot(x.T, x).asarray()
        diff = np.abs(a - b)
        b = float(a)
        n_its += 1
    specrad = float(a / c)
    print 'Spectral radius:', specrad, 'Number of iterations:', n_its
    return float(a / c)
def getDataUpwards(self, loadfolder, savefolder):
    # push data of the visible layer upwards to form a set of new data
    # because of memory issues, we have to write each batch of data to disc,
    # then read and combine them later
    # batch mode: receive data from cpu and return a matrix on cpu
    datalist = os.listdir(loadfolder)
    batchsize = 4096
    n = 0
    for dataname in datalist:
        name = os.path.join(loadfolder, dataname)
        mdict = scipy.io.loadmat(name)
        data = mdict['data']
        labels = mdict['label']
        # print labels.shape
        numbatch = data.shape[1] / batchsize
        for batch in range(numbatch):
            #print 'batch %d/%d'%(n, numbatch*len(datalist))
            batchdata = data[:, batch * batchsize:(batch + 1) * batchsize]
            batchlabels = labels[batch * batchsize:(batch + 1) * batchsize]
            temp = cm.empty((self.num_hid, batchdata.shape[1]))
            vis = cm.CUDAMatrix(batchdata)
            self.hidActProb(vis, temp)
            temp.copy_to_host()
            #topdata[:,batch*batchsize:(batch+1)*batchsize] = temp.numpy_array
            mdict = {}
            mdict['data'] = temp.numpy_array
            mdict['label'] = batchlabels
            scipy.io.savemat('%s/%d.mat' % (savefolder, n), mdict)
            n = n + 1
def __init__(self, hidden_dim, visible_dim=None, context_dim=None,
             gaussian=False, dtype=None, max_batch_size=1500):
    """
    Arguments:
        - hidden_dim: number of hidden variables
        - visible_dim: number of observed variables
        - context_dim: number of context variables
        - gaussian: use gaussian visible units (default is binary)
        - max_batch_size: number of datapoints to process simultaneously

    The max batch size is required to be able to optimize the adding of
    bias vectors by preallocating a vector of ones and taking the outer
    product with the bias vector. This is faster than directly adding it
    somehow.
    """
    super(RBMNode, self).__init__(hidden_dim, visible_dim + context_dim, dtype)
    self._input_dim = visible_dim + context_dim
    self._output_dim = hidden_dim
    self.context_dim = context_dim
    self.visible_dim = visible_dim
    self._initialized = False
    self._ones = cm.empty((1, max_batch_size))
    self._ones.assign(1)
    self._gaussian = gaussian
def GetErrors(self, raaX, raaY, sActivation):
    # Small value to avoid log underflows
    rEps = 1e-20

    raaError = cudamat.empty(raaX.shape)
    raaX.subtract(raaY, raaError)
    raaError.mult(raaError)
    raError = raaError.sum(axis=0)
    rError = raError.sum(axis=1)
    rSe = rError.asarray()[0, 0]
    #print(rSe)

    # # Sum all squared errors
    # rSe = numpy.sum(numpy.square(raaError))

    # # Depending on the activation function type
    # if(sActivation=="Logistic"):
    #     # Compute the average cross entropy error
    #     rE = -numpy.sum(numpy.multiply(raaX,numpy.log(raaY+rEps)) + numpy.multiply(1-raaX,numpy.log(1-raaY+rEps)))
    # elif(sActivation=="Linear"):
    #     # Compute the squared error
    #     rE = rSe
    # elif(sActivation=="Softmax"):
    #     # Compute the average cross entropy error
    #     rE = -numpy.sum(numpy.multiply(raaX,numpy.log(raaY+rEps)))
    rE = 0

    return (rSe, rE)
def Sparcity(M, psi):
    # Build a random binary mask on the host (each entry is 0 with probability psi),
    # upload it, and multiply elementwise so that a fraction psi of M is zeroed out.
    mask = np.random.choice([0, 1], M.shape, p=[psi, 1 - psi])
    N = cm.CUDAMatrix(mask)
    T = cm.empty(M.shape)
    N.mult(M, target=T)
    return T
def __init__(self, hidden_dim, visible_dim=None, context_dim=None,
             gaussian=False, dtype=None, max_batch_size=500):
    """
    Arguments:
        - hidden_dim: number of hidden variables
        - visible_dim: number of observed variables
        - context_dim: number of context variables
        - gaussian: use gaussian visible units (default is binary)
        - max_batch_size: number of datapoints to process simultaneously

    The max batch size is required to be able to optimize the adding of
    bias vectors by preallocating a vector of ones and taking the outer
    product with the bias vector. This is faster than directly adding it
    somehow.
    """
    super(RBMNode, self).__init__(hidden_dim, visible_dim + context_dim, dtype)
    self._input_dim = visible_dim + context_dim
    self._output_dim = hidden_dim
    self.context_dim = context_dim
    self.visible_dim = visible_dim
    self._initialized = False
    self._ones = cm.empty((1, max_batch_size))
    self._ones.assign(1)
    self._gaussian = gaussian
    # TODO: Should probably use **kwargs for these arguments and figure out
    # whether to keep the leak_rate and bias in there for performance
    # reasons.
    self.reservoir = Oger.nodes.CUDAReservoirNode(visible_dim, context_dim,
                                                  spectral_radius=.01,
                                                  leak_rate=1,
                                                  input_scaling=.001)
def train_init(self):
    # init cudamat
    cm.cublas_init()
    cm.CUDAMatrix.init_random(1)

    self.Wgpu = cm.CUDAMatrix(self.W)
    self.speed = cm.empty(self.W.shape)
    self.speed.assign(0)
def LoadParams(self):
    """Load the parameters for this edge.

    Load the parameters if present in self.proto. Otherwise initialize them
    appropriately.
    """
    proto = self.proto
    node1 = self.node1
    node2 = self.node2
    self.hyperparams = proto.hyperparams
    param_names = [param.name for param in proto.param]
    """
    for param in proto.param:
        if 'grad_'+param.name not in param_names and not param.name.startswith('grad_'):
            grad_p = deepnet_pb2.Parameter()
            grad_p.CopyFrom(param)
            grad_p.name = 'grad_' + param.name
            grad_p.initialization = deepnet_pb2.Parameter.CONSTANT
            grad_p.constant = 0
            proto.param.extend([grad_p])
    """
    for param in proto.param:
        if param.conv or param.local:
            n_locs = self.AllocateMemoryForConvolutions(param, node1, node2)
        if not param.dimensions:
            if param.conv:
                cv = param.conv_params
                dims = [cv.num_filters, cv.size**2 * cv.num_colors]
            elif param.local:
                dims = [cv.num_filters, n_locs**2 * cv.size**2 * cv.num_colors]
            else:
                dims = [node1.numlabels * node1.dimensions,
                        node2.numlabels * node2.dimensions]
            param.dimensions.extend(dims)
        if param.mat:  # and 'grad' not in param.name:
            print 'Loading saved parameters'
            mat = util.ParameterAsNumpy(param)
        else:
            mat = self.InitializeParameter(param)
        self.params[param.name] = cm.CUDAMatrix(mat)
        if param.name == 'weight':
            self.temp = cm.empty(mat.shape)
            self.temp2 = cm.empty(mat.shape)
            self.grad_weight = cm.empty(mat.shape)
            self.grad_weight.assign(0)
def _init_bias(self):
    assert self.use_bias
    self.biases = cm.CUDAMatrix(np.zeros((1, self.next_size)))
    self.active_biases = cm.empty(self.biases.shape)
    self.biases_grad = cm.empty(self.biases.shape)
    if self.use_momentum:
        self.biases_update = cm.CUDAMatrix(np.zeros(self.biases_grad.shape))
    if self.use_rmsprop:
        self.biases_rmsprop_cache = cm.CUDAMatrix(np.zeros(self.biases_grad.shape))
        self.biases_grad_square = cm.CUDAMatrix(np.zeros(self.biases_grad.shape))
def AllocateMemoryForConvolutions(self, param, node1, node2):
    self.conv = param.conv
    self.local = param.local
    if self.conv:
        assert not self.local
    else:
        assert not self.conv
    self.conv_params = param.conv_params
    num_colors = self.conv_params.num_colors
    num_filters = self.conv_params.num_filters
    size = self.conv_params.size
    padding = self.conv_params.padding
    stride = self.conv_params.stride

    numdims, numimages = node1.state.shape
    assert numdims % num_colors == 0
    x = int(np.sqrt(numdims / num_colors))
    assert x**2 == numdims / num_colors
    n_locs = (x + 2 * padding - size) / stride + 1

    input_shape = node1.state.shape[::-1]
    output_shape = node2.state.shape[::-1]

    self.input_t = cm.empty(input_shape)
    self.input_t2 = cm.empty(input_shape)
    self.output_t = cm.empty(output_shape)
    self.output_t2 = cm.empty(output_shape)
    if param.conv_params.max_pool:
        pool_output_size = n_locs**2 * num_filters
        self.unpooled_layer = cm.empty((numimages, pool_output_size))
        pool_size = param.conv_params.pool_size
        pool_stride = param.conv_params.pool_stride
        n_pool_locs = (n_locs - pool_size) / pool_stride + 1
        assert output_shape[1] == n_pool_locs**2 * num_filters
        if param.conv_params.prob:
            self.rnd = cm.empty(self.unpooled_layer.shape)
    else:
        assert output_shape[1] == n_locs**2 * num_filters
    if param.conv_params.rnorm:
        self.unrnormalized_layer = cm.empty(output_shape)
        self.denoms = cm.empty(output_shape)
        self.rnorm_temp1 = cm.empty(output_shape)
        self.rnorm_temp2 = cm.empty(output_shape)
    return n_locs
def __init__(self, epsilon, momentum, num_epochs, batch_size, num_batches,
             dim_in, dim_out, num_hid):
    # training parameters
    self.epsilon = epsilon
    self.momentum = momentum
    self.num_epochs = num_epochs
    self.batch_size = batch_size
    self.num_batches = num_batches

    # model parameters
    self.dim_in = dim_in
    self.dim_out = dim_out
    self.num_hid = num_hid

    # initialize weights
    self.w_w1 = cm.CUDAMatrix(dim_in ** -0.5 * np.random.randn(dim_in, num_hid))
    self.w_b1 = cm.CUDAMatrix(np.zeros((num_hid, 1)))
    self.w_w2 = cm.CUDAMatrix(num_hid ** -0.5 * np.random.randn(num_hid, dim_out))
    self.w_b2 = cm.CUDAMatrix(np.zeros((dim_out, 1)))

    # initialize weight update matrices
    self.wu_w1 = cm.empty(self.w_w1.shape).assign(0)
    self.wu_b1 = cm.empty(self.w_b1.shape).assign(0)
    self.wu_w2 = cm.empty(self.w_w2.shape).assign(0)
    self.wu_b2 = cm.empty(self.w_b2.shape).assign(0)

    # initialize temporary storage
    self.h = cm.empty((self.num_hid, self.batch_size))
    self.out = cm.empty((self.dim_out, self.batch_size))
    self.delta = cm.empty((self.num_hid, self.batch_size))
def back_propagate(self, error=None):
    if error is not None:
        self.error = cm.error
    # python doesn't easily allow reversed(enumerate()) - use this instead
    for l in reversed(self.layer):
        # if we're on the last layer
        #print(str(index))
        if l.index == len(self.layer) - 1:
            delta_temp = cm.CUDAMatrix(np.append(self.error, np.zeros((1, self.error.shape[1])), axis=0))
        else:
            # Possible TODO?: is there a way to get rid of this transpose? it is slow to have to do this
            #delta_temp = cm.empty((self.layer[l.index+1].weights.shape[1],self.layer[l.index+1].weights.shape[0]))
            #delta_temp = self.layer[l.index+1].weights.transpose()
            self.layer[l.index + 1].weights.set_trans(True)
            delta_temp = cm.dot(self.layer[l.index + 1].weights, self.layer[l.index + 1].delta)
            self.layer[l.index + 1].weights.set_trans(False)

        if l.activation == 'squash':
            pass
            #l.activation_derivative = 1.0/((1+np.abs(l.weighted_sums)**2))
        elif l.activation == 'sigmoid':
            #l.activation_derivative = cm.empty(l.output.shape)
            l.output.apply_logistic_deriv(l.output)
            l.activation_derivative = l.output
        #elif(l.activation == 'linear_rectifier'):
            #1 if greater than 0, 0 otherwise.
            #This stores them as bools - but it doesn't matter
            #l.activation_derivative = np.greater(l.output,0)
        else:
            # base case is linear
            l.activation_derivative = cm.empty(l.output.shape)
            l.activation_derivative.assign_scalar(1.0)

        # bottom row of activation derivative is the bias 'neuron'
        l.delta = cm.empty(delta_temp.shape)
        l.activation_derivative.mult(delta_temp, target=l.delta)

        # calculate weight gradient
        #input_t = cm.empty((l.input.shape[1],l.input.shape[0]))
        #input_t = l.input.transpose()
        l.input.set_trans(True)
        l.gradient.add_dot(l.delta, l.input)
        l.input.set_trans(False)

    self.epoch_size = self.epoch_size + self.input.shape[1]
def compute_gamma_entropy(self, G):
    if not self.gpu:
        Prod = G * (np.log(G) - 1)
        ent = np.nan_to_num(Prod).sum()
    else:
        Prod = cm.empty(G.shape)
        Prod = G.mult(cm.log(G.copy()).subtract(1), target=Prod)
        ent = np.nan_to_num(Prod.asarray()).sum()
    return ent
def _calculate_moments_syn(self, x, ws, quick=False):
    """Calculate moments based on the weights and samples. We also
    calculate and save MI, TC, additivity, and the value of the objective.
    Note it is assumed that <X_i^2> = 1!"""
    m = {}  # Dictionary of moments
    eps = 10**-8
    if self.gpu:
        y = cm.empty((self.n_samples, self.m))
        wc = cm.CUDAMatrix(ws)
        cm.dot(x, wc.T, target=y)  # + noise, but it is included analytically
        del wc
    else:
        y = x.dot(ws.T)  # + noise, but it is included analytically
    if self.gpu:
        tmp_dot = cm.empty((self.nv, self.m))
        cm.dot(x.T, y, target=tmp_dot)
        m["X_i Y_j"] = tmp_dot.asarray() / self.n_samples  # nv by m, <X_i Y_j>
        del y
        del tmp_dot
    else:
        m["X_i Y_j"] = x.T.dot(y) / self.n_samples
    m["cy"] = ws.dot(m["X_i Y_j"]) + self.yscale**2 * np.eye(self.m)  # cov(y.T), m by m
    m["Y_j^2"] = np.diag(m["cy"]).copy()
    m["ry"] = m["cy"] / (np.sqrt(m["Y_j^2"]) * np.sqrt(m["Y_j^2"][:, np.newaxis]))
    m["rho"] = (m["X_i Y_j"] / np.sqrt(m["Y_j^2"])).T
    m["invrho"] = 1. / (1. - m["rho"]**2)
    m["rhoinvrho"] = m["rho"] * m["invrho"]
    m["Qij"] = np.dot(m['ry'], m["rhoinvrho"])
    m["Qi"] = np.einsum('ki,ki->i', m["rhoinvrho"], m["Qij"])
    m["Si"] = np.sum(m["rho"] * m["rhoinvrho"], axis=0)

    m["MI"] = -0.5 * np.log1p(-m["rho"]**2)
    m["X_i Z_j"] = np.linalg.solve(m["cy"], m["X_i Y_j"].T).T
    m["X_i^2 | Y"] = (1. - np.einsum('ij,ij->i', m["X_i Z_j"], m["X_i Y_j"])).clip(1e-6)
    mi_yj_x = 0.5 * np.log(m["Y_j^2"]) - 0.5 * np.log(self.yscale**2)
    mi_xi_y = -0.5 * np.log(m["X_i^2 | Y"])
    m["TCs"] = m["MI"].sum(axis=1) - mi_yj_x
    m["additivity"] = (m["MI"].sum(axis=0) - mi_xi_y).sum()
    m["TC"] = np.sum(mi_xi_y) - np.sum(mi_yj_x)
    return m
def loadNN(self, filename):
    d = scipy.io.loadmat(filename)
    for i in range(self.H):
        self.W.append(cm.CUDAMatrix(d['W%d' % i]))
        self.b.append(cm.CUDAMatrix(d['b%d' % i]))
        self.dW.append(cm.CUDAMatrix(np.zeros(self.W[i].shape)))
        self.W_inc.append(cm.CUDAMatrix(np.zeros(self.W[i].shape)))
        self.db.append(cm.CUDAMatrix(np.zeros(self.b[i].shape)))
        self.b_inc.append(cm.CUDAMatrix(np.zeros(self.b[i].shape)))
        self.h.append(cm.empty((self.W[i].shape[1], self.batchsize)))
def __init__(self, w, config=None):
    if type(w) == np.ndarray:
        self.w_ = cm.CUDAMatrix(w)
    elif type(w) == tuple:
        self.w_ = cm.empty(w)
    else:
        self.w_ = w
    self.dw_ = cm.empty_like(self.w_)
    self.dw_history_ = cm.empty_like(self.w_)
    self.dw_history_.assign(0)
    self.dw_.assign(0)
    self.t_ = 0
    self.rms_prop_ = config.rms_prop
    self.rms_prop_factor_ = config.rms_prop_factor
    if self.rms_prop_:
        self.rms_prop_history_ = cm.empty_like(self.dw_)
        self.rms_prop_history_.assign(1)
    if config is None:
        pass
    elif config.init_type == config_pb2.Param.CONSTANT:
        self.w_.assign(config.scale)
    elif config.init_type == config_pb2.Param.GAUSSIAN:
        self.w_.fill_with_randn()
        self.w_.mult(config.scale)
    elif config.init_type == config_pb2.Param.UNIFORM:
        self.w_.fill_with_rand()
        self.w_.subtract(0.5)
        self.w_.mult(2 * config.scale)
    elif config.init_type == config_pb2.Param.LSTM_BIAS:
        init_bias = [config.input_gate_bias, config.forget_gate_bias,
                     config.input_bias, config.output_gate_bias]
        self.w_.reshape((-1, 4))
        for i in xrange(4):
            self.w_.slice(i, (i + 1)).assign(init_bias[i])
        self.w_.reshape((-1, 1))
    elif config.init_type == config_pb2.Param.PRETRAINED:
        f = h5py.File(config.file_name)
        mat = f[config.dataset_name].value
        if len(mat.shape) == 1:
            mat = mat.reshape(1, -1)
        assert self.w_.shape == mat.shape
        self.w_.overwrite(mat)
        f.close()
    else:
        raise Exception('Unknown parameter initialization.')
    self.eps_ = config.epsilon
    self.momentum_ = config.momentum
    self.l2_decay_ = config.l2_decay
    self.gradient_clip_ = config.gradient_clip
    self.eps_decay_factor = config.eps_decay_factor
    self.eps_decay_after = config.eps_decay_after
def __init__(self, N=1000, pz=1, pg=0.1, g=1.5, alpha=1, dt=0.1, num_fits=1,
             num_inputs=0, state=None):
    cm.cublas_init()
    if state is not None:
        self.from_dict(state)
    else:
        self.N = N
        self.pg = pg
        self.pz = pz
        self.g = g
        self.alpha = alpha
        self.DT = dt
        self.num_fits = num_fits
        scale = 1.0 / np.sqrt(self.pg * self.N)
        M_rvs = stats.norm(loc=0, scale=scale).rvs
        self.M = sp.sparse.random(N, N, pg, data_rvs=M_rvs) * g
        self.M = cm.CUDAMatrix(self.M.toarray())
        self.P = (1.0 / self.alpha) * np.identity(N)
        self.wf = cm.CUDAMatrix(np.random.uniform(-1, 1, (N, num_fits)))
        #self.wo = np.expand_dims(stats.norm(loc=0,scale=(1.0/np.sqrt(N))).rvs(N),num_fits)
        self.wo = cm.CUDAMatrix(np.zeros((N, num_fits)))
        self.dw = np.zeros((N, num_fits))
        self.woc = np.zeros((N, 1))
        self.wfc = np.random.uniform(-1, 1, (N, 1))
        self.x = cm.CUDAMatrix(np.expand_dims(0.5 * np.random.randn(N), 1))
        self.xdt = cm.empty(self.x.shape).assign(0)
        self.r = cm.tanh(self.x)
        self.rdt = cm.empty(self.r.shape).assign(0)
        self.z = cm.CUDAMatrix(np.expand_dims(0.5 * np.random.randn(num_fits), 1))
        self.zdt = cm.empty(self.z.shape).assign(0)
        self.z_ctl = np.expand_dims(0.5 * np.random.randn(1), 1)
def alloc(self, *args):
    '''
    Allocate temporary GPU memory

    alloc(N, K, D)
    alloc(key_shape_mapping)
    '''
    if len(args) == 3:
        N, K, D = args
        key_shape_mapping = {
            'posteriors_NxK': (N, K),        # big
            'weighted_X_sum_KxD': (K, D),    # medium
            'vmax_Nx1': (N, 1),
            'logprob_Nx1': (N, 1),
            'inv_weights_Kx1': (K, 1),
            'temp_NxD': (N, D),              # big
            'temp_KxD': (K, D),              # medium
            'temp_KxD_2': (K, D),            # medium
            'temp_Kx1': (K, 1),
            'temp_Kx1_2': (K, 1),
        }
    elif len(args) == 1:
        key_shape_mapping = args[0]
    else:
        raise ValueError('TempGPUMem: alloc(N, K, D) or alloc(key_shape_mapping)')

    # allocate memory
    for key, shape in key_shape_mapping.iteritems():
        if key not in self:
            logger.debug('%s: created %s at key %s',
                         sys._getframe().f_code.co_name, shape, key)
            self[key] = cm.empty(shape)
        elif self[key].shape != shape:
            logger.debug('%s: reshaped %s from %s to %s',
                         sys._getframe().f_code.co_name, key,
                         self[key].shape, shape)
            self[key] = cm.empty(shape)
def _sig(self, x, u):
    """Multiply the matrix u by the covariance matrix of x. We are interested
    in situations where n_variables >> n_samples, so we do this without
    explicitly constructing the covariance matrix."""
    if self.gpu:
        y = cm.empty((self.n_samples, self.m))
        uc = cm.CUDAMatrix(u)
        cm.dot(x, uc.T, target=y)
        del uc
        tmp = cm.empty((self.nv, self.m))
        cm.dot(x.T, y, target=tmp)
        tmp_dot = tmp.asarray()
        del y
        del tmp
    else:
        y = x.dot(u.T)
        tmp_dot = x.T.dot(y)
    prod = (1 - self.eps**2) * tmp_dot.T / self.n_samples + self.eps**2 * u  # nv by m, <X_i Y_j> / std Y_j
    return prod
def train_step(self, X1, X2):
    [cost, grad] = self.calc_cost(X1, X2)
    grad.mult(self.lrate)
    self.speed.mult(self.moment)
    self.speed.subtract(grad)
    self.Wgpu.add(self.speed)

    # zero out any weights that went negative (non-negativity constraint)
    Wmask = cm.empty(self.W.shape)
    self.Wgpu.greater_than(0, target=Wmask)
    self.Wgpu.mult(Wmask)
    return cost
def getTestDataUpwards(self, data):
    batchsize = 4096
    numbatch = data.shape[1] / batchsize
    topdata = np.zeros((self.num_hid, data.shape[1]))
    for batch in range(numbatch):
        batchdata = data[:, batch * batchsize:(batch + 1) * batchsize]
        temp = cm.empty((self.num_hid, batchdata.shape[1]))
        vis = cm.CUDAMatrix(batchdata)
        self.hidActProb(vis, temp)
        temp.copy_to_host()
        topdata[:, batch * batchsize:(batch + 1) * batchsize] = temp.numpy_array
    return topdata
def load(self, modelnamelist):
    DBN.load(self, modelnamelist)
    for i in range(self.H):
        # print type(self.model[i].W)
        self.W.append(self.model[i].W)
        # self.W[i].copy_to_host()
        # print self.W[i].numpy_array
        self.dW.append(cm.CUDAMatrix(np.zeros(self.model[i].W.shape)))
        self.W_inc.append(cm.CUDAMatrix(np.zeros(self.model[i].W.shape)))
        self.b.append(self.model[i].hb)
        self.db.append(cm.CUDAMatrix(np.zeros(self.model[i].hb.shape)))
        self.b_inc.append(cm.CUDAMatrix(np.zeros(self.model[i].hb.shape)))
        self.h.append(cm.empty((self.model[i].num_hid, self.batchsize)))
def test_where():
    m = 256
    n = 128
    a = np.array(np.random.randn(m, n) * 10, dtype=np.float32, order='F')
    z = np.zeros_like(a)
    res = np.where(a > 0, a, z)

    a_d = cm.CUDAMatrix(a)
    z_d = cm.CUDAMatrix(z)
    res_d = cm.empty(a_d.shape)

    a_d.greater_than(0, res_d)
    cm.where(res_d, a_d, z_d)
    assert np.abs(res - res_d.asarray()).max() < 1e-2, "Error in cudamat.where"
def setVariables(self):
    n, m, r = self.n, self.m, self.rank
    self.G_gpu = cm.CUDAMatrix(self.G)
    self.W_gpu = cm.CUDAMatrix(self.W)
    self.X_gpu = cm.CUDAMatrix(self.X)
    self.XTX_gpu = cm.dot(self.X_gpu.T, self.X_gpu)
    self.XTXpos_gpu = cm.empty((m, m))
    self.XTX_gpu.greater_than(0, target=self.XTXpos_gpu)
    self.XTXpos_gpu.mult(self.XTX_gpu)
    self.XTXneg_gpu = cm.empty((m, m))
    self.XTXpos_gpu.subtract(self.XTX_gpu, target=self.XTXneg_gpu)
    self.XTXnegW_gpu = cm.empty((m, r))
    self.XTXposW_gpu = cm.empty((m, r))
    self.GWT_gpu = cm.empty((m, m))
    self.update1_gpu = cm.empty((m, r))
    self.update2_gpu = cm.empty((m, r))
    self.GTG_gpu = cm.empty((r, r))
    self.XTXnegG_gpu = cm.empty((m, r))
    self.XTXposG_gpu = cm.empty((m, r))
def __init__(self, datafolder=None, num_hid=None, options=None):
    if datafolder == None:
        return
    self.datafolder = datafolder
    self.datalist = os.listdir(datafolder)
    self.num_batchdata = len(self.datalist)
    mdict = scipy.io.loadmat(os.path.join(datafolder, self.datalist[0]))
    tempdata = mdict['data']
    self.options = options
    self.num_vis = tempdata.shape[0]
    self.num_hid = num_hid
    # print self.num_vis
    # print self.num_hid
    self.num_batches = tempdata.shape[1] / self.options.batchsize
    self.batch_size = self.options.batchsize
    self.doPCD = False
    self.cdstep = 1

    # initialize weights
    self.W = cm.CUDAMatrix(0.01 * np.random.randn(self.num_vis, self.num_hid))
    self.vb = cm.CUDAMatrix(np.zeros((self.num_vis, 1)))  # for a gaussian rbm, v_bias is the mean of the visible layer
    self.hb = cm.CUDAMatrix(np.zeros((self.num_hid, 1)))

    # initialize weight updates
    self.dW = cm.CUDAMatrix(np.zeros((self.num_vis, self.num_hid)))
    self.dvb = cm.CUDAMatrix(np.zeros((self.num_vis, 1)))
    self.dhb = cm.CUDAMatrix(np.zeros((self.num_hid, 1)))
    self.W_inc = cm.CUDAMatrix(np.zeros((self.num_vis, self.num_hid)))
    self.vb_inc = cm.CUDAMatrix(np.zeros((self.num_vis, 1)))
    self.hb_inc = cm.CUDAMatrix(np.zeros((self.num_hid, 1)))

    # initialize temporary storage
    self.v = cm.empty((self.num_vis, self.batch_size))   # a batch of data
    self.vm = cm.empty((self.num_vis, self.batch_size))  # temp storage of data - vb
    self.h = cm.empty((self.num_hid, self.batch_size))
    self.r = cm.empty((self.num_hid, self.batch_size))   # random numbers for the positive phase
    self.r2 = cm.empty((self.num_vis, self.batch_size))  # random numbers for the negative phase
def Get(self, blocksize=None):
    if blocksize and blocksize != self.blocksize:
        self.blocksize = blocksize
    if self._position >= self.current_size or self._data is None:
        self._position = 0
        if self.num_blocks > 1 or self._data is None:
            if self.verbose:
                print 'CACHE MISS in %s' % self.name
            if self.gpu:
                if self._data:
                    self._data.free_device_memory()
                self._data = cm.CUDAMatrix(self.source.Get((self.capacity)).reshape(1, -1))
                self.current_size = self._data.shape[1]
            else:
                self._data = self.source.Get((self.capacity))
                self.current_size = self._data.shape[0]
        if self.randomize:
            if self.permutation_link:
                self.indices = self.permutation_link.indices
            else:
                p = np.arange(self.current_size / self.numdims)
                np.random.shuffle(p)
                if self.gpu:
                    if self.indices is not None:
                        self.indices.free_device_memory()
                    p2 = p.view()
                    p2.shape = 1, -1
                    self.indices = cm.CUDAMatrix(p2)
                else:
                    self.indices = p
            if self.gpu:
                self._data.reshape((self.numdims, self.current_size / self.numdims))
                shuffled_data = cm.empty(self._data.shape)
                self._data.select_columns(self.indices, target=shuffled_data)
                self._data.free_device_memory()
                self._data = shuffled_data
                self._data.reshape((1, self.current_size))
            else:
                view = self._data.view()
                view.shape = self.current_size / self.numdims, self.numdims
                self._data = view[self.indices, :].reshape(-1)
    span = min(self.blocksize, self.current_size - self._position)
    self._position += span
    if self.gpu:
        return self._data.slice(self._position - span, self._position)
    else:
        return self._data[self._position - span:self._position]
def LoadParams(self, proto):
    self.hyperparams = proto.hyperparams
    param_names = [param.name for param in proto.param]
    for param in proto.param:
        if not param.dimensions:
            param.dimensions.extend([proto.numlabels * proto.dimensions])
        if param.mat:
            mat = util.ParameterAsNumpy(param).reshape(-1, 1)
        else:
            mat = self.InitializeParameter(param).reshape(-1, 1)
        self.params[param.name] = cm.CUDAMatrix(mat)
        if param.name == 'bias':
            self.grad_bias = cm.empty(mat.shape)
            self.grad_bias.assign(0)
    self.sample_input = self.hyperparams.sample_input