def recurrence(xp_t, xp_t1, xq_t1, mask_t, h_t_pre1, chs):
    """Scan step: GRU update, attention over recent hidden states, pairwise loss.

    The weights ``ui``, ``wh``, ``bi``, ``qh``, ``rh``, ``e`` and ``f`` are
    taken from the enclosing scope.  Shape notes below are carried over from
    the original author's comments (n = batch size, 20 = hidden size).

    :param xp_t: input embedding of the current step.
    :param xp_t1: embedding of the positive item at the next step.
    :param xq_t1: embedding of the negative item at the next step.
    :param mask_t: 0/1 mask for this step; applied to the loss only.
    :param h_t_pre1: hidden state from the previous step.
    :param chs: sliding window of the most recent hidden states.
    :return: ``[h_t, chs, loss_t]`` - new hidden state, updated window and
        the masked per-example log-sigmoid loss.
    """
    # GRU unit: update gate z and reset gate r are computed in one shot from
    # the first two slices of the stacked parameters.
    gates = sigmoid(
        T.dot(ui[:2], xp_t.T) + T.dot(wh[:2], h_t_pre1.T) + bi[:2])
    z = gates[0].T  # author's note: shape=(n, 20)
    r = gates[1].T
    candidate = tanh(
        T.dot(ui[2], xp_t.T) + T.dot(wh[2], (r * h_t_pre1).T) + bi[2])
    h_t = (T.ones_like(z) - z) * h_t_pre1 + z * candidate.T

    # Context window of hidden states: drop the oldest entry and append h_t
    # at the end, so the window keeps a fixed size.
    newest = h_t.dimshuffle(0, 'x', 1)
    chs = T.concatenate((chs[:, 1:, :], newest), axis=1)

    # Attention scores over the window.  Axis 2 has length 1; it is marked
    # broadcastable so that dimshuffle may drop it (softmax works row-wise).
    ehs = T.dot(tanh(T.dot(chs, qh)), rh)
    ehs = T.Rebroadcast((2, True))(ehs)
    ahs0 = softmax(ehs.dimshuffle(0, 1))
    # Restore the trailing axis and mark it broadcastable again so the
    # weights can multiply the window element-wise.
    ahs = T.Rebroadcast((2, True))(ahs0.dimshuffle(0, 1, 'x'))
    hcs = T.sum(chs * ahs, axis=1)

    # Overall representation: fuse the current state with the context.
    hws = tanh(T.dot(h_t, e.T) + T.dot(hcs, f.T))

    # Pairwise loss on h(t) * (xp(t+1) - xq(t+1)); only the loss needs to be
    # multiplied by the 0/1 mask.
    upq_t = T.sum(hws * (xp_t1 - xq_t1), axis=1)
    loss_t = T.log(sigmoid(upq_t)) * mask_t
    return [h_t, chs, loss_t]
def recurrence(xp_t, h_t_pre1, cxs):
    """Scan step: attention over a window of recent inputs, then a GRU update.

    The weights ``qx``, ``rx``, ``ui``, ``vc``, ``wh`` and ``bi`` are taken
    from the enclosing scope.  Shape notes are carried over from the original
    author's comments (n = batch size, 20 = hidden size, winx = window size).

    :param xp_t: input embedding of the current step.
    :param h_t_pre1: hidden state from the previous step.
    :param cxs: sliding window of the most recent inputs.
    :return: ``[h_t, cxs, axs0]`` - new hidden state, updated input window and
        the attention weight of every window position.
    """
    # Context window of inputs: drop the oldest entry and append xp_t at the
    # end, so the window keeps a fixed size.
    newest = xp_t.dimshuffle(0, 'x', 1)
    cxs = T.concatenate((cxs[:, 1:, :], newest), axis=1)

    # Attention scores.  Axis 2 has length 1; mark it broadcastable so that
    # dimshuffle may drop it, because softmax works row-wise.
    exs = T.dot(tanh(T.dot(cxs, qx)), rx)
    exs = T.Rebroadcast((2, True))(exs)
    axs0 = softmax(exs.dimshuffle(0, 1))  # author's note: shape=(n, winx)
    # Put the trailing axis back and make it broadcastable for the product.
    axs = T.Rebroadcast((2, True))(axs0.dimshuffle(0, 1, 'x'))
    xc = T.sum(cxs * axs, axis=1)  # attention-weighted input context

    # GRU unit; the context xc enters both the gates and the candidate.
    gates = sigmoid(
        T.dot(ui[:2], xp_t.T) +
        T.dot(vc[:2], xc.T) +
        T.dot(wh[:2], h_t_pre1.T) +
        bi[:2])
    z = gates[0].T
    r = gates[1].T
    candidate = tanh(
        T.dot(ui[2], xp_t.T) +
        T.dot(vc[2], xc.T) +
        T.dot(wh[2], (r * h_t_pre1).T) +
        bi[2])
    h_t = (T.ones_like(z) - z) * h_t_pre1 + z * candidate.T

    # The per-position weights are returned as well.
    return [h_t, cxs, axs0]
def filter_variable(self, other):
    """Return `other` as a variable of this type, or raise TypeError.

    Objects exposing ``_as_CudaNdarrayVariable`` are converted through it, and
    values that are not a ``Variable`` are wrapped in a ``Constant`` of this
    type.  A variable already of this type is returned untouched.  Anything
    else must be a ``TensorType`` with the same dtype and a broadcastable
    pattern that is at least as broadcastable as this type's; it is then
    rebroadcast if needed and moved through ``GpuFromHost``.

    :raises TypeError: on a non-tensor type, a dtype mismatch, or when this
        type is broadcastable along a dimension where `other` is not.
    """
    if hasattr(other, '_as_CudaNdarrayVariable'):
        other = other._as_CudaNdarrayVariable()

    if not isinstance(other, Variable):
        # Not a Variable: cast the raw value into a Constant of this Type.
        other = self.Constant(type=self, data=other)

    if other.type == self:
        return other

    if not isinstance(other.type, tensor.TensorType):
        raise TypeError('Incompatible type', (self, other.type))

    if other.type.dtype != self.dtype:
        raise TypeError('Incompatible dtype',
                        (self.dtype, other.type.dtype))

    theirs = other.type.broadcastable
    mine = self.broadcastable
    # We may only drop broadcastability, never invent it.
    needs_more = any(m and not t for t, m in zip(theirs, mine))
    if needs_more:
        raise TypeError('Incompatible broadcastable', (mine, theirs))

    if theirs != mine:
        other = tensor.Rebroadcast(*enumerate(mine))(other)

    return theano.sandbox.cuda.basic_ops.GpuFromHost()(other)
def translation_prediction(model_file):
    """Measure the test error of a pickled model under image translations.

    The model stored in ``model_file + '.pkl'`` is evaluated on the test split
    returned by ``loaddata_mnist`` for every horizontal/vertical shift in
    ``range(-20, 21, 2)``.  The averaged error of each shift is stored in
    ``error_spectrum[vertical // 2 + 10, horizontal // 2 + 10]`` of a 21x21
    array (cells that no shift maps to stay zero), which is also written to
    ``model_file + 'error_spectrum.mat'``.

    :param model_file: path of the pickled model, without the ``.pkl`` suffix.
    :return: the 21x21 ``numpy`` array of averaged errors.
    """
    y = T.ivector('y')
    index = T.lscalar()
    dummy = T.ftensor4('dummy')

    datasets = loaddata_mnist('mnist.pkl.gz')
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.reshape((10000, 1, 28, 28))

    # Shared buffer holding the currently translated test set.  Axis 1 is
    # marked broadcastable before the buffer is substituted for the model
    # input.
    temp_test_set_x = theano.shared(
        numpy.zeros(test_set_x.shape.eval(), dtype=theano.config.floatX),
        borrow=True)
    temp_test_set_xx = T.Rebroadcast((1, True))(temp_test_set_x)

    # NOTE: unpickling executes arbitrary code; only load model files that
    # come from a trusted source.
    with open(model_file + '.pkl', 'rb') as f:
        layer0, layer1, layer2_input, layer2, layer3 = pickle.load(f)

    error_spectrum = numpy.zeros((21, 21))

    # `copier` overwrites the shared buffer with the tensor passed as `dummy`;
    # the prediction function then reads its input from that buffer through
    # `givens`.
    update = (temp_test_set_x, dummy)
    copier = theano.function([dummy], temp_test_set_x, updates=[update])

    predict_model = theano.function(
        inputs=[index],
        outputs=layer3.errors(y),
        givens={
            layer0.input: temp_test_set_xx[index * 500: (index + 1) * 500],
            y: test_set_y[index * 500: (index + 1) * 500]})

    print('Start Predicting...')
    start_time = timeit.default_timer()
    for horizontal in range(-20, 21, 2):
        temp_time_1 = timeit.default_timer()
        for vertical in range(-20, 21, 2):
            tran_test_set = theano_translation_updating(
                test_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
            copier(tran_test_set)

            # 20 batches of 500 cover the 10000 test images.
            predicted_values = 0
            for batch_value in range(0, 20, 1):
                predicted_values = predict_model(batch_value) + predicted_values
            predicted_values = predicted_values / 20

            # Floor division keeps the indices integral on Python 3 as well.
            error_spectrum[vertical // 2 + 10,
                           horizontal // 2 + 10] = predicted_values
        temp_time_2 = timeit.default_timer()
        print('Horizontal' + str(horizontal))
        print('This loop ran for %.2fm' % ((temp_time_2 - temp_time_1) / 60.))
    end_time = timeit.default_timer()
    print('The code ran for %.2fm' % ((end_time - start_time) / 60.))

    scipy.io.savemat(model_file + 'error_spectrum.mat',
                     mdict={'Error_Spectrum': error_spectrum})
    return error_spectrum
def theano_translation_old(image_tensor_input, displacement, horizontal,
                           vertical, borrow=True):
    """Shift a batch of images by `displacement` pixels along one image axis.

    The shifted pixels are written into a zero-filled shared buffer of shape
    ``(10000, 1, 28, 28)`` whose axis 1 is marked broadcastable.

    :param image_tensor_input: 4-D image tensor to shift.
    :param displacement: signed shift in pixels.
    :param horizontal: when equal to 0, the shift is applied along axis 2.
    :param vertical: when equal to 0, the shift is applied along axis 3.
    :param borrow: forwarded to ``theano.shared`` for the output buffer.
    :return: the symbolic shifted tensor.

    NOTE(review): every slice stops at index 27, so the last row/column of a
    28-pixel image is never written - possibly an off-by-one; confirm before
    changing.
    """
    tx = image_tensor_input
    blank = numpy.zeros((10000, 1, 28, 28), dtype=theano.config.floatX)
    txout = theano.shared(blank, borrow=borrow)
    txout = T.Rebroadcast((1, True))(txout)

    if vertical == 0:
        if displacement >= 0:
            txout = T.set_subtensor(txout[:, :, :, displacement:27],
                                    tx[:, :, :, 0:27 - displacement])
        else:
            txout = T.set_subtensor(txout[:, :, :, 0:27 + displacement],
                                    tx[:, :, :, -displacement:27])

    if horizontal == 0:
        if displacement >= 0:
            txout = T.set_subtensor(txout[:, :, displacement:27, :],
                                    tx[:, :, 0:27 - displacement, :])
        else:
            # Mirror of the axis-3 branch above.  The previous slices
            # (0:27 - displacement and displacement:27) had the wrong sign for
            # a negative displacement and selected regions of different sizes.
            txout = T.set_subtensor(txout[:, :, 0:27 + displacement, :],
                                    tx[:, :, -displacement:27, :])
    return txout
def get_corrupted_input_whole_minibatch(self, inp, corruption_prob):
    """Randomly zero whole feature vectors of a 3-D input.

    One Bernoulli keep/drop value is drawn for every pair of indices along
    the first two axes and broadcast along the last axis.  The original
    comments describe `inp` as (seq_length, batch_size, 1024).

    :param inp: symbolic 3-D tensor.
    :param corruption_prob: probability of zeroing a feature vector.
    :return: `inp` multiplied by the broadcast 0/1 mask.
    """
    keep_prob = 1. - corruption_prob  # probability of drawing a 1
    mask_shape = (inp.shape[0], inp.shape[1], 1)
    keep_mask = self.thea_rng.binomial(
        size=mask_shape,
        n=1,
        p=keep_prob,
        dtype=theano.config.floatX)
    # The trailing length-1 axis must be flagged broadcastable to multiply.
    keep_mask = T.Rebroadcast((2, True))(keep_mask)
    return inp * keep_mask
def rotate_and_translate(self, coords, golkov=False, angle_std=0.392):
    """Apply a random rotation and a random translation to `coords`.

    The rotation matrix is produced in one of two ways: from the QR
    decomposition of a Gaussian matrix (cf. Golkov MSc thesis), or as a
    product of three Givens rotations with Gaussian angles.

    :param coords: coordinates of the molecule(s) to perturb
    :param golkov: True - build the rotation from a QR decomposition
                   False - build it from Givens rotations
    :param angle_std: Givens case only: standard deviation of the rotation
                      angles, which are drawn from a zero-mean Gaussian
    :return: the rotated and translated coordinates
    """
    rng = T.shared_randomstreams.RandomStreams()

    if golkov:
        gaussian = rng.normal((3, 3), dtype=floatX)
        # QR decomposition, Q is orthogonal, see Golkov MSc thesis, Lemma 1
        Q, upper = T.nlinalg.qr(gaussian)
        # Mezzadri 2007 "How to generate random matrices from the classical
        # compact groups" (stackoverflow.com/questions/30692742)
        sign_fix = T.nlinalg.AllocDiag()(T.sgn(upper.diagonal()))
        Q = T.dot(Q, sign_fix)
        # Multiply by the determinant (stackoverflow.com/questions/30132036)
        Q = Q * T.nlinalg.Det()(Q)
        rotation = Q
    else:
        angle = rng.normal((3,), avg=0., std=angle_std, ndim=1, dtype=floatX)
        cos_x, sin_x = T.cos(angle[0]), T.sin(angle[0])
        cos_y, sin_y = T.cos(angle[1]), T.sin(angle[1])
        cos_z, sin_z = T.cos(angle[2]), T.sin(angle[2])
        rot_x = T.as_tensor([1, 0, 0,
                             0, cos_x, -sin_x,
                             0, sin_x, cos_x]).reshape((3, 3))
        rot_y = T.as_tensor([cos_y, 0, -sin_y,
                             0, 1, 0,
                             sin_y, 0, cos_y]).reshape((3, 3))
        rot_z = T.as_tensor([cos_z, -sin_z, 0,
                             sin_z, cos_z, 0,
                             0, 0, 1]).reshape((3, 3))
        rotation = T.dot(T.dot(rot_z, rot_y), rot_x)

    # Rotate every molecule with the same matrix.
    perturbated_coords = T.dot(coords, rotation)

    # Bounds of the translation, from the extent along axis 1 and the
    # configured border margin.
    lowest = T.min(perturbated_coords, axis=1, keepdims=True)
    highest = T.max(perturbated_coords, axis=1, keepdims=True)
    margin = self.min_dist_from_border
    transl_min = (-self.endx + margin) - lowest
    transl_max = (self.endx - margin) - highest

    # One uniform sample per minibatch entry and coordinate; axis 1 is made
    # broadcastable so the same offset is added along that axis.
    rand01 = rng.uniform((self.minibatch_size, 1, 3), dtype=floatX)
    rand01 = T.Rebroadcast((1, True), )(rand01)
    rand_translation = rand01 * (transl_max - transl_min) + transl_min
    return perturbated_coords + rand_translation
def _rotation_index_maps(angle_rad):
    """Return (source, target) integer pixel coordinates for a shear rotation.

    Coordinates are centred, i.e. in [-14, 13] for a 28x28 image; row 0 of
    each array is x and row 1 is y.  The rotation is the product of three
    shears, flooring after each one.  Pixels whose target falls outside the
    image are left out of both arrays.
    """
    centred = numpy.arange(-14, 14)
    # Column k = 28 * (x + 14) + (y + 14): x varies slowest, y fastest.
    source = numpy.vstack((numpy.repeat(centred, 28),
                           numpy.tile(centred, 28))).astype(float)

    shear_x = numpy.array([[1, -numpy.tan(angle_rad / 2)], [0, 1]])
    shear_y = numpy.array([[1, 0], [numpy.sin(angle_rad), 1]])
    target = numpy.floor(numpy.dot(shear_x, source))
    target = numpy.floor(numpy.dot(shear_y, target))
    target = numpy.floor(numpy.dot(shear_x, target))

    inside = numpy.all((target >= -14) & (target < 14), axis=0)
    return source[:, inside].astype(int), target[:, inside].astype(int)


def theano_rotation(image_tensor_input, angle, borrow=True):
    """Rotate a batch of 28x28 images by `angle` degrees.

    Multiples of 90 among {0, +-90, +-180, +-270} are handled with flips and
    transposes; any other multiple of 90 returns the zero-filled buffer
    unchanged.  Other angles are first reduced into [-90, 90] with a 180
    degree flip and then rotated with a three-shear pixel scatter into a
    zero-filled shared buffer of shape ``(10000, 1, 28, 28)``.

    :param image_tensor_input: 4-D image tensor.
    :param angle: rotation angle in degrees.
    :param borrow: forwarded to ``theano.shared`` for the output buffer.
    :return: the symbolic rotated tensor, with axis 1 marked broadcastable.
    """
    tx = image_tensor_input
    txout = theano.shared(
        numpy.zeros((10000, 1, 28, 28), dtype=theano.config.floatX),
        borrow=borrow)

    if (angle % 90) == 0:
        if angle == 180 or angle == -180:
            txout = tx[:, :, ::-1, ::-1]
        if angle == 90 or angle == -270:
            txout = tx.dimshuffle(0, 1, 3, 2)
            txout = txout[:, :, :, ::-1]
        if angle == 270 or angle == -90:
            txout = tx.dimshuffle(0, 1, 3, 2)
            txout = txout[:, :, ::-1, :]
        if angle == 0:
            txout = tx
    else:
        if angle > 90 or angle < -90:
            tx = tx[:, :, ::-1, ::-1]
            angle = angle - numpy.sign(angle) * 180
        # Pixels that land outside the image are dropped.  Previously their
        # target was zeroed to (0, 0), so all of them were scattered onto the
        # centre pixel (14, 14) and could overwrite its correct value.
        source, target = _rotation_index_maps(numpy.radians(angle))
        txout = T.set_subtensor(
            txout[:, :, target[1, :] + 14, target[0, :] + 14],
            tx[:, :, source[1, :] + 14, source[0, :] + 14])

    txout = T.Rebroadcast((1, True))(txout)
    return txout
def get_corrupted_input_whole(self, inp, corruption_prob):
    """Randomly zero whole rows of a 2-D input (denoising mode 0).

    One Bernoulli keep/drop value is drawn per row and broadcast across the
    columns, so a feature vector is either kept intact or set to zero as a
    whole.  The original comments give (num, 1024) image features with a
    (num, 1) mask as the example.  `corruption_prob` is not range-checked
    here; the author's disabled check expected it to lie in [0, 1).

    :param inp: symbolic matrix.
    :param corruption_prob: probability of zeroing a row.
    :return: `inp` multiplied by the broadcast 0/1 mask.
    """
    keep_prob = 1. - corruption_prob  # probability of drawing a 1
    row_mask = self.thea_rng.binomial(
        size=(inp.shape[0], 1),
        n=1,
        p=keep_prob,
        dtype=theano.config.floatX)
    # The length-1 column axis must be flagged broadcastable to multiply.
    row_mask = T.Rebroadcast((1, True))(row_mask)
    return inp * row_mask
def theano_translation_update(image_tensor_input, image_tensor_output,
                              horizon_disp, verti_disp, set_size, borrows):
    """Translate a batch of images, writing into a caller-supplied buffer.

    A non-zero `verti_disp` is applied first (along axis 2), then a non-zero
    `horizon_disp` (along axis 3).  With both displacements zero the input is
    passed through.  `set_size` and `borrows` are accepted but not used.

    :param image_tensor_input: 4-D image tensor to shift.
    :param image_tensor_output: tensor the shifted pixels are written into.
    :param horizon_disp: signed shift along axis 3.
    :param verti_disp: signed shift along axis 2.
    :return: the symbolic result with axis 1 marked broadcastable.

    NOTE(review): every slice stops at index 27, so the last row/column of a
    28-pixel image is never copied - possibly an off-by-one; confirm.
    """

    def shift_rows(source, canvas, offset):
        # Positive offsets move content towards lower row indices.
        if offset > 0:
            dst, src = slice(0, 27 - offset), slice(offset, 27)
        elif offset < 0:
            dst, src = slice(-offset, 27), slice(0, 27 + offset)
        else:
            return source
        return T.set_subtensor(canvas[:, :, dst, :], source[:, :, src, :])

    def shift_cols(source, canvas, offset):
        # Positive offsets move content towards higher column indices.
        if offset > 0:
            dst, src = slice(offset, 27), slice(0, 27 - offset)
        elif offset < 0:
            dst, src = slice(0, 27 + offset), slice(-offset, 27)
        else:
            return source
        return T.set_subtensor(canvas[:, :, :, dst], source[:, :, :, src])

    result = image_tensor_input
    if verti_disp != 0:
        result = shift_rows(result, image_tensor_output, verti_disp)
    if horizon_disp != 0:
        result = shift_cols(result, image_tensor_output, horizon_disp)

    return T.Rebroadcast((1, True))(result)
def theano_translation(image_tensor_input, horizon_disp, verti_disp,
                       borrow=True):
    """Translate a batch of images into freshly allocated zero buffers.

    A non-zero `verti_disp` is applied first (along axis 2), then a non-zero
    `horizon_disp` (along axis 3); each shift writes into its own zero-filled
    shared buffer of shape ``(10000, 1, 28, 28)``.  With both displacements
    zero the input is passed through.  `borrow` is accepted but the buffers
    are always created with ``borrow=True``.

    :param image_tensor_input: 4-D image tensor to shift.
    :param horizon_disp: signed shift along axis 3.
    :param verti_disp: signed shift along axis 2.
    :return: the symbolic result with axis 1 marked broadcastable.

    NOTE(review): every slice stops at index 27, so the last row/column of a
    28-pixel image is never copied - possibly an off-by-one; confirm.
    """

    def blank_canvas():
        zeros = numpy.zeros((10000, 1, 28, 28), dtype=theano.config.floatX)
        return theano.shared(zeros, borrow=True)

    def shift_rows(source, offset):
        canvas = blank_canvas()
        # Positive offsets move content towards lower row indices.
        if offset > 0:
            dst, src = slice(0, 27 - offset), slice(offset, 27)
        elif offset < 0:
            dst, src = slice(-offset, 27), slice(0, 27 + offset)
        else:
            return source
        return T.set_subtensor(canvas[:, :, dst, :], source[:, :, src, :])

    def shift_cols(source, offset):
        canvas = blank_canvas()
        # Positive offsets move content towards higher column indices.
        if offset > 0:
            dst, src = slice(offset, 27), slice(0, 27 - offset)
        elif offset < 0:
            dst, src = slice(0, 27 + offset), slice(-offset, 27)
        else:
            return source
        return T.set_subtensor(canvas[:, :, :, dst], source[:, :, :, src])

    shifted = image_tensor_input
    if verti_disp != 0:
        shifted = shift_rows(shifted, verti_disp)
    if horizon_disp != 0:
        shifted = shift_cols(shifted, horizon_disp)

    return T.Rebroadcast((1, True))(shifted)
class fader(gr.sync_block):
    """GNU Radio sync block multiplying its input by a sum of complex tones.

    ``NS`` tones are kept as phasor tables in Theano shared variables.  Every
    call to `work` multiplies the input samples by the current ``oo`` vector
    and advances the per-tone phases.
    """

    # some consts
    N = 4096   # length of each phasor table and of the `oo` vector
    NS = 8     # number of tones

    # symbolic inputs
    step = T.matrix("step", dtype="complex64")
    l = T.iscalar("l")
    iv = T.cvector("iv")

    # shared state
    stepd = theano.shared(numpy.zeros((NS, N), dtype=numpy.complex64),
                          name="stepd")
    phase = theano.shared(numpy.asarray([1 + 0j] * NS, dtype=numpy.complex64),
                          name="phase")
    oo = theano.shared(numpy.asarray([1 + 0j] * N, dtype=numpy.complex64),
                       name="oo")

    # theano functions
    # set_step: replace the phasor tables.
    set_step = theano.function(
        inputs=[step],
        outputs=[],
        updates={stepd: step},
        name="set_step")
    # rval: return iv * oo[0:l], then advance `phase` and recompute `oo` as
    # the phase-weighted sum of the tables.
    rval = theano.function(
        inputs=[iv, l],
        outputs=[iv * oo[0:l]],
        updates={
            phase: phase * stepd[:, l - 1] * stepd[:, 1],
            oo: T.sum(
                T.Rebroadcast((1, True))(phase.dimshuffle(0, 'x')) * stepd,
                axis=0)},
        name="rval")

    def set_f(self, f):
        """Store `f` and load tables for ``NS`` random tones.

        Tone frequencies are drawn uniformly from [1, 100] and converted to a
        per-sample phase step with the sample rate ``self.fs``.
        """
        # Single-argument print() behaves the same on Python 2 and 3.
        print("set_f %f" % (f))
        self.f = f
        tones = [random.uniform(1, 100) for _ in range(self.NS)]
        stepval = [numpy.pi * 2.0 * tone / self.fs for tone in tones]
        # A real list (not a lazy map object) is required by numpy.vstack on
        # Python 3.
        tables = [
            numpy.exp(
                1j * numpy.arange(0, self.N * s, s, dtype=numpy.float32),
                dtype=numpy.complex64)
            for s in stepval]
        iv2 = numpy.vstack(tables)
        self.set_step(iv2)

    def __init__(self, fs, f):
        """Create the block for sample rate `fs` and initialise the tones."""
        gr.sync_block.__init__(
            self,
            name="theano_fader",
            in_sig=[numpy.complex64],
            out_sig=[numpy.complex64])
        self.fs = fs
        self.set_f(f)

    def work(self, input_items, output_items):
        """Process one buffer; returns the number of output items produced."""
        out = output_items[0]
        o = self.rval(input_items[0], len(output_items[0]))
        out[:] = o[0]
        return len(output_items[0])
def apply_dropout(self, state, include_prob, scale, theano_rng, input_space,
                  mask_value=0, per_example=True):
    """
    Apply a random binary mask to `state`.

    Parameters
    ----------
    state : tensor or tuple of tensors
        Value to mask.  Tuples are processed element by element.
    include_prob : float or None
        Probability passed as `p` to the binomial mask.  `None`, `1.0` and
        `1` return `state` unchanged.
    scale : float
        Factor multiplied into the values that are kept.  Must not be
        `None` when dropout is actually applied.
    theano_rng : object
        Random stream providing `binomial(p=..., size=..., dtype=...)`.
    input_space : object
        Only used when `per_example` is `False`: its
        `get_origin_batch(1)` result defines the shape of the shared mask.
    mask_value : number, optional
        Value substituted where the mask is 0.  Default is 0.
    per_example : bool, optional
        Sample a different mask value for every example in a batch.
        Default is `True`. If `False`, sample one mask per mini-batch.

    Returns
    -------
    masked : same structure as `state`
    """
    if include_prob in [None, 1.0, 1]:
        return state
    assert scale is not None
    if isinstance(state, tuple):
        # Forward every option by keyword.  The previous positional call
        # passed `mask_value` in the `input_space` slot and silently reset
        # `mask_value` and `per_example` to their defaults.
        # NOTE(review): the same `input_space` is handed to every element;
        # if it is a composite space the per-component space may be what is
        # wanted here - confirm.
        return tuple(
            self.apply_dropout(substate, include_prob, scale, theano_rng,
                               input_space=input_space,
                               mask_value=mask_value,
                               per_example=per_example)
            for substate in state)
    # TODO: all of this assumes that if it's not a tuple, it's
    # a dense tensor. It hasn't been tested with sparse types.
    # A method to format the mask (or any other values) as
    # the given symbolic type should be added to the Spaces
    # interface.
    if per_example:
        mask = theano_rng.binomial(p=include_prob, size=state.shape,
                                   dtype=state.dtype)
    else:
        batch = input_space.get_origin_batch(1)
        mask = theano_rng.binomial(p=include_prob, size=batch.shape,
                                   dtype=state.dtype)
        # Axes of length 1 in the one-example batch are marked broadcastable
        # so the single mask is shared across them.
        rebroadcast = T.Rebroadcast(
            *zip(range(batch.ndim), [s == 1 for s in batch.shape]))
        mask = rebroadcast(mask)
    if mask_value == 0:
        return state * mask * scale
    else:
        return T.switch(mask, state * scale, mask_value)
def compute_sub_all_scores(self, start_end):
    """Score every item for the users selected by `start_end`.

    Two preference scores per (user, item) pair - the dot product of the
    trained user and item vectors (last item row excluded; the original
    comment says padding must be removed) and ``self.prob`` - are combined
    non-linearly as ``dot(av, tanh(ar * s1 + ae * s2))``, evaluated for the
    whole mini-batch at once.  An earlier linear combination
    (``scores1 + self.wd * prob``) was left disabled by the author, who also
    checked the batched form against a per-pair numpy loop.

    :param start_end: index selecting the users of this sub-batch.
    :return: evaluated scores; the original comment gives the shape as
        (sub_n_user, n_item).
    """
    users = self.trained_users[start_end]
    items = self.trained_items[:-1]
    scores1 = T.dot(users, items.T)     # author's note: shape=(n, m)
    scores2 = self.prob[start_end]      # author's note: shape=(n, m)

    # (n, m) -> (n, m, 1), multiplied by the weight vectors as (1, d) rows,
    # then projected back with av as a (d, 1) column -> (n, m, 1).
    ar_row = self.ar.dimshuffle('x', 0)
    ae_row = self.ae.dimshuffle('x', 0)
    av_col = self.av.dimshuffle(0, 'x')
    hidden = tanh(
        T.dot(scores1.dimshuffle(0, 1, 'x'), ar_row) +
        T.dot(scores2.dimshuffle(0, 1, 'x'), ae_row))
    sub_all_scores = T.dot(hidden, av_col)

    # Mark the length-1 axis broadcastable so dimshuffle can drop it.
    sub_all_scores = T.Rebroadcast((2, True))(sub_all_scores)
    sub_all_scores = sub_all_scores.dimshuffle(0, 1)
    return sub_all_scores.eval()
def random_epoch_train_pt(learning_rate=0.05, weight_decay=0.001,
                          n_epochs=200, batch_size=500, name='Fashion'):
    """Fine-tune a pickled pre-trained network on randomly translated data.

    The layers are loaded from a fixed pickle file.  At the start of every
    epoch one random (horizontal, vertical) shift in [-10, 10] is applied to
    the train, validation and test sets; the network is then trained with SGD
    plus weight decay, using patience-based early stopping.  The layers are
    pickled to ``name + '.pkl'`` and the per-epoch validation errors (in
    percent) are saved to ``name + '.mat'``.  The drawn shifts are saved to
    ``rand_trans_x.npy`` / ``rand_trans_y.npy``.

    :param learning_rate: SGD step size.
    :param weight_decay: L2 coefficient added to every gradient.
    :param n_epochs: maximum number of epochs.
    :param batch_size: minibatch size used for batch counts and slicing.
        NOTE(review): the loaded layers may have been built for a fixed batch
        size - confirm before using a value other than the default.
    :param name: output file stem.
    """
    pre_trained_name = 'FashionMnist_0.05_0.001_[20, 30]no_decay_tanh'

    datasets = loaddata_mnist()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_valid = valid_set_x.get_value(borrow=True).shape[0]
    n_test = test_set_x.get_value(borrow=True).shape[0]

    test_set_x = test_set_x.reshape((n_test, 1, 28, 28))
    valid_set_x = valid_set_x.reshape((n_valid, 1, 28, 28))
    train_set_x = train_set_x.reshape((n_train, 1, 28, 28))

    # Shared buffers holding the currently translated copy of each split;
    # axis 1 is marked broadcastable before substitution for the model input.
    temp_train_set_x = theano.shared(
        numpy.zeros(train_set_x.shape.eval(), dtype=theano.config.floatX),
        borrow=True)
    temp_train_set_xx = T.Rebroadcast((1, True))(temp_train_set_x)
    temp_valid_set_x = theano.shared(
        numpy.zeros(valid_set_x.shape.eval(), dtype=theano.config.floatX),
        borrow=True)
    temp_valid_set_xx = T.Rebroadcast((1, True))(temp_valid_set_x)
    temp_test_set_x = theano.shared(
        numpy.zeros(test_set_x.shape.eval(), dtype=theano.config.floatX),
        borrow=True)
    temp_test_set_xx = T.Rebroadcast((1, True))(temp_test_set_x)

    n_train_batches = n_train // batch_size
    n_valid_batches = n_valid // batch_size
    n_test_batches = n_test // batch_size

    y = T.ivector('y')
    index = T.lscalar()
    dummy = T.ftensor4('dummy')

    # Each replace_* function overwrites one shared buffer with its argument.
    replace_train = theano.function(
        [dummy], temp_train_set_x, updates=[(temp_train_set_x, dummy)])
    replace_valid = theano.function(
        [dummy], temp_valid_set_x, updates=[(temp_valid_set_x, dummy)])
    replace_test = theano.function(
        [dummy], temp_test_set_x, updates=[(temp_test_set_x, dummy)])

    print('... loading the model')
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open(pre_trained_name + '.pkl', 'rb') as f:
        layer0, layer1, layer2_input, layer2, layer3 = pickle.load(f)

    cost = layer3.negative_log_likelihood(y)
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)
    updates = [
        (param_i, param_i - learning_rate * (grad_i + weight_decay * param_i))
        for param_i, grad_i in zip(params, grads)]

    patience_increase = 2
    improvement_threshold = 0.995

    # One shift per epoch.  At least 200 values are drawn (the historical
    # table size) and more when n_epochs exceeds it, which used to raise
    # IndexError.  randint(-10, 11) draws from the same inclusive range as
    # the deprecated random_integers(-10, 10).
    n_offsets = max(n_epochs, 200)
    rand_trans_x = numpy.random.randint(-10, 11, n_offsets)
    rand_trans_y = numpy.random.randint(-10, 11, n_offsets)
    numpy.save('rand_trans_x.npy', rand_trans_x)
    numpy.save('rand_trans_y.npy', rand_trans_y)
    error_line = numpy.zeros(n_epochs)

    def minibatch(data):
        # Slice with batch_size so it agrees with the batch counts above
        # (the slices were previously hard-coded to 500).
        return data[index * batch_size: (index + 1) * batch_size]

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={layer0.input: minibatch(temp_test_set_xx),
                y: minibatch(test_set_y)})
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={layer0.input: minibatch(temp_valid_set_xx),
                y: minibatch(valid_set_y)})
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={layer0.input: minibatch(temp_train_set_xx),
                y: minibatch(train_set_y)})

    start_time = timeit.default_timer()
    print('... training')
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    patience = 20000
    validation_frequency = min(n_train_batches, patience // 2)
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        horizontal = rand_trans_x[epoch]
        vertical = rand_trans_y[epoch]
        tran_test_set_x = theano_translation_updating(
            test_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        tran_valid_set_x = theano_translation_updating(
            valid_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        tran_train_set_x = theano_translation_updating(
            train_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        replace_test(tran_test_set_x)
        replace_valid(tran_valid_set_x)
        replace_train(tran_train_set_x)

        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('Horizontal Shift:', horizontal,
                      'Vertical Shift:', vertical)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                error_line[epoch - 1] = this_validation_loss

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    # The former t_layer* names were plain aliases of these objects (and
    # could be unbound if no validation step ran), so dump them directly.
    # NOTE(review): this stores the parameters as they are at the end of
    # training, not a snapshot taken at the best validation score.
    with open(name + '.pkl', 'wb') as f:
        pickle.dump([layer0, layer1, layer2_input, layer2, layer3], f)

    error_line = error_line[0:epoch - 1] * 100
    scipy.io.savemat(name + '.mat', mdict={'Error_Spectrum': error_line})

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
def random_epoch_train_begining(learning_rate=0.05, weight_decay=0.001,
                                nkerns=(20, 50), n_epochs=200,
                                batch_size=500, dataset='mnist.pkl.gz',
                                name_given='test'):
    """Train a LeNet-style network from scratch on randomly translated data.

    Two conv/pool layers, one tanh hidden layer and a logistic-regression
    output are built and trained with SGD plus weight decay.  At the start of
    every epoch one random (horizontal, vertical) shift in [-10, 10] is
    applied to the train, validation and test sets.  Training uses
    patience-based early stopping.  The layers are pickled to
    ``name_given + '.pkl'`` and the per-epoch validation errors (in percent)
    are saved to ``name_given + '.mat'``.  The drawn shifts are saved to
    ``rand_trans_x.npy`` / ``rand_trans_y.npy``.

    :param learning_rate: SGD step size.
    :param weight_decay: L2 coefficient added to every gradient.
    :param nkerns: number of kernels of the two convolutional layers.
    :param n_epochs: maximum number of epochs.
    :param batch_size: minibatch size used to build the layers, to count the
        batches and to slice the data.
    :param dataset: dataset path handed to ``loaddata_mnist``.
    :param name_given: output file stem.
    """
    name = name_given
    rng = numpy.random.RandomState(23455)

    datasets = loaddata_mnist(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_valid = valid_set_x.get_value(borrow=True).shape[0]
    n_test = test_set_x.get_value(borrow=True).shape[0]

    test_set_x = test_set_x.reshape((n_test, 1, 28, 28))
    valid_set_x = valid_set_x.reshape((n_valid, 1, 28, 28))
    train_set_x = train_set_x.reshape((n_train, 1, 28, 28))

    # Shared buffers holding the currently translated copy of each split;
    # axis 1 is marked broadcastable before substitution for the model input.
    temp_train_set_x = theano.shared(
        numpy.zeros(train_set_x.shape.eval(), dtype=theano.config.floatX),
        borrow=True)
    temp_train_set_xx = T.Rebroadcast((1, True))(temp_train_set_x)
    temp_valid_set_x = theano.shared(
        numpy.zeros(valid_set_x.shape.eval(), dtype=theano.config.floatX),
        borrow=True)
    temp_valid_set_xx = T.Rebroadcast((1, True))(temp_valid_set_x)
    temp_test_set_x = theano.shared(
        numpy.zeros(test_set_x.shape.eval(), dtype=theano.config.floatX),
        borrow=True)
    temp_test_set_xx = T.Rebroadcast((1, True))(temp_test_set_x)

    n_train_batches = n_train // batch_size
    n_valid_batches = n_valid // batch_size
    n_test_batches = n_test // batch_size

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()
    dummy = T.ftensor4('dummy')

    # Each replace_* function overwrites one shared buffer with its argument.
    replace_train = theano.function(
        [dummy], temp_train_set_x, updates=[(temp_train_set_x, dummy)])
    replace_valid = theano.function(
        [dummy], temp_valid_set_x, updates=[(temp_valid_set_x, dummy)])
    replace_test = theano.function(
        [dummy], temp_test_set_x, updates=[(temp_test_set_x, dummy)])

    print('... loading the model')
    layer0_input = x.reshape((batch_size, 1, 28, 28))
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2))
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2))
    layer2_input = layer1.output.flatten(2)
    # fully-connected hidden layer (tanh activation)
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh)
    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    cost = layer3.negative_log_likelihood(y)
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)
    updates = [
        (param_i, param_i - learning_rate * (grad_i + weight_decay * param_i))
        for param_i, grad_i in zip(params, grads)]

    patience_increase = 2
    improvement_threshold = 0.995
    start_time = timeit.default_timer()

    # One shift per epoch.  At least 200 values are drawn (the historical
    # table size) and more when n_epochs exceeds it, which used to raise
    # IndexError.  randint(-10, 11) draws from the same inclusive range as
    # the deprecated random_integers(-10, 10).
    n_offsets = max(n_epochs, 200)
    rand_trans_x = numpy.random.randint(-10, 11, n_offsets)
    rand_trans_y = numpy.random.randint(-10, 11, n_offsets)
    numpy.save('rand_trans_x.npy', rand_trans_x)
    numpy.save('rand_trans_y.npy', rand_trans_y)
    error_line = numpy.zeros(n_epochs)

    def minibatch(data):
        # Slice with batch_size so it agrees with the layer shapes and batch
        # counts above (the slices were previously hard-coded to 500).
        return data[index * batch_size:(index + 1) * batch_size]

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={layer0.input: minibatch(temp_test_set_xx),
                y: minibatch(test_set_y)})
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={layer0.input: minibatch(temp_valid_set_xx),
                y: minibatch(valid_set_y)})
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={layer0.input: minibatch(temp_train_set_xx),
                y: minibatch(train_set_y)})

    print('... training')
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    patience = 20000
    validation_frequency = min(n_train_batches, patience // 2)
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        horizontal = rand_trans_x[epoch]
        vertical = rand_trans_y[epoch]
        tran_test_set_x = theano_translation_updating(
            test_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        tran_valid_set_x = theano_translation_updating(
            valid_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        tran_train_set_x = theano_translation_updating(
            train_set_x, horizontal, vertical).reshape((-1, 1, 28, 28))
        replace_test(tran_test_set_x)
        replace_valid(tran_valid_set_x)
        replace_train(tran_train_set_x)

        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('Horizontal Shift:', horizontal,
                      'Vertical Shift:', vertical)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                error_line[epoch - 1] = this_validation_loss

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    # The former t_layer* names were plain aliases of these objects, so the
    # layers are dumped directly.
    # NOTE(review): this stores the parameters as they are at the end of
    # training, not a snapshot taken at the best validation score.
    with open(name + '.pkl', 'wb') as f:
        pickle.dump([layer0, layer1, layer2_input, layer2, layer3], f)

    error_line = error_line[0:epoch - 1] * 100
    scipy.io.savemat(name + '.mat', mdict={'Error_Spectrum': error_line})

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
def __theano_predict__(self, n_in, n_hidden):
    """
    Compile ``self.seq_predict``: at test time the training sequences are run
    through the GRU once more and one representation per user is produced for
    all selected users in a single call.

    The compiled function takes an int32 vector of row indices; those rows of
    ``self.tra_buys_masks`` (item indices) and ``self.tra_masks`` (masks) are
    substituted for the symbolic inputs. It returns ``hws``.

    :param n_in: not referenced by this method.
    :param n_hidden: hidden size used to tile the bias and the initial state.
    """
    w_in, w_hid = self.ui, self.wh
    att_q, att_r = self.qh, self.rh
    mix_cur, mix_ctx = self.e, self.f
    n_tail = self.window_hidden

    mask_sym = T.imatrix()  # (n, max_len) according to the original notes
    n_users = mask_sym.shape[0]
    lengths = T.sum(mask_sym, axis=1)  # per-row sum of the mask = sequence length
    steps = T.max(lengths)  # the longest sequence in the batch bounds the scan

    # Tile the bias once per user, then move the batch axis to the end:
    # (n, 3, n_hidden) -> (3, n_hidden, n).
    bias = T.alloc(self.bi, n_users, 3, n_hidden).dimshuffle(1, 2, 0)

    idx_sym = T.imatrix()
    # Rows of self.lt picked by the index matrix, rearranged time-major for
    # scan: (n, len, dim) -> (len, n, dim).
    inputs = self.lt[idx_sym].dimshuffle(1, 0, 2)

    def gru_step(x_t, h_prev):
        # GRU unit: the first two weight slices give z and r, the third the
        # candidate state.
        gates = sigmoid(
            T.dot(w_in[:2], x_t.T) + T.dot(w_hid[:2], h_prev.T) + bias[:2])
        update, reset = gates[0].T, gates[1].T
        candidate = tanh(
            T.dot(w_in[2], x_t.T) + T.dot(w_hid[2], (reset * h_prev).T) +
            bias[2])
        return (T.ones_like(update) - update) * h_prev + update * candidate.T

    h_seq, _ = theano.scan(
        fn=gru_step,
        sequences=inputs,
        outputs_info=T.alloc(self.h0, n_users, n_hidden),
        n_steps=steps)

    # Batch-major hidden states, then each user's state at its last valid step.
    # Both row and column index vectors are given explicitly (fancy indexing);
    # the original notes say the mask must be int32 for this to work.
    per_user = h_seq.dimshuffle(1, 0, 2)
    last_state = per_user[T.arange(n_users), lengths - 1]

    def tail_states(user_states, user_mask):
        # Last `n_tail` valid hidden states of one user.
        # NOTE(review): if a sequence is shorter than the window the slice
        # start becomes negative and wraps from the end - confirm every
        # sequence has at least window_hidden valid steps.
        end = T.sum(user_mask)
        return user_states[end - n_tail:end]

    tails, _ = theano.scan(
        fn=tail_states,
        sequences=[per_user, mask_sym],
        outputs_info=None,
        n_steps=n_users)

    # Attention over each user's window of hidden states.
    scores = T.dot(tanh(T.dot(tails, att_q)), att_r)
    # Mark axis 2 broadcastable so it can be dropped; softmax works row-wise.
    scores = T.Rebroadcast((2, True))(scores)
    weights = softmax(scores.dimshuffle(0, 1))
    # Put the axis back (broadcastable) so the weights multiply the states.
    weights_col = T.Rebroadcast((2, True))(weights.dimshuffle(0, 1, 'x'))
    context = T.sum(tails * weights_col, axis=1)

    # Final representation: fuse the last state with the attended context.
    hws = tanh(T.dot(last_state, mix_cur.T) + T.dot(context, mix_ctx.T))

    # Data is supplied through `givens`, indexed by the function's only input.
    start_end = T.ivector()
    self.seq_predict = theano.function(
        inputs=[start_end],
        outputs=hws,
        givens={
            idx_sym: self.tra_buys_masks[start_end],
            mask_sym: self.tra_masks[start_end]
        })
def __theano_predict__(self, n_in, n_hidden):
    """
    Compile ``self.seq_predict``: at test time the training sequences are run
    through the network once more and one representation per user is produced
    for all selected users in a single call.

    The compiled function takes an int32 vector of row indices; those rows of
    ``self.tra_buys_masks`` (item indices) and ``self.tra_masks`` (masks) are
    substituted for the symbolic inputs. It returns two outputs: ``hws`` and
    the concatenation of the tail input-attention weights with the
    hidden-state attention weights.

    :param n_in: input size, used to shape the initial input window.
    :param n_hidden: hidden size used to tile the bias and the initial state.
    """
    w_in, w_hid = self.ui, self.wh
    in_q, in_r, w_ctx = self.qx, self.rx, self.vc
    hid_q, hid_r = self.qh, self.rh
    mix_cur, mix_ctx = self.e, self.f
    n_in_win, n_tail = self.window_input, self.window_hidden

    mask_sym = T.imatrix()  # (n, max_len) according to the original notes
    n_users = mask_sym.shape[0]
    lengths = T.sum(mask_sym, axis=1)  # per-row sum of the mask = sequence length
    steps = T.max(lengths)  # the longest sequence in the batch bounds the scan

    # Tile the bias once per user, then move the batch axis to the end:
    # (n, 3, n_hidden) -> (3, n_hidden, n).
    bias = T.alloc(self.bi, n_users, 3, n_hidden).dimshuffle(1, 2, 0)

    idx_sym = T.imatrix()
    # Rows of self.lt picked by the index matrix, rearranged time-major for
    # scan: (n, len, dim) -> (len, n, dim).
    inputs = self.lt[idx_sym].dimshuffle(1, 0, 2)

    def attentive_gru_step(x_t, h_prev, window):
        # Fixed-size input window: drop the oldest row, append the current
        # input at the bottom; the window shape does not change.
        window = T.concatenate(
            (window[:, 1:, :], x_t.dimshuffle(0, 'x', 1)), axis=1)

        # Attention over the input window.
        scores = T.dot(tanh(T.dot(window, in_q)), in_r)
        # Mark axis 2 broadcastable so it can be dropped; softmax is row-wise.
        scores = T.Rebroadcast((2, True))(scores)
        weights = softmax(scores.dimshuffle(0, 1))
        # Put the axis back (broadcastable) so the weights multiply the window.
        weights_col = T.Rebroadcast((2, True))(weights.dimshuffle(0, 1, 'x'))
        x_ctx = T.sum(window * weights_col, axis=1)

        # GRU unit fed by the current input, the attended input context and
        # the previous hidden state.
        gates = sigmoid(
            T.dot(w_in[:2], x_t.T) + T.dot(w_ctx[:2], x_ctx.T) +
            T.dot(w_hid[:2], h_prev.T) + bias[:2])
        update, reset = gates[0].T, gates[1].T
        candidate = tanh(
            T.dot(w_in[2], x_t.T) + T.dot(w_ctx[2], x_ctx.T) +
            T.dot(w_hid[2], (reset * h_prev).T) + bias[2])
        h_t = (T.ones_like(update) - update) * h_prev + update * candidate.T
        # The per-position weights are returned too so they can be inspected.
        return [h_t, window, weights]

    init_hidden = T.alloc(self.h0, n_users, n_hidden)
    # The initial window is filled with the last row of self.lt.
    init_window = T.alloc(self.lt[-1], n_users, n_in_win, n_in)
    scan_out, _ = theano.scan(
        fn=attentive_gru_step,
        sequences=inputs,
        outputs_info=[init_hidden, init_window, None],
        n_steps=steps)
    h_seq, att_seq = scan_out[0], scan_out[2]

    # Input-attention weights for each user's last `n_tail` steps: every one of
    # those steps attends over the input window, so each user contributes a
    # (window_hidden, window_input) slab, flattened to one row.
    att_per_user = att_seq.dimshuffle(1, 0, 2)

    def tail_input_weights(user_att, user_mask):
        # NOTE(review): a sequence shorter than the window makes the slice
        # start negative (wraps from the end) - confirm this cannot happen.
        end = T.sum(user_mask)
        slab = user_att[end - n_tail:end]
        # Flattened row-major, so the current step's weights end up rightmost.
        return slab.reshape((n_tail * slab.shape[1], ))

    att_winxh, _ = theano.scan(
        fn=tail_input_weights,
        sequences=[att_per_user, mask_sym],
        outputs_info=None,
        n_steps=n_users)

    # Batch-major hidden states, then each user's state at its last valid step.
    # Both row and column index vectors are given explicitly (fancy indexing);
    # the original notes say the mask must be int32 for this to work.
    per_user = h_seq.dimshuffle(1, 0, 2)
    last_state = per_user[T.arange(n_users), lengths - 1]

    def tail_states(user_states, user_mask):
        # Last `n_tail` valid hidden states of one user (same slicing caveat
        # as for the attention weights).
        end = T.sum(user_mask)
        return user_states[end - n_tail:end]

    tails, _ = theano.scan(
        fn=tail_states,
        sequences=[per_user, mask_sym],
        outputs_info=None,
        n_steps=n_users)

    # Attention over each user's window of hidden states.
    h_scores = T.Rebroadcast((2, True))(
        T.dot(tanh(T.dot(tails, hid_q)), hid_r))
    h_weights = softmax(h_scores.dimshuffle(0, 1))
    h_weights_col = T.Rebroadcast((2, True))(
        h_weights.dimshuffle(0, 1, 'x'))
    context = T.sum(tails * h_weights_col, axis=1)

    # Final representation: fuse the last state with the attended context.
    hws = tanh(T.dot(last_state, mix_cur.T) + T.dot(context, mix_ctx.T))

    # Tail-of-sequence weights side by side: the flattened input-attention
    # weights followed by the hidden-state attention weights.
    all_att_winxh_winh = T.concatenate((att_winxh, h_weights), axis=1)

    # Data is supplied through `givens`, indexed by the function's only input.
    start_end = T.ivector()
    self.seq_predict = theano.function(
        inputs=[start_end],
        outputs=[hws, all_att_winxh_winh],
        givens={
            idx_sym: self.tra_buys_masks[start_end],
            mask_sym: self.tra_masks[start_end]
        })