def output_probabilistic_sep(self, mx_previous, vx_previous):
    # create placeholders
    mout = []
    vout = []
    # compute the psi0 term
    psi0 = self.kern.compute_psi0_theano(self.ls, self.sf, mx_previous, vx_previous)
    for d in range(self.Dout):
        # compute the psi1 and psi2 terms
        psi1 = self.kern.compute_psi1_theano(
            self.ls, self.sf, mx_previous, vx_previous, self.zu[d])
        psi1psi1T = T.outer(psi1, psi1.T)
        psi2 = self.kern.compute_psi2_theano(
            self.ls, self.sf, mx_previous, vx_previous, self.zu[d])
        # precompute some terms
        psi1Kinv = T.dot(psi1, self.Kuuinv[d])
        Kinvpsi2 = T.dot(self.Kuuinv[d], psi2)
        Kinvpsi2Kinv = T.dot(Kinvpsi2, self.Kuuinv[d])
        vconst = T.exp(2 * self.sn) + (psi0 - Talg.trace(Kinvpsi2))
        mud = self.muhat[d]
        Sud = self.Suhat[d]
        moutd = T.sum(T.dot(psi1Kinv, mud))
        mout.append(moutd)
        Splusmm = Sud + T.outer(mud, mud)
        voutd = vconst + Talg.trace(T.dot(Splusmm, Kinvpsi2Kinv)) - moutd ** 2
        vout.append(T.sum(voutd))
    return mout, vout
def build(self, dim):
    M = theano.shared(value=np.eye(dim, dtype='float32'), name='M', borrow=True)

    # pull error: Mahalanobis distances between target neighbour pairs
    pull_error = 0.
    ivectors = self._x[self._neighborpairs[:, 0]]
    jvectors = self._x[self._neighborpairs[:, 1]]
    diffv = ivectors - jvectors
    pull_error = linalg.trace(diffv.dot(M).dot(diffv.T))

    # push error: hinge loss on triplets whose third point has a different label
    push_error = 0.0
    ivectors = self._x[self._set[:, 0]]
    jvectors = self._x[self._set[:, 1]]
    lvectors = self._x[self._set[:, 2]]
    diffij = ivectors - jvectors
    diffil = ivectors - lvectors
    lossij = diffij.dot(M).dot(diffij.T)
    lossil = diffil.dot(M).dot(diffil.T)
    mask = T.neq(self._y[self._set[:, 0]], self._y[self._set[:, 2]])
    push_error = linalg.trace(mask * T.maximum(lossij - lossil + 1, 0))

    error = (1 - self.mu) * pull_error + self.mu * push_error
    updates = [(M, M - self._lr * T.grad(error, M))]

    self.M = M
    self.updates = updates
    self.pull_error = pull_error
    self.push_error = push_error
    self.built = True
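For context (a standard result stated here for reference, not taken from the snippet itself): this build step appears to evaluate the large-margin nearest-neighbour (LMNN) objective of Weinberger and Saul, with the trace calls summing the per-pair terms in batch form:

\epsilon(M) = (1-\mu)\sum_{(i,j)} (x_i - x_j)^\top M (x_i - x_j)
            + \mu \sum_{(i,j,l)} \mathbb{1}[y_i \ne y_l]\,
              \big[\,1 + (x_i - x_j)^\top M (x_i - x_j) - (x_i - x_l)^\top M (x_i - x_l)\,\big]_+ ,

where the first sum runs over target-neighbour pairs and the second over the stored triplets.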
def gaussian_kl_loss(mx, Sx, mt, St):
    '''
        Returns KL ( Normal(mx, Sx) || Normal(mt, St) )
    '''
    if St is None:
        target_samples = mt
        mt, St = empirical_gaussian_params(target_samples)

    if Sx is None:
        # evaluate empirical KL (expectation over the rolled out samples)
        x = mx
        mx, Sx = empirical_gaussian_params(x)

        def logprob(x, m, S):
            delta = x - m
            L = cholesky(S)
            beta = solve_lower_triangular(L, delta.T).T
            lp = -0.5 * tt.square(beta).sum(-1)
            lp -= tt.sum(tt.log(tt.diagonal(L)))
            lp -= (0.5 * m.size * tt.log(2 * np.pi)).astype(
                theano.config.floatX)
            return lp

        return (logprob(x, mx, Sx) - logprob(x, mt, St)).mean(0)
    else:
        delta = mt - mx
        Stinv = matrix_inverse(St)
        kl = tt.log(det(St)) - tt.log(det(Sx))
        kl += trace(Stinv.dot(delta.T.dot(delta) + Sx - St))
        return 0.5 * kl
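For reference, the closed-form branch above (both Sx and St given) computes the standard Gaussian KL divergence

KL\big(\mathcal{N}(m_x, S_x)\,\|\,\mathcal{N}(m_t, S_t)\big)
  = \tfrac{1}{2}\Big[\log|S_t| - \log|S_x| - d
    + \operatorname{tr}(S_t^{-1} S_x)
    + (m_t - m_x)^\top S_t^{-1} (m_t - m_x)\Big],

with d the dimensionality; the single trace call folds the last three terms into \operatorname{tr}\big(S_t^{-1}\big((m_t - m_x)(m_t - m_x)^\top + S_x - S_t\big)\big).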
def spectral_radius_bound(X, log2_exponent):
    """
    Returns upper bound on the largest eigenvalue of square symmetric matrix X.

    log2_exponent must be a positive-valued integer. The larger it is, the
    slower and tighter the bound. Values up to 5 should usually suffice. The
    algorithm works by multiplying X by itself this many times.

    From V. Pan, 1990. "Estimating the Extremal Eigenvalues of a Symmetric
    Matrix", Computers Math Applic. Vol 20 n. 2 pp 17-22.
    Rq: an efficient algorithm, not used here, is defined in this paper.
    """
    if X.type.ndim != 2:
        raise TypeError('spectral_radius_bound requires a matrix argument', X)
    if not isinstance(log2_exponent, int):
        raise TypeError('spectral_radius_bound requires an integer exponent',
                        log2_exponent)
    if log2_exponent <= 0:
        raise ValueError('spectral_radius_bound requires a strictly positive '
                         'exponent', log2_exponent)

    XX = X
    for i in xrange(log2_exponent):
        XX = tensor.dot(XX, XX)
    return tensor.pow(trace(XX), 2 ** (-log2_exponent))
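A minimal NumPy sketch of the quantity this function returns (illustrative only; the array and variable names below are ours, not from the original code): for symmetric X, trace(X^(2^k)) ** (2^-k) upper-bounds the spectral radius and tightens as k grows.

import numpy as np

# Pan's bound: trace(X^(2^k)) ** (2^-k) >= max |eigenvalue(X)| for symmetric X,
# and the bound tightens as k increases.
rng = np.random.RandomState(0)
A = rng.randn(6, 6)
X = A + A.T                                          # symmetric test matrix
true_radius = np.max(np.abs(np.linalg.eigvalsh(X)))  # exact spectral radius
XX = X
for k in range(1, 6):
    XX = XX.dot(XX)                                  # now XX == X ** (2 ** k)
    bound = np.trace(XX) ** (2.0 ** -k)
    print('k=%d  bound=%.4f  true=%.4f' % (k, bound, true_radius))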
def get_tensor_traces_scan(self, tensor_in):
    # trace of each matrix slice along the leading axis of a 3-D tensor
    result, updates = th.scan(fn=lambda tensor_in: nlinalg.trace(tensor_in),
                              outputs_info=None,
                              sequences=[tensor_in],
                              non_sequences=[])
    return result
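A small usage sketch of the same scan pattern (assuming Theano is installed; the names stack, trace_fn and batch are illustrative, not from the original code):

import numpy as np
import theano
import theano.tensor as tt
from theano.tensor import nlinalg

# per-slice trace of a stacked (batch, n, n) tensor via scan over the leading axis
stack = tt.tensor3('stack')
traces, _ = theano.scan(fn=lambda mat: nlinalg.trace(mat), sequences=[stack])
trace_fn = theano.function([stack], traces)

batch = np.arange(2 * 3 * 3).reshape(2, 3, 3).astype(theano.config.floatX)
print(trace_fn(batch))   # equivalent to np.trace(batch, axis1=1, axis2=2)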
def batch_cca_loss(y_true, y_pred):
    """ Return Sum of the diagonal - Sum of upper and lower triangles """
    trace = TN.trace(y_true[0, :, :])
    triu_sum = K.sum(K.abs(TB.triu(y_true[0, :, :], k=1)))
    tril_sum = K.sum(K.abs(TB.tril(y_true[0, :, :], k=-1)))
    return trace - tril_sum - triu_sum
def test_trace():
    rng = np.random.RandomState(utt.fetch_seed())
    x = theano.tensor.matrix()
    g = trace(x)
    f = theano.function([x], g)

    for shp in [(2, 3), (3, 2), (3, 3)]:
        m = rng.rand(*shp).astype(config.floatX)
        v = np.trace(m)
        assert v == f(m)

    xx = theano.tensor.vector()
    ok = False
    try:
        trace(xx)
    except TypeError:
        ok = True
    assert ok
def compute(self, symmetric_double_encoder, params):
    regularization = 0
    layer_number = len(symmetric_double_encoder)

    for ndx, layer in enumerate(symmetric_double_encoder):
        hidden_x = layer.output_forward_y
        hidden_y = layer.output_forward_x

        cov_x = Tensor.dot(hidden_x.T, hidden_x)
        cov_y = Tensor.dot(hidden_y.T, hidden_y)

        # depth-dependent weight; cast to float so Python 2 integer division
        # does not round the ratio down to zero
        gama = float(ndx) / layer_number

        regularization += gama * 0.5 * nlinalg.trace(cov_x - Tensor.identity_like(cov_x))
        regularization += (1 - gama) * 0.5 * nlinalg.trace(cov_y - Tensor.identity_like(cov_y))

    return regularization
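In equation form (descriptive of the loop above, with \gamma_l = l/L for layer l of L layers), the regularizer penalises the deviation of each layer's uncentred hidden covariance from the identity:

R = \sum_{l=0}^{L-1}\Big[\tfrac{\gamma_l}{2}\operatorname{tr}\big(C_x^{(l)} - I\big)
  + \tfrac{1-\gamma_l}{2}\operatorname{tr}\big(C_y^{(l)} - I\big)\Big],
\qquad C^{(l)} = H^{(l)\top} H^{(l)},

where H_x^{(l)} and H_y^{(l)} are the two views' hidden activations at layer l.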
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    # Wishart log-density: the first term involves log|X|
    return bound(
        ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * log(2) - n * log(IVI) - 2 * multigammaln(p, n / 2)) / 2,
        n > (p - 1))
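For reference, the logp variants in this collection implement the Wishart log-density

\log p(X \mid n, V) = \tfrac{1}{2}\Big[(n - p - 1)\log|X|
  - \operatorname{tr}(V^{-1}X) - np\log 2 - n\log|V|\Big] - \log\Gamma_p(n/2),

where \Gamma_p is the multivariate gamma function, X must be symmetric positive definite, and n > p - 1.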
def _init_error(self):
    pull_error = 0.0
    ivectors = self._x[self._neighborpairs[:, 0]]
    jvectors = self._x[self._neighborpairs[:, 1]]
    diffv = ivectors - jvectors
    pull_error = linalg.trace(diffv.dot(self.M).dot(diffv.T))

    push_error = 0.0
    ivectors = self._x[self._set[:, 0]]
    jvectors = self._x[self._set[:, 1]]
    lvectors = self._x[self._set[:, 2]]
    diffij = ivectors - jvectors
    diffil = ivectors - lvectors
    lossij = diffij.dot(self.M).dot(diffij.T)
    lossil = diffil.dot(self.M).dot(diffil.T)
    push_error = linalg.trace(T.maximum(lossij - lossil + 1, 0))

    self.pull_error = pull_error
    self.push_error = push_error
    self.error = (1 - self.mu) * pull_error + self.mu * push_error
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
        gt(n, (p - 1)), all(gt(eigh(X)[0], 0)), eq(X, X.T))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * tt.log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * tt.log(2) - n * tt.log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
        matrix_pos_def(X), tt.eq(X, X.T), n > (p - 1))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * T.log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * T.log(2) - n * T.log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
        T.all(eigh(X)[0] > 0), T.eq(X, X.T), n > (p - 1))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
        n > (p - 1))
def resfunc(i, xvec, y, h1, h2, h3vec, U1, U2, U3ten, conjU1, conjU2, conjU3ten):
    deph1 = TT.exp(-g * (t2 - y))
    deph2 = TT.exp(-g * (t3 - t2))
    deph3 = TT.exp(-g * (xvec[i] - t3))
    inhom14 = TT.exp(-s * ((xvec[i] - t3 + t2 - y) ** 2))
    inhom23 = TT.exp(-s * (((xvec[i] - t3) - (t2 - y)) ** 2))

    r14a = (TT.dot(U1, TT.dot(m, TT.dot(p0, conjU1)))) * deph1
    r23a = (TT.dot(U1, TT.dot(p0, TT.dot(m, conjU1)))) * deph1

    r1 = TTnlinalg.trace(
        TT.dot(m, ((TT.dot(
            U3ten[:, :, i],
            TT.dot(
                m,
                TT.dot((
                    (TT.dot(U2, TT.dot(m, TT.dot(r14a, conjU2)))) * deph2),
                    conjU3ten[:, :, i])))) * deph3))) * inhom14
    r2 = (TTnlinalg.trace(
        TT.dot(m, ((TT.dot(
            U3ten[:, :, i],
            TT.dot(((TT.dot(U2, TT.dot(m, TT.dot(r23a, conjU2)))) * deph2),
                   TT.dot(m, conjU3ten[:, :, i])))) * deph3)))) * inhom23
    r3 = (TTnlinalg.trace(
        TT.dot(m, ((TT.dot(
            U3ten[:, :, i],
            TT.dot(
                m,
                TT.dot(((TT.dot(U2, TT.dot(r23a, TT.dot(m, conjU2)))) * deph2),
                       conjU3ten[:, :, i])))) * deph3)))) * inhom23
    r4 = (TTnlinalg.trace(
        TT.dot(m, ((TT.dot(
            U3ten[:, :, i],
            TT.dot(((TT.dot(U2, TT.dot(r14a, TT.dot(m, conjU2)))) * deph2),
                   TT.dot(m, conjU3ten[:, :, i])))) * deph3)))) * inhom14

    return (1j * 1j * 1j) * h1 * h2 * h3vec[i] * (
        r1 + r2 + r3 + r4 - TT.conj(r1) - TT.conj(r2) - TT.conj(r3) -
        TT.conj(r4))
def logp(self, X):
    nu = self.nu
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(((nu - p - 1) * tt.log(IXI) - trace(matrix_inverse(V).dot(X)) -
                  nu * p * tt.log(2) - nu * tt.log(IVI) -
                  2 * multigammaln(nu / 2., p)) / 2,
                 matrix_pos_def(X),
                 tt.eq(X, X.T),
                 nu > (p - 1),
                 broadcast_conditions=False)
def __theano_longtermError(self, targetM, i, lastM):
    mask = T.neq(self._y[self._set[:, 1]], self._y[self._set[:, 2]])
    f = T.tanh  # T.nnet.sigmoid
    if i == 0:
        # pull_error for global 0
        pull_error = 0.
        ivectors = self._stackx[:, i, :][self._neighborpairs[:, 0]]
        jvectors = self._stackx[:, i, :][self._neighborpairs[:, 1]]
        diffv = ivectors - jvectors
        pull_error = linalg.trace(diffv.dot(targetM).dot(diffv.T))

        # push_error for global 0
        push_error = 0.0
        ivectors = self._stackx[:, i, :][self._set[:, 0]]
        jvectors = self._stackx[:, i, :][self._set[:, 1]]
        lvectors = self._stackx[:, i, :][self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        lossij = diffij.dot(targetM).dot(diffij.T)
        lossil = diffil.dot(targetM).dot(diffil.T)
        # cur_prediction = T.diag(lossij - lossil)
        cur_prediction = f(T.diag(lossil - lossij))

        ivectors = self._stackx[:, i - 1, :][self._set[:, 0]]
        jvectors = self._stackx[:, i - 1, :][self._set[:, 1]]
        lvectors = self._stackx[:, i - 1, :][self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        lossij = diffij.dot(diffij.T)
        lossil = diffil.dot(diffil.T)
        # lst_prediction = T.diag(lossij - lossil)
        lst_prediction = f(T.diag(lossil - lossij))
        push_error = T.sum(mask * (lst_prediction - cur_prediction))
    else:
        ivectors = self._stackx[:, i, :][self._neighborpairs[:, 0]]
        jvectors = self._stackx[:, i, :][self._neighborpairs[:, 1]]
        diffv1 = ivectors - jvectors
        distMcur = diffv1.dot(targetM).dot(diffv1.T)

        ivectors = self._stackx[:, i - 1, :][self._neighborpairs[:, 0]]
        jvectors = self._stackx[:, i - 1, :][self._neighborpairs[:, 1]]
        diffv2 = ivectors - jvectors
        distMlast = diffv2.dot(lastM).dot(diffv2.T)
        pull_error = linalg.trace(T.maximum(distMcur - distMlast + 1, 0))

        # self.debug.append(self._y[self._set[:, 0]])

        push_error = 0.0
        ivectors = self._stackx[:, i, :][self._set[:, 0]]
        jvectors = self._stackx[:, i, :][self._set[:, 1]]
        lvectors = self._stackx[:, i, :][self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        lossij = diffij.dot(targetM).dot(diffij.T)
        lossil = diffil.dot(targetM).dot(diffil.T)
        # cur_prediction = T.diag(lossij - lossil)
        cur_prediction = f(T.diag(lossil - lossij))

        ivectors = self._stackx[:, i - 1, :][self._set[:, 0]]
        jvectors = self._stackx[:, i - 1, :][self._set[:, 1]]
        lvectors = self._stackx[:, i - 1, :][self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        lossij = diffij.dot(lastM).dot(diffij.T)
        lossil = diffil.dot(lastM).dot(diffil.T)
        # lst_prediction = T.diag(lossij - lossil)
        lst_prediction = f(T.diag(lossil - lossij))
        push_error = T.sum(mask * (lst_prediction - cur_prediction))

    return pull_error, push_error
def cmmd(dataset='mnist.pkl.gz', batch_size=500, layer_num=2, hidden_dim=20,
         seed=0, layer_size=[500, 200, 100]):
    validation_frequency = 1
    test_frequency = 1
    pre_train = 0
    pre_train_epoch = 30

    print "Loading data ......."
    datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True)
    train_set_x, train_set_y, train_y_matrix = datasets[0]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
    test_set_x, test_set_y, test_y_matrix = datasets[2]

    n_train_batches = train_set_x.get_value().shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    ################################
    ##        build model         ##
    ################################
    print "Building model ......."

    index = T.lscalar()
    x = T.matrix('x')  ##### batch_size * 28^2
    y = T.vector('y')
    y_matrix = T.matrix('y_matrix')
    random_z = T.matrix('random_z')  ### batch_size * hidden_dim
    Inv_K_d = T.matrix('Inv_K_d')

    layers = []
    layer_output = []

    activation = nonlinearity.relu
    #activation = Tnn.sigmoid

    #### first layer
    layers.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=28 * 28 + hidden_dim,
        #n_in = 28*28,
        n_out=layer_size[0],
        activation=activation
    ))
    layer_output.append(layers[-1].output_mix(input=[x, random_z]))
    #layer_output.append(layers[-1].output(input=x))

    #### middle layer
    for i in range(layer_num):
        layers.append(FullyConnected.FullyConnected(
            rng=rng,
            n_in=layer_size[i],
            n_out=layer_size[i + 1],
            activation=activation
        ))
        layer_output.append(layers[-1].output(input=layer_output[-1]))

    #### last layer
    activation = Tnn.sigmoid
    layers.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=layer_size[-1],
        n_out=10,
        activation=activation
    ))
    y_gen = layers[-1].output(input=layer_output[-1])

    lambda1_ = 1e-3
    lambda_ = theano.shared(np.asarray(lambda1_, dtype=np.float32))

    K_d = kernel_gram_for_x(x, x, batch_size, 28 * 28)
    K_s = K_d
    K_sd = K_d
    #Inv_K_d = NL.matrix_inverse(K_d + lambda_ * T.identity_like(K_d))
    Inv_K_s = Inv_K_d

    L_d = kernel_gram(y_matrix, y_matrix, batch_size, 10)
    L_s = kernel_gram(y_gen, y_gen, batch_size, 10)
    L_ds = kernel_gram(y_matrix, y_gen, batch_size, 10)

    cost = -(NL.trace(K_d * Inv_K_d * L_d * Inv_K_d) +
             NL.trace(K_s * Inv_K_s * L_s * Inv_K_s) -
             NL.trace(K_sd * Inv_K_d * L_ds * Inv_K_s))

    cost_pre = -T.sum(T.sqr(y_matrix - y_gen))

    cc = T.argmax(y_gen, axis=1)
    correct = T.sum(T.eq(T.cast(T.argmax(y_gen, axis=1), 'int32'), T.cast(y, 'int32')))

    ################################
    ##          updates           ##
    ################################
    params = []
    for aLayer in layers:
        params += aLayer.params
    gparams = [T.grad(cost, param) for param in params]
    gparams_pre = [T.grad(cost_pre, param) for param in params]

    learning_rate = 3e-4
    weight_decay = 1.0 / n_train_batches
    epsilon = 1e-8

    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_max(learning_rate=l_r,
                                                     decay1=0.1, decay2=0.001,
                                                     weight_decay=weight_decay,
                                                     epsilon=epsilon)
    updates = get_optimizer(params, gparams)
    updates_pre = get_optimizer(params, gparams_pre)

    ################################
    ##       pretrain model       ##
    ################################
    parameters = theano.function(
        inputs=[],
        outputs=params,
    )

    '''
    pre_train_model = theano.function(
        inputs=[index, random_z],
        outputs=[cost_pre, correct],
        updates=updates_pre,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix: train_y_matrix[index * batch_size:(index + 1) * batch_size],
        },
        on_unused_input='warn'
    )

    cur_epoch = 0
    if pre_train == 1:
        for cur_epoch in range(pre_train_epoch):
            print 'cur_epoch: ', cur_epoch,
            cor = 0
            for minibatch_index in range(n_train_batches):
                cost_pre_mini, correct_pre_mini = pre_train_model(minibatch_index, gen_random_z(batch_size, hidden_dim))
                cor = cor + correct_pre_mini
            print 'correct number: ', cor

    #np.savez(,model = model)
    '''

    if pre_train == 1:
        print "pre-training model....."
        pre_train = np.load('model.npz')['model']
        for (para, pre) in zip(params, pre_train):
            para.set_value(pre)

    ################################
    ##        prepare data        ##
    ################################

    #### compute matrix inverse
    print "Preparing data ...."
    Invv = NL.matrix_inverse(K_d + lambda_ * T.identity_like(K_d))
    prepare_data = theano.function(
        inputs=[index],
        outputs=[Invv, K_d],
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    Inv_K_d_l, K_d_l = prepare_data(0)
    for minibatch_index in range(1, n_train_batches):
        if minibatch_index % 10 == 0:
            print 'minibatch_index:', minibatch_index
        Inv_pre_mini, K_d_pre_mini = prepare_data(minibatch_index)
        Inv_K_d_l = np.vstack((Inv_K_d_l, Inv_pre_mini))
        K_d_l = np.vstack((K_d_l, K_d_pre_mini))

    Inv_K_d_g = theano.shared(Inv_K_d_l, borrow=True)
    K_d_g = theano.shared(K_d_l, borrow=True)

    ################################
    ##         train model        ##
    ################################
    train_model = theano.function(
        inputs=[index, random_z],
        outputs=[correct, cost, y, cc, y_gen],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix: train_y_matrix[index * batch_size:(index + 1) * batch_size],
            #K_d: K_d_g[index * batch_size:(index + 1) * batch_size],
            Inv_K_d: Inv_K_d_g[index * batch_size:(index + 1) * batch_size],
        },
        on_unused_input='warn'
    )

    valid_model = theano.function(
        inputs=[index, random_z],
        outputs=correct,
        #updates=updates,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix: valid_y_matrix[index * batch_size:(index + 1) * batch_size],
        },
        on_unused_input='warn'
    )

    test_model = theano.function(
        inputs=[index, random_z],
        outputs=[correct, y_gen],
        #updates=updates,
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix: test_y_matrix[index * batch_size:(index + 1) * batch_size],
        },
        on_unused_input='warn'
    )

    n_epochs = 500
    cur_epoch = 0

    print "Training model ......"
    while (cur_epoch < n_epochs):
        cur_epoch = cur_epoch + 1
        cor = 0
        for minibatch_index in xrange(n_train_batches):
            print minibatch_index,
            print " : ",
            correct, cost, a, b, y_gen = train_model(minibatch_index, gen_random_z(batch_size, hidden_dim))
            cor = cor + correct
            print correct
            print b
            print y_gen

        with open('log.txt', 'a') as f:
            print >>f, "epoch: ", cur_epoch, "training_correct: ", cor

        if cur_epoch % validation_frequency == 0:
            cor2 = 0
            for minibatch_index in xrange(n_valid_batches):
                correct = valid_model(minibatch_index, gen_random_z(batch_size, hidden_dim))
                cor2 = cor2 + correct
            with open('log.txt', 'a') as f:
                print >>f, " validation_correct: ", cor2

        if cur_epoch % test_frequency == 0:
            cor2 = 0
            for minibatch_index in xrange(n_test_batches):
                correct, y_gen = test_model(minibatch_index, gen_random_z(batch_size, hidden_dim))
                with open('log.txt', 'a') as f:
                    for index in range(batch_size):
                        if not np.argmax(y_gen[index]) == test_set_y[minibatch_index * batch_size + index]:
                            print >>f, "index: ", minibatch_index * batch_size + index, 'true Y: ', test_set_y[minibatch_index * batch_size + index]
                            print >>f, 'gen_y: ', y_gen[index]
                cor2 = cor2 + correct
            with open('log.txt', 'a') as f:
                print >>f, " test_correct: ", cor2

        # save the current parameters every epoch
        if cur_epoch % 1 == 0:
            model = parameters()
            for i in range(len(model)):
                model[i] = np.asarray(model[i]).astype(np.float32)
            np.savez('model-' + str(cur_epoch), model=model)
def cmmd(dataset='mnist.pkl.gz', batch_size=100, layer_num=3, hidden_dim=5,
         seed=0, layer_size=[64, 256, 256, 512]):
    validation_frequency = 1
    test_frequency = 1
    pre_train = 1

    dim_input = (28, 28)
    colorImg = False

    print "Loading data ......."
    #datasets = datapy.load_data_gpu_60000_with_noise(dataset, have_matrix = True)
    datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True)
    train_set_x, train_set_y, train_y_matrix = datasets[0]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
    test_set_x, test_set_y, test_y_matrix = datasets[2]

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    n_train_batches = train_set_x.get_value().shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    aImage = paramgraphics.mat_to_img(train_set_x.get_value()[0:169].T,
                                      dim_input, colorImg=colorImg)
    aImage.save('mnist_sample', 'PNG')

    ################################
    ##        build model         ##
    ################################
    print "Building model ......."

    index = T.lscalar()
    x = T.matrix('x')  ##### batch_size * 28^2
    y = T.vector('y')
    y_matrix = T.matrix('y_matrix')
    random_z = T.matrix('random_z')  ### batch_size * hidden_dim
    Inv_K_d = T.matrix('Inv_K_d')

    layers = []
    layer_output = []

    activation = nonlinearity.relu
    #activation = Tnn.sigmoid

    #### first layer
    layers.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=10 + hidden_dim,
        #n_in = 10,
        n_out=layer_size[0],
        activation=activation))
    layer_output.append(layers[-1].output_mix(input=[y_matrix, random_z]))
    #layer_output.append(layers[-1].output_mix2(input=[y_matrix,random_z]))
    #layer_output.append(layers[-1].output(input=x))
    #layer_output.append(layers[-1].output(input=random_z))

    #### middle layer
    for i in range(layer_num):
        layers.append(FullyConnected.FullyConnected(
            rng=rng,
            n_in=layer_size[i],
            n_out=layer_size[i + 1],
            activation=activation))
        layer_output.append(layers[-1].output(input=layer_output[-1]))

    #### last layer
    activation = Tnn.sigmoid
    #activation = nonlinearity.relu
    layers.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=layer_size[-1],
        n_out=28 * 28,
        activation=activation))
    x_gen = layers[-1].output(input=layer_output[-1])

    lambda1_ = 100
    lambda_ = theano.shared(np.asarray(lambda1_, dtype=np.float32))

    K_d = kernel_gram_for_y(y_matrix, y_matrix, batch_size, 10)
    K_s = K_d
    K_sd = K_d

    Invv_1 = T.sum(y_matrix, axis=0) / batch_size
    Invv = NL.alloc_diag(1 / Invv_1)
    Inv_K_d = Invv
    #Inv_K_d = NL.matrix_inverse(K_d + lambda_ * T.identity_like(K_d))
    Inv_K_s = Inv_K_d

    L_d = kernel_gram_for_x(x, x, batch_size, 28 * 28)
    L_s = kernel_gram_for_x(x_gen, x_gen, batch_size, 28 * 28)
    L_ds = kernel_gram_for_x(x, x_gen, batch_size, 28 * 28)

    '''
    cost = -(NL.trace(T.dot(T.dot(T.dot(K_d, Inv_K_d), L_d), Inv_K_d)) +\
        NL.trace(T.dot(T.dot(T.dot(K_s, Inv_K_s), L_s), Inv_K_s)) -\
        2 * NL.trace(T.dot(T.dot(T.dot(K_sd, Inv_K_d), L_ds), Inv_K_s)))
    '''
    '''
    cost = -(NL.trace(T.dot(L_d, T.ones_like(L_d))) +\
        NL.trace(T.dot(L_s, T.ones_like(L_s))) -\
        2 * NL.trace(T.dot(L_ds, T.ones_like(L_ds))))

    cost2 = 2 * T.sum(L_ds) - T.sum(L_s) + NL.trace(T.dot(L_s, T.ones_like(L_s)))\
        - 2 * NL.trace(T.dot(L_ds, T.ones_like(L_ds)))

    cost2 = T.dot(T.dot(Inv_K_d, K_d), Inv_K_d)
    '''
    cost2 = K_d
    #cost2 = T.dot(T.dot(Inv_K_d, K_d), Inv_K_d)
    #cost = -T.sum(L_d) + 2 * T.sum(L_ds) - T.sum(L_s)
    cost2 = K_d
    cost2 = T.dot(T.dot(T.dot(y_matrix, Inv_K_d), Inv_K_d), y_matrix.T)

    cost = -(NL.trace(T.dot(T.dot(T.dot(T.dot(L_d, y_matrix), Inv_K_d),
                                  Inv_K_d), y_matrix.T)) +
             NL.trace(T.dot(T.dot(T.dot(T.dot(L_s, y_matrix), Inv_K_s),
                                  Inv_K_s), y_matrix.T)) -
             2 * NL.trace(T.dot(T.dot(T.dot(T.dot(L_ds, y_matrix), Inv_K_d),
                                      Inv_K_s), y_matrix.T)))
    '''
    cost = -T.sum(L_d) + 2 * T.sum(L_ds) - T.sum(L_s)

    cost = -NL.trace(K_s * Inv_K_s * L_s * Inv_K_s) +\
        2 * NL.trace(K_sd * Inv_K_d * L_ds * Inv_K_s)
    '''

    ################################
    ##          updates           ##
    ################################
    params = []
    for aLayer in layers:
        params += aLayer.params
    gparams = [T.grad(cost, param) for param in params]

    learning_rate = 3e-4
    weight_decay = 1.0 / n_train_batches
    epsilon = 1e-8

    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_max(learning_rate=l_r,
                                                     decay1=0.1, decay2=0.001,
                                                     weight_decay=weight_decay,
                                                     epsilon=epsilon)
    updates = get_optimizer(params, gparams)

    ################################
    ##       pretrain model       ##
    ################################
    parameters = theano.function(
        inputs=[],
        outputs=params,
    )

    gen_fig = theano.function(
        inputs=[y_matrix, random_z],
        outputs=x_gen,
        on_unused_input='warn',
    )

    if pre_train == 1:
        print "pre-training model....."
        pre_train = np.load('./result/MMD-100-5-64-256-256-512.npz')['model']
        for (para, pre) in zip(params, pre_train):
            para.set_value(pre)

    s = 8
    for jj in range(10):
        a = np.zeros((s, 10), dtype=np.float32)
        for ii in range(s):
            kk = random.randint(0, 9)
            a[ii, kk] = 1
        x_gen = gen_fig(a, gen_random_z(s, hidden_dim))
        ttt = train_set_x.get_value()
        for ll in range(s):
            minn = 1000000
            ss = 0
            for kk in range(ttt.shape[0]):
                tt = np.linalg.norm(x_gen[ll] - ttt[kk])
                if tt < minn:
                    minn = tt
                    ss = kk
            #np.concatenate(x_gen, ttt[ss])
            x_gen = np.vstack((x_gen, ttt[ss]))
        aImage = paramgraphics.mat_to_img(x_gen.T, dim_input,
                                          colorImg=colorImg)
        aImage.save('samples_' + str(jj) + '_similar', 'PNG')

    ################################
    ##        prepare data        ##
    ################################

    #### compute matrix inverse
    #print "Preparing data ...."
    #Invv = NL.matrix_inverse(K_d + lambda_ * T.identity_like(K_d))
    '''
    Invv_1 = T.sum(y_matrix, axis=0) / batch_size
    Invv = NL.alloc_diag(1 / Invv_1)
    Inv_K_d = Invv

    prepare_data = theano.function(
        inputs=[index],
        outputs=[Invv, K_d],
        givens={
            #x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y_matrix: train_y_matrix[index * batch_size:(index + 1) * batch_size],
        }
    )

    Inv_K_d_l, K_d_l = prepare_data(0)
    print Inv_K_d_l
    for minibatch_index in range(1, n_train_batches):
        if minibatch_index % 10 == 0:
            print 'minibatch_index:', minibatch_index
        Inv_pre_mini, K_d_pre_mini = prepare_data(minibatch_index)
        Inv_K_d_l = np.vstack((Inv_K_d_l, Inv_pre_mini))
        K_d_l = np.vstack((K_d_l, K_d_pre_mini))

    Inv_K_d_g = theano.shared(Inv_K_d_l, borrow=True)
    K_d_g = theano.shared(K_d_l, borrow=True)
    '''

    ################################
    ##         train model        ##
    ################################
    train_model = theano.function(
        inputs=[index, random_z],
        outputs=[cost, x_gen, cost2],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix: train_y_matrix[index * batch_size:(index + 1) * batch_size],
            #K_d: K_d_g[index * batch_size:(index + 1) * batch_size],
            #Inv_K_d: Inv_K_d_g[index * batch_size:(index + 1) * batch_size],
        },
        on_unused_input='warn')

    n_epochs = 500
    cur_epoch = 0

    print "Training model ......"
    while (cur_epoch < n_epochs):
        cur_epoch = cur_epoch + 1
        cor = 0
        for minibatch_index in xrange(n_train_batches):
            print minibatch_index,
            print " : ",
            cost, x_gen, cost2 = train_model(
                minibatch_index, gen_random_z(batch_size, hidden_dim))
            print 'cost: ', cost
            print 'cost2: ', cost2
            if minibatch_index % 30 == 0:
                aImage = paramgraphics.mat_to_img(x_gen[0:1].T, dim_input,
                                                  colorImg=colorImg)
                aImage.save('samples_epoch_' + str(cur_epoch) + '_mini_' +
                            str(minibatch_index), 'PNG')

        if cur_epoch % 1 == 0:
            model = parameters()
            for i in range(len(model)):
                model[i] = np.asarray(model[i]).astype(np.float32)
            np.savez('model-' + str(cur_epoch), model=model)