Exemplo n.º 1
0
 def gibbs(self, sample, countStep, function_mode, h_lid_type = 0):
     """Build the symbolic Gibbs-sampling graph over a sequence of time steps.

     For every time step a ``theano.scan`` body derives the visible and hidden
     biases from the current ``h_lid`` value, delegates the actual sampling to
     ``self.bm.gibbs`` and then computes the next ``h_lid``.

     :param sample: symbolic input; when ``len(sample.broadcastable) == 2`` it
         is treated as a single object, otherwise its first two axes are
         swapped before scanning (presumably (object, time, feature) -> (time,
         object, feature) -- TODO confirm against callers).
     :param countStep: forwarded unchanged to ``self.bm.gibbs``.
     :param function_mode: forwarded unchanged to ``self.bm.gibbs``.
     :param h_lid_type: ``0`` computes the next ``h_lid`` from the input
         ``sample`` of the step, any other value computes it from the sampling
         result ``res``.
     :return: ``(res, hLids, updates, vBiases, hBiases)`` where ``hLids`` is
         shifted by one step: it starts with the initial state and drops the
         last computed state.
     """
     # Templates for computing the next h_lid and the per-step biases from a given h_lid.
     # NOTE: h_lid_old is currently unused; the W2 recurrence term is commented out.
     calc_h_lid = lambda h_lid_old, sample: T.nnet.sigmoid(T.dot(sample, self.W) + self.hBiasbase) #+ T.dot(h_lid_old, self.W2.T)
     calc_hBiases = lambda h_lid: self.hBiasbase + T.dot(h_lid, self.W2.T)
     calc_vBiases = lambda h_lid: self.vBiasbase + T.dot(h_lid, self.W1.T)
     # Scan over every time step of `sample`, starting from `start_h_lid`.
     def gibbsSamplingForAllTime(sample, start_h_lid):
         # Body of the scan: one time step.
         def gibbsSamplingForOneStepTime(sample, h_lid):
             vBias = calc_vBiases(h_lid)
             hBias = calc_hBiases(h_lid)
             res, updates = self.bm.gibbs(sample, self.W, vBias, hBias, countStep, function_mode)
             #res = res[-1]
             if h_lid_type == 0:
                 # next h_lid is driven by the step's input
                 return [res, calc_h_lid(h_lid, sample), vBias, hBias], updates
             else:
                 # next h_lid is driven by the sampling result
                 return [res, calc_h_lid(h_lid, res), vBias, hBias], updates
         # Only the second output (h_lid) is recurrent; the others are plain per-step outputs.
         [sample_res, hLids, vBiases, hBiases], updates = theano.scan(gibbsSamplingForOneStepTime, sequences=sample, outputs_info=[None, start_h_lid, None, None])
         return sample_res, hLids, vBiases, hBiases, updates
     # usual gibbs-sampling
     if len(sample.broadcastable) == 2:
         # Matrix input: it is one object, so wrap it in a list for the scan.
         res, hLids, vBiases, hBiases, updates = gibbsSamplingForAllTime([sample], self.h_lid_0)
         # Prepend the initial state and drop the last one, so hLids[t] is the state used at step t.
         hLids = T.concatenate([[self.h_lid_0], hLids[0:-1]])
         return res, hLids, updates, vBiases, hBiases
     else:
         new_dim = T.cast(sample.shape[0], 'int32');
         # Swap the first two axes so that scan iterates over what was axis 1.
         my_sample = T.transpose(sample, (1, 0, 2))
         # Replicate the initial h_lid once per entry of the original first axis.
         h_lids_start = T.reshape(T.repeat(self.h_lid_0, new_dim), (self.hidden, new_dim)).T
         res, hLids, vBiases, hBiases, updates = gibbsSamplingForAllTime(my_sample, h_lids_start)
         # Restore the original axis order on every output.
         res = T.transpose(res, (1, 0, 2))
         hLids = T.concatenate([[h_lids_start], hLids[0:-1]])
         hLids = T.transpose(hLids, (1, 0, 2))
         vBiases = T.transpose(vBiases, (1, 0, 2))
         hBiases = T.transpose(hBiases, (1, 0, 2))
         return res, hLids, updates, vBiases, hBiases
Exemplo n.º 2
0
 def sgru3(X, h, W, U, b, t):
     """Compute one step of the "sgru3" recurrent unit.

     Two gated products are formed from the input ``X`` and the state ``h``
     and projected back through transposed slices of ``U``.

     Note: the ``t`` argument is ignored; it is overwritten with 0, so the
     first parameter slices of ``W``, ``U`` and ``b`` are always used.
     """
     t = 0
     base = t * 2
     # First pair: squashed input path times linear state path.
     input_squashed = T.tanh(T.dot(X, W[base]) + b[base])
     state_linear = T.dot(h, U[base]) + b[base + 1]
     # Second pair: linear input path times squashed state path.
     input_linear = T.dot(X, W[base + 1]) + b[base + 2]
     state_squashed = T.tanh(T.dot(h, U[base + 1]) + b[base + 3])
     first_term = T.dot(input_squashed * state_linear, T.transpose(U[base + 2]))
     second_term = T.dot(input_linear * state_squashed, T.transpose(U[base + 3]))
     return T.tanh(first_term + second_term)
Exemplo n.º 3
0
 def full(self, X, Xs=None):
     """Return the full covariance matrix of inputs shifted by ``self.c``.

     With ``Xs`` omitted the matrix is computed between ``X`` and itself,
     otherwise between ``X`` and ``Xs``; both sides are offset by ``self.c``
     before the dot product.
     """
     X, Xc, Xs = self._common(X, Xs)
     right = Xc if Xs is None else tt.sub(Xs, self.c)
     return tt.dot(Xc, tt.transpose(right))
Exemplo n.º 4
0
def forward_batch_step(x_t, H_mask, H_tm1):
    """Advance the recurrent net by one step for a batch.

    Returns ``[H_t, Y_t]``: the new hidden state and the masked negative
    log2 of the softmax output. ``W_rec``, ``W_in`` and ``W_out`` are taken
    from the enclosing scope.
    """
    pre_activation = TT.dot(W_rec, H_tm1) + W_in[:, x_t]
    H_t = TT.nnet.sigmoid(pre_activation)
    # softmax normalises along rows, hence the transpose of the output scores
    probabilities = TT.nnet.softmax(TT.transpose(TT.dot(W_out, H_t)))
    neg_log2 = -TT.log2(probabilities)
    # multiplying by diag(H_mask) scales each batch column by its mask entry
    masked = TT.dot(TT.transpose(neg_log2), TT.diag(H_mask))
    return [H_t, masked]
Exemplo n.º 5
0
 def bbprop(self):
     """Build the second-order ("bbprop") terms for this layer.

     Stores ``p * (1 - p)`` divided by the number of rows of
     ``self.p_y_given_x`` in ``self.lin_bbprop``, fills ``self.dict_bbprop``
     with the terms keyed by ``self.b_upmask`` and ``self.W_upmask``, and
     returns the term propagated to the layer input together with that dict.
     """
     probs = self.p_y_given_x
     curvature = probs - probs * probs
     curvature /= T.shape(probs)[0]
     self.lin_bbprop = curvature
     self.dict_bbprop = {}
     self.dict_bbprop[self.b_upmask] = T.sum(curvature, 0)
     self.dict_bbprop[self.W_upmask] = T.dot(T.transpose(self.inp * self.inp), curvature)
     propagated = T.dot(curvature, T.transpose(self.W * self.W))
     return propagated, self.dict_bbprop
Exemplo n.º 6
0
def T_l2_cost_conv(x,a,A,imshp,kshp,mask=True):
    """
    Half the squared reconstruction error of a convolutional model.

    Flattened layouts expected by the caller:
        xsz*ysz*nchannels, nimages = x.shape
        xsz*ysz*nfeat, nimages = a.shape
        xsz*ysz*nchannels, nfeat = A.shape

    imshp = (num images, channels, szy, szx)
    kshp  = (features, channels, szy, szx)

    With ``mask`` set, only the error inside the interior window (a margin
    of kernel size minus one is excluded on every side) contributes.
    """
    # feature maps: num images, features, szy, szx
    feat_rows = imshp[2] - kshp[2] + 1
    feat_cols = imshp[3] - kshp[3] + 1
    featshp = (imshp[0], kshp[0], feat_rows, feat_cols)

    image = T.reshape(T.transpose(x), imshp)
    kernel = T.reshape(T.transpose(A), kshp)
    features = T.reshape(T.transpose(a), featshp)

    # Swap the first two kernel axes and flip the spatial axes (correlation).
    flipped = kernel[:, :, ::-1, ::-1]
    kernel_rotated = T.transpose(flipped, axes=[1, 0, 2, 3])

    image_estimate = conv2d(features, kernel_rotated, border_mode='full')
    residual = image - image_estimate

    if not mask:
        return .5 * T.sum(residual ** 2)

    rows = slice(kshp[2] - 1, imshp[2] - kshp[2] + 1)
    cols = slice(kshp[3] - 1, imshp[3] - kshp[3] + 1)
    # Zero everywhere except the interior window, which keeps the residual.
    image_error = T.set_subtensor(T.zeros_like(residual)[:, :, rows, cols],
                                  residual[:, :, rows, cols])
    return .5 * T.sum(image_error ** 2)
Exemplo n.º 7
0
def nin(X, param):
    """Build a two-stage "network in network" graph with ``scan``.

    ``param`` unpacks to ``(w1, w2, w3, b1, b2, b3)``. Stage one applies
    ``metaOp1`` for every ``(i, j)`` index pair over the first two axes of
    ``w1``; stage two applies ``metaOp2`` for every ``i``. The result is
    reshaped so that the two middle axes are merged.

    The shape comments on the lines below are the original author's example
    sizes (64 outer units, 32 inner units, 16 channels) -- they are
    illustrative, not enforced by the code.
    """
    w1, w2, w3, b1, b2, b3 = param
    # Insert broadcastable axes so the operands line up inside metaOp1/metaOp2.
    X = X.dimshuffle(0, 1, 'x', 2, 3)  # (n,32,1,r,c)
    w1 = w1.dimshuffle(0, 1, 2, 'x', 3, 4)  # (64,32,16,1,3,3)
    w2 = w2.dimshuffle(0, 1, 'x', 2, 'x', 'x')  # (64,32,1,16,1,1)
    w3 = w3.dimshuffle(0, 1, 2, 'x', 'x')  # (64,2,32,1,1)
    b1 = b1.dimshuffle(0, 1, 'x', 2, 'x', 'x')  # (64,32,1,16,1,1)
    b2 = b2.dimshuffle(0, 1, 'x', 2, 'x', 'x')  # (64,32,1,1,1,1)
    b3 = b3.dimshuffle(0, 'x', 1, 'x', 'x')  # (64,1,2,1,1)
    # Enumerate every (i, j) pair: i is repeated per j, j is tiled per i.
    indexi = T.arange(w1.shape[0], dtype='int32')  # (0:64)
    indexi = T.repeat(indexi, w1.shape[1], axis=0)
    indexj = T.arange(w1.shape[1], dtype='int32')  # (0:32) given the example shapes above
    indexj = T.tile(indexj, w1.shape[0])
    results, updates = scan(fn=metaOp1,
                            sequences=[indexi, indexj],
                            outputs_info=None,
                            non_sequences=[X, w1, w2, b1, b2],
                            strict=True)  # (64*32,n,1,r,c)
    # Drop the singleton axis and split the flat scan axis back into (i, j).
    metaShape1 = results.shape[-4], results.shape[-2], results.shape[-1]
    reshaped1 = results.reshape((w1.shape[0], w1.shape[1]) + metaShape1)  # (64,32,n,r,c)
    permuted1 = T.transpose(reshaped1, axes=(0, 2, 1, 3, 4))  # (64,n,32,r,c)
    indexi = T.arange(w1.shape[0], dtype='int32')  # (0:64)
    # NOTE: `updates` from the first scan is overwritten here and never returned.
    results, updates = scan(fn=metaOp2,
                            sequences=[indexi],
                            outputs_info=None,
                            non_sequences=[permuted1, w3, b3],
                            strict=True)  # (64,n,2,r,c)
    permuted2 = T.transpose(results, axes=(1, 0, 2, 3, 4))  # (n,64,2,r,c)
    # Merge axes 1 and 2 while keeping the two trailing (spatial) axes.
    metaShape2 = permuted2.shape[-2], permuted2.shape[-1]
    reshaped2 = permuted2.reshape((permuted2.shape[0], -1) + metaShape2)  # (n,128,r,c)
    return reshaped2
Exemplo n.º 8
0
def theano_kernel_derivative(imshp,kshp,featshp,stride=1):
    """Compile a Theano function that returns the kernel gradient.

    The graph reconstructs the image from ``features`` and ``kernel`` with a
    'full' convolution, forms the reconstruction error and convolves the
    rotated error with the axis-swapped features; the negated result is the
    returned gradient.

    :param imshp: 4-tuple image shape passed to ``conv2d`` as shape hints.
    :param kshp: 4-tuple kernel shape.
    :param featshp: 4-tuple feature-map shape.
    :param stride: scalar multiplier applied to the two trailing feature
        dimensions to obtain the "logical" shapes.
    :return: compiled function ``f(image, features, kernel) -> kernel_grad``.
    """

    # Fresh symbolic inputs; the compiled function takes them in the order
    # (image, features, kernel).
    features = T.tensor4(dtype=theano.config.floatX)
    kernel = T.tensor4(dtype=theano.config.floatX)
    image = T.tensor4(dtype=theano.config.floatX)

    # Need to transpose first two dimensions of kernel, and reverse index kernel image dims (for correlation)
    kernel_rotated = T.transpose(kernel[:,:,::-1,::-1],axes=[1,0,2,3])

    featshp_logical = (featshp[0],featshp[1],featshp[2]*stride,featshp[3]*stride)
    kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
    image_estimate = conv2d(features,kernel_rotated,border_mode='full',
                            image_shape=featshp,filter_shape=kshp_rotated,
                            imshp_logical=featshp_logical[1:],kshp_logical=kshp[2:])

    image_error = image - image_estimate

    # Swap the first two axes and flip the spatial axes of the error so that
    # the convolution below acts as a correlation with the features.
    image_error_rot = T.transpose(image_error,[1,0,2,3])[:,:,::-1,::-1]
    imshp_rot = (imshp[1],imshp[0],imshp[2],imshp[3])
    featshp_rot = (featshp[1],featshp[0],featshp[2],featshp[3])
    features_rot = T.transpose(features,[1,0,2,3])

    featshp_rot_logical = (featshp_rot[0],featshp_rot[1],featshp_rot[2]*stride,featshp_rot[3]*stride)
    kernel_grad_rot = -conv2d(image_error_rot,features_rot,
                              image_shape=imshp_rot,filter_shape=featshp_rot,
                              imshp_logical=imshp_rot[1:],kshp_logical=featshp_rot_logical[2:])
    # Undo the axis swap so the gradient has the kernel's axis order.
    kernel_grad = T.transpose(kernel_grad_rot,[1,0,2,3])

    return function(inputs=[image,features,kernel],outputs=kernel_grad)
Exemplo n.º 9
0
	def __init__(self, rng, input, n_feature_maps, n_in, n_out, b_size=5, read_file=False, W=None, b=None):
		"""Apply one shared tanh projection to every feature map of ``input``.

		``input`` is expected as batch_size x n_feature_maps x n_in (the
		original author used n_in = 504 and n_out = 40). Unless ``read_file``
		is set, ``W`` is drawn uniformly from +/- sqrt(6 / (n_in + n_out)) and
		``b`` starts at zero; otherwise the supplied ``W`` and ``b`` are used.
		``self.output`` ends up as batch_size x n_feature_maps x n_out.
		"""
		# Work with the feature-map axis first so each map can be indexed directly.
		input = T.transpose(input, (1, 0, 2))
		self.input = input

		if read_file==False:
			bound = np.sqrt(6./(n_in+n_out))
			initial_W = rng.uniform(low=-bound, high=bound, size=(n_in, n_out))
			W = theano.shared(
				value=np.asarray(initial_W, dtype=theano.config.floatX),
				name='W',
				borrow=True
			)
			b = theano.shared(
				value=np.zeros((n_out,), dtype=theano.config.floatX),
				name='b',
				borrow=True
			)

		self.W = W
		self.b = b

		# The same weights are applied to every feature map.
		per_map = [
			T.tanh(T.dot(input[i], self.W) + self.b)
			for i in range(n_feature_maps)
		]
		stacked = T.concatenate(per_map, axis=0)
		stacked = T.reshape(stacked, (n_feature_maps, b_size, n_out))
		self.params = [self.W, self.b]

		# Restore the batch-first axis order on both input and output.
		self.input = T.transpose(self.input, (1, 0, 2))
		self.output = T.transpose(stacked, (1, 0, 2))
Exemplo n.º 10
0
def categorical_crossentropy_segm(prediction_proba, targets):
    '''
    Class-weighted categorical cross-entropy for segmentation maps.

    MODIFICATIONS:
        - reshape from image-size to array and back
        - per-pixel losses are scaled by a class-dependent weight

    :param prediction_proba: 4D tensor; axis 1 holds the class scores (it is
        moved last and flattened before the cross-entropy is taken).
    :param targets: either a tensor with the same number of dimensions as
        ``prediction_proba`` (handled like the predictions) or a tensor that
        is flattened to a vector of labels.
    :return: per-sample loss, summed over the two trailing axes.

    NOTE(review): the weighting indexes the 3D per-pixel loss with
    ``T.nonzero(targets)``, so it presumably requires ``targets`` to have the
    same (batch, rows, cols) layout as the loss -- confirm against callers.
    '''
    shape = T.shape(prediction_proba)
    pred_mod1 = T.transpose(prediction_proba, (0,2,3,1))
    pred_mod = T.reshape(pred_mod1, (-1,shape[1]))
    if prediction_proba.ndim == targets.ndim:
        targ_mod1 = T.transpose(targets,(0,2,3,1))
        targ_mod = T.reshape(targ_mod1,(-1,shape[1]))
    else:
        targ_mod = T.reshape(targets, (-1,))
    results = categorical_crossentropy(pred_mod, targ_mod)

    results = T.reshape(results, (shape[0],shape[2],shape[3]))

    # QUICK IMPLEMENTATION FOR TWO SPECIFIC CLASSES. NEEDS GENERALIZATION
    # Weights depending on class occurency:
    weights = (1.02275, 44.9647)
    cars_indx, not_cars_indx = T.nonzero(targets), T.nonzero(T.eq(targets,0))
    # set_subtensor is functional: it returns a new variable and leaves its
    # argument untouched, so the result has to be re-bound for the weights to
    # take effect. The two index sets are disjoint, so chaining is safe.
    results = T.set_subtensor(results[cars_indx], results[cars_indx]*float32(weights[1]))
    results = T.set_subtensor(results[not_cars_indx], results[not_cars_indx]*float32(weights[0]))

    return T.sum(results, axis=(1,2))
Exemplo n.º 11
0
def nn_param(params,input):
	"""Push ``input`` through the stacked layers in ``params``.

	Each layer encodes with a sigmoid and decodes with the transposed encoder
	weights; the evaluated reconstruction feeds the next layer. With a single
	layer the reconstruction is returned directly; with several layers every
	reconstruction is plotted and the last one is returned.
	"""
	from theano import tensor as T
	from matplotlib import pyplot as plt

	def reconstruct(lnum, data):
		# Encode, decode with the tied (transposed) weights, then evaluate.
		weights = params[lnum][0][1]
		encoded = T.nnet.sigmoid(T.dot(data, weights) + params[lnum][1][1])
		decoded = T.nnet.sigmoid(T.dot(encoded, T.transpose(weights)) + params[lnum][2][1])
		return decoded.eval()

	layers = len(params)
	if layers == 1:
		return reconstruct(0, input)

	for lnum in range(layers):
		if lnum == 0:
			yval = reconstruct(lnum, input)
			# only the first curve carries a legend label
			plt.plot(yval, label='%d' % lnum)
		else:
			yval = reconstruct(lnum, yval)
			plt.plot(yval)
	plt.legend()
	plt.show()
	return yval
def __init():
    """Compile the affinity function and publish it as a module global.

    The affinity of two rows is their dot product divided by the product of
    their L2 norms, mapped to ``(value + 1) / 2``. The compiled function is
    stored under the global name ``__affinty_fun``.
    """
    dataset = T.matrix("dataset", dtype=config.globalFloatType())

    # Pairwise dot products between all rows.
    gram = T.dot(dataset, T.transpose(dataset))

    # Outer product of the inverse row norms.
    row_norms = T.sqrt(T.sum(T.square(dataset), axis=1))
    inv_as_row = T.inv(row_norms).dimshuffle(['x', 0])
    inv_as_col = T.transpose(inv_as_row)
    inv_outer = T.dot(inv_as_col, inv_as_row)

    affinty = (T.mul(gram, inv_outer) + 1) / 2
    compiled = theano.function(
        [dataset],
        [affinty],
        allow_input_downcast=True
    )
    globals()['__affinty_fun'] = compiled
Exemplo n.º 13
0
 def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
     """Build the conditional mean and (co)variance at ``Xnew``.

     Uses the inducing points ``Xu`` together with the approximation selected
     by ``self.approx`` ("FITC", otherwise treated as VFE/DTC).

     :param Xnew: inputs at which the conditional is evaluated.
     :param pred_noise: when truthy, ``sigma**2`` is added to the returned
         variance (or to the diagonal of the returned covariance).
     :param diag: when truthy return only the variance vector, otherwise the
         full covariance matrix.
     :param X: training inputs.
     :param Xu: inducing inputs.
     :param y: training targets.
     :param sigma: noise standard deviation (it is squared below).
     :param cov_total: covariance callable used for the training-side terms.
     :param mean_total: mean callable used for the training residual.
     :return: ``(mu, var)`` when ``diag`` is truthy, else
         ``(mu, stabilize(cov))``.
     """
     sigma2 = tt.square(sigma)
     Kuu = cov_total(Xu)
     Kuf = cov_total(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     # A = Luu^{-1} Kuf, so sum(A * A, 0) is the diagonal of Kfu Kuu^{-1} Kuf.
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = cov_total(X, diag=True)
         # Clip at zero so round-off cannot produce a negative variance term.
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
     else:  # VFE or DTC
         Lamd = tt.ones_like(Qffd) * sigma2
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     # Residual of the training targets, scaled by the diagonal term.
     r = y - mean_total(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     # Prediction-side terms use self.cov_func / self.mean_func rather than the *_total callables.
     Kus = self.cov_func(Xu, Xnew)
     As = solve_lower(Luu, Kus)
     mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c))
     C = solve_lower(L_B, As)
     if diag:
         Kss = self.cov_func(Xnew, diag=True)
         var = Kss - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0)
         if pred_noise:
             var += sigma2
         return mu, var
     else:
         cov = (self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) +
                tt.dot(tt.transpose(C), C))
         if pred_noise:
             cov += sigma2 * tt.identity_like(cov)
         return mu, stabilize(cov)
Exemplo n.º 14
0
        def kmaxpooling_output(input):
            '''
            k-max pooling:
                1. sort the values along the pooled axis
                2. keep the k largest, in their original relative order

            ``k`` (number of top values to keep, int) is taken from the
            enclosing scope.

            :param input: 4D tensor; pooling runs over axis 2.
            :return: 4D tensor whose axis 2 has length k.
            '''
            # Move the pooled axis last so it can be argsorted directly.
            swapped = T.transpose(input, axes=(0, 1, 3, 2))
            ranking = T.argsort(swapped, axis=3)
            # Positions of the k largest values, re-sorted so that the
            # original order within the sentence is kept.
            kept_positions = T.sort(ranking[:, :, :, -k:])

            size0, size1, size2 = swapped.shape[0], swapped.shape[1], swapped.shape[2]

            # kept_positions only indexes the last axis; build matching flat
            # index vectors for the three leading axes.
            index0 = T.arange(0, size0).repeat(size1 * size2 * k)
            index1 = T.arange(0, size1).repeat(k * size2).reshape((1, -1)).repeat(size0, axis=0).flatten()
            index2 = T.arange(0, size2).repeat(k).reshape((1, -1)).repeat(size0 * size1, axis=0).flatten()
            index3 = kept_positions.flatten()

            gathered = swapped[index0, index1, index2, index3].reshape((size0, size1, size2, k))
            return T.transpose(gathered, axes=(0, 1, 3, 2))
Exemplo n.º 15
0
def T_subspacel1_slow_shrinkage_conv(a, L, lam_sparse, lam_slow, imshp,kshp,featshp,stride=(1,1),small_value=.001):
    """Compose a "slow" amplitude shrinkage with a subspace-L1 shrinkage.

    Features are grouped in (even, odd) channel pairs; the amplitude of a
    pair is the root of the sum of squares. Both shrinkage factors are
    applied to the amplitude and clipped at zero.

    :param a: flattened coefficients; transposed and reshaped to 4D below.
    :param L: divisor of both ``lam_slow`` and ``lam_sparse``
        (presumably a Lipschitz constant / inverse step size -- TODO confirm).
    :param lam_sparse: weight of the subspace-L1 shrinkage.
    :param lam_slow: weight of the slow shrinkage.
    :param imshp: image shape; only ``imshp[0]`` is read here.
    :param kshp: kernel shape; only ``kshp[0]`` is read here.
    :param featshp: feature shape; only entries 2 and 3 are read here.
    :param stride: not used in this function body.
    :param small_value: constant added under the first square root.
    :return: result reshaped to 2D and transposed, mirroring the layout of ``a``.
    """
    featshp = (imshp[0],kshp[0],featshp[2],featshp[3]) # num images, features, szy, szx
    features = T.reshape(T.transpose(a),featshp,ndim=4)

    # Amplitude of each (even, odd) feature pair; small_value keeps it non-zero.
    amp = T.sqrt(features[:,::2,:,:]**2 + features[:,1::2,:,:]**2 + small_value)
    #damp = amp[:,1:] - amp[:,:-1]

    # compose slow shrinkage with subspace l1 shrinkage

    # slow shrinkage
    # First and second differences of the amplitude along axis 0; interior
    # entries take -d2, the two boundary entries take a one-sided difference.
    div = T.zeros_like(amp)
    d1 = amp[1:,:,:,:] - amp[:-1,:,:,:]
    d2 = d1[1:,:,:,:] - d1[:-1,:,:,:]
    div = T.set_subtensor(div[1:-1,:,:,:], -d2)
    div = T.set_subtensor(div[0,:,:,:], -d1[0,:,:,:])
    div = T.set_subtensor(div[-1,:,:,:], d1[-1,:,:,:])
    slow_amp_shrinkage = 1 - (lam_slow / L) * (div / amp)
    # Negative shrinkage factors are clipped to zero.
    slow_amp_value = T.switch(T.gt(slow_amp_shrinkage, 0), slow_amp_shrinkage, 0)
    slow_shrinkage_prox_a = slow_amp_value * features[:, ::2, :,:]
    slow_shrinkage_prox_b = slow_amp_value * features[:,1::2, :,:]

    # subspace l1 shrinkage
    # NOTE(review): unlike `amp` above, no small_value is added here, so the
    # division below can hit a zero amplitude -- confirm this is intended.
    amp_slow_shrinkage_prox = T.sqrt(slow_shrinkage_prox_a ** 2 + slow_shrinkage_prox_b ** 2)
    #amp_shrinkage = 1. - (lam_slow*lam_sparse/L)*amp_slow_shrinkage_prox
    amp_shrinkage = 1. - (lam_sparse / L) / amp_slow_shrinkage_prox
    amp_value = T.switch(T.gt(amp_shrinkage, 0.), amp_shrinkage, 0.)
    # Write the shrunk pairs back into their even/odd channel positions.
    subspacel1_prox = T.zeros_like(features)
    subspacel1_prox = T.set_subtensor(subspacel1_prox[:, ::2, :,:], amp_value * slow_shrinkage_prox_a)
    subspacel1_prox = T.set_subtensor(subspacel1_prox[:,1::2, :,:], amp_value * slow_shrinkage_prox_b)

    reshape_subspacel1_prox = T.transpose(T.reshape(subspacel1_prox,(featshp[0],featshp[1]*featshp[2]*featshp[3]),ndim=2))
    return reshape_subspacel1_prox
Exemplo n.º 16
0
    def _pooling_function(self, inputs, pool_size, strides, border_mode, dim_ordering):
        """Pool ``inputs``; a first pool size below -1 selects k-max pooling.

        For any other pool size the call is forwarded unchanged to the parent
        class. k-max pooling keeps the ``self.k`` largest values along axis 2
        in their original relative order.
        """
        if not (pool_size[0] < -1):
            return super(MaxPooling2DWrapper, self)._pooling_function(inputs, pool_size, strides, border_mode, dim_ordering)

        # k-max pooling
        k = self.k
        # Move the pooled axis last so it can be argsorted directly.
        swapped = T.transpose(inputs, axes=(0, 1, 3, 2))
        ranking = T.argsort(swapped, axis=3)
        # Re-sort the kept positions so the order within the sentence is preserved.
        kept_positions = T.sort(ranking[:, :, :, -k:])

        size0, size1, size2 = swapped.shape[0], swapped.shape[1], swapped.shape[2]

        # kept_positions only indexes the last axis; build matching flat
        # index vectors for the three leading axes.
        index0 = T.arange(0, size0).repeat(size1 * size2 * k)
        index1 = T.arange(0, size1).repeat(k * size2).reshape((1, -1)).repeat(size0, axis=0).flatten()
        index2 = T.arange(0, size2).repeat(k).reshape((1, -1)).repeat(size0 * size1, axis=0).flatten()
        index3 = kept_positions.flatten()

        gathered = swapped[index0, index1, index2, index3].reshape((size0, size1, size2, k))
        return T.transpose(gathered, axes=(0, 1, 3, 2))
Exemplo n.º 17
0
def T_l2_cost_conv_dA(x,a,A,imshp,kshp,featshp,stride=(1,1),mask=True):
    """Gradient of the convolutional L2 cost with respect to the kernels.

    The reconstruction error comes from ``helper_T_l2_cost_conv``. For unit
    stride the gradient is a convolution of the rotated error with the
    axis-swapped features; for any other stride ``MyConv_view`` is used. The
    result is flattened to 2D and transposed.
    """
    image_error, kernel, features = helper_T_l2_cost_conv(
        x=x, a=a, A=A, imshp=imshp, kshp=kshp, featshp=featshp, stride=stride, mask=mask)

    if stride == (1,1):
        swap_first_two = [1, 0, 2, 3]

        # Swap the leading axes and flip the spatial axes of the error.
        error_rot = T.transpose(image_error, swap_first_two)[:, :, ::-1, ::-1]
        features_rot = T.transpose(features, swap_first_two)

        imshp_rot = (imshp[1], imshp[0], imshp[2], imshp[3])
        featshp_rot = (featshp[1], featshp[0], featshp[2], featshp[3])
        # Logical filter size is the valid-convolution output size.
        logical_filter = (imshp[2] - kshp[2] + 1, imshp[3] - kshp[3] + 1)

        grad_rot = -1.*conv2d(error_rot, features_rot,
                              image_shape=imshp_rot, filter_shape=featshp_rot,
                              imshp_logical=imshp_rot[1:], kshp_logical=logical_filter)
        kernel_grad = T.transpose(grad_rot, swap_first_two)
    else:
        strided_conv = MyConv_view(strides=stride, kshp=kshp)
        kernel_grad = strided_conv(image_error, features)

    # Both branches share the same flatten-and-transpose step.
    flat_shape = (kshp[0], kshp[1]*kshp[2]*kshp[3])
    return T.transpose(T.reshape(kernel_grad, flat_shape, ndim=2))
Exemplo n.º 18
0
    def get_output_for(self, input, **kwargs):
        '''
        Apply ``self.transform`` to every entry along the first axis.

        The original author documents the transform as a 2D FFT and the
        input as [n, 2, nx, ny]. When ``self.is_3d`` is set, the last axis
        of ``input`` is folded into the first axis before the transform and
        unfolded afterwards, using the sizes in ``self.data_shape``.
        '''
        if not self.is_3d:
            out, updates = theano.scan(self.transform, sequences=input)
            return out

        n, nc, nx, ny, nt = self.data_shape
        # Bring the last axis next to the first and merge the two.
        frames = T.transpose(input, axes=(0, 4, 1, 2, 3))
        frames = frames.reshape((-1, nc, nx, ny))
        transformed, updates = theano.scan(self.transform, sequences=frames)
        # Split the merged axis again and move it back to the end.
        unfolded = transformed.reshape((-1, nt, nc, nx, ny))
        return T.transpose(unfolded, axes=(0, 2, 3, 4, 1))
Exemplo n.º 19
0
 def full(self, X, Z=None):
     """Return the full covariance matrix of inputs shifted by ``self.c``.

     With ``Z`` omitted the matrix is computed between ``X`` and itself,
     otherwise between ``X`` and ``Z``; both sides are offset by ``self.c``
     before the dot product.
     """
     X, Xc, Z = self._common(X, Z)
     right = Xc if Z is None else tt.sub(Z, self.c)
     return tt.dot(Xc, tt.transpose(right))
def kmeans(train_set_x):
    """Build the symbolic graph of a spherical K-means dictionary.

    Steps: normalise each column of the input, ZCA-whiten, then run a fixed
    number of K-means iterations starting from random unit-length centroids.

    :param train_set_x: symbolic matrix whose columns are samples, or
        ``None`` to create a fresh ``T.matrix('train_set_x')``.
    :return: symbolic (dimension, K) matrix of unit-length centroids.
    """
    if train_set_x is None:
        train_set_x = T.matrix('train_set_x')

    ########################
    # Normalize the inputs #
    ########################

    epsilon_norm = 10
    epsilon_zca = 0.015
    K = 500

    # The mean must be removed BEFORE dividing by the standard deviation;
    # without the parentheses only the mean would be scaled.
    centered = train_set_x - T.mean(train_set_x, axis=0)
    train_set_x = centered / T.sqrt(T.var(train_set_x, axis=0) + epsilon_norm)

    #####################
    # Whiten the inputs #
    #####################

    # a simple choice of whitening transform is the ZCA whitening transform
    # epsilon_zca is small constant
    # for contrast-normalized data, setting epsilon_zca to 0.01 for 16-by-16 pixel patches,
    #                                                  or to  0.1 for 8-by-8   pixel patches
    # is a good starting point
    cov = T.dot(train_set_x, T.transpose(train_set_x)) / train_set_x.shape[1]
    U, singular_values, V = linalg.svd(cov)
    zca = T.dot(U, T.diag(1 / T.sqrt(singular_values + epsilon_zca)))
    zca = T.dot(zca, T.transpose(U))
    whitened_x = T.dot(zca, train_set_x)

    ######################
    # Training the Model #
    ######################

    # Initialization: random centroids scaled to unit length
    dimension_size = whitened_x.shape[0]
    num_samples = whitened_x.shape[1]
    srng = RandomStreams(seed=234)

    D = srng.normal(size=(dimension_size, K))
    D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    # typically 10 iterations is enough
    num_iteration = 15
    sample_index = T.arange(num_samples)

    # compute new centroids, D_new
    for _ in range(num_iteration):

        # Each sample is assigned to the centroid with the largest projection.
        dx = T.dot(D.T, whitened_x)
        arg_max_dx = T.argmax(dx, axis=0)
        best_projection = dx[arg_max_dx, sample_index]

        # One non-zero entry per column: the winning projection.
        assignments = T.zeros((K, num_samples))
        assignments = T.set_subtensor(assignments[arg_max_dx, sample_index], best_projection)

        # Damped update (the previous D is added), then renormalise the columns.
        D = T.dot(whitened_x, T.transpose(assignments)) + D
        D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    return D
    def train(self, n_epochs=100, mini_batch_size=1, learning_rate=0.1):
        """Train the tied-weight autoencoder with mini-batch gradient descent.

        The encoder uses ``self.W`` and the decoder its transpose. The
        training cost is the cross-entropy of the masked reconstruction on
        ``self.X``; the validation cost is the cross-entropy of the unmasked
        reconstruction on ``self.Y``. After every epoch the per-sample average
        costs are appended to ``global_train_cost`` and ``global_valid_cost``
        (names defined outside this method).

        :param n_epochs: number of passes over ``self.X``.
        :param mini_batch_size: number of rows per update.
        :param learning_rate: step size of the gradient update.
        """
        index = T.lscalar()
        x=T.matrix('x')
        # NOTE: is_dropout is declared but not used in this method.
        is_dropout = T.dscalar('is_dropout')
        params = [self.W, self.b1, self.b2]
        hidden = self.activation_function(T.dot(x, self.W)+self.b1)
        # presumably a dropout mask shaped like b1 with rate 0.5 -- verify get_mask
        arr_n = self.get_mask(self.b1,0.5)
        # The first assignment is immediately overwritten by the masked version.
        hidden_tilde = hidden
        hidden_tilde = arr_n * hidden
        output_without_drop = T.dot(hidden,T.transpose(self.W))+self.b2
        output_without_drop = self.output_function(output_without_drop)
        output_dropout = T.dot(hidden_tilde,T.transpose(self.W))+self.b2
        output_dropout = self.output_function(output_dropout)

        #x_printed = theano.printing.Print('this is a very important value')(arr_n)

        #Use cross-entropy loss.
        L = -T.sum(x*T.log(output_dropout) + (1-x)*T.log(1-output_dropout), axis=1)
        cost=L.mean()
        L_without_drops = -T.sum(x*T.log(output_without_drop) + (1-x)*T.log(1-output_without_drop), axis=1)
        cost2 = L_without_drops.mean()

        updates=[]
        #Return gradient with respect to W, b1, b2.
        gparams = T.grad(cost,params)

        # NOTE: gparams_shared is created but not used below.
        gparams_shared = theano.shared(gparams,'gparams_shared')
        # Mask the gradients of W and b1 with the same mask as the hidden units.
        gparams[0] = gparams[0] * arr_n
        gparams[1] = gparams[1] * arr_n
        #x_printed1 = theano.printing.Print('this is a very important value')(gparams[0])
        #x_printed2 = theano.printing.Print('this is a very important value')(gparams[1])

        #gparams_shared = gparams_shared*mask
        #Create a list of 2 tuples for updates.
        for param, gparam in zip(params, gparams):
            updates.append((param, param-learning_rate*gparam))

        #Train given a mini-batch of the data.
        train = th.function(inputs=[index], outputs=[cost], updates=updates,
                            givens={x:self.X[index:index+mini_batch_size,:]})

        # Validation evaluates the unmasked cost on self.Y and updates nothing.
        valid = th.function(inputs=[index], outputs=[cost2],
                            givens={x:self.Y[index:index+mini_batch_size,:]})

        import time
        start_time = time.clock()
        for epoch in xrange(n_epochs):
            print "Epoch:",epoch
            cost_train = 0
            cost_valid = 0
            # `row` is the start offset of each mini-batch.
            for row in xrange(0,self.m, mini_batch_size):
                cost_train= cost_train + train(row)[0]
            for row in xrange(0,self.Y_m, mini_batch_size):
                cost_valid = cost_valid + valid(row)[0]
            global_valid_cost.append((cost_valid/self.Y_m))
	    global_train_cost.append((cost_train/self.m))

        end_time = time.clock()
        print "Average time per epoch=", (end_time-start_time)/n_epochs
Exemplo n.º 22
0
def weighted_binary_cross_entropy_4(pred, target, class_normalization):
    """Element-wise binary cross-entropy normalised per row.

    The positive term is scaled by ``class_normalization`` and divided by the
    number of active targets in the row (plus one); the negative term is
    divided by the number of inactive targets in the row (plus one).
    """
    # Mix of 0 and 2
    # From theano
    n_columns = pred.shape[1]
    n_rows = pred.shape[0]

    # Row-wise counts, tiled to the full matrix shape; +1 avoids division by zero.
    active_counts = target.sum(axis=1)
    inactive_counts = (1-target).sum(axis=1)
    on_per_row = T.transpose(T.tile(active_counts, (n_columns, 1))) + 1
    off_per_row = T.transpose(T.tile(inactive_counts, (n_columns, 1))) + 1

    class_weights = T.tile(class_normalization, (n_rows, 1))

    positive_term = class_weights * target * T.log(pred) / on_per_row
    negative_term = (1.0 - target) * T.log(1.0 - pred) / off_per_row
    return -(positive_term + negative_term)
def Kmeans(X_train=None, K=300, epsilon_whitening=0.015):
    """Build the symbolic graph of a spherical K-means dictionary.

    Steps: normalise each column of the input, whiten it, then run a fixed
    number of K-means iterations starting from random unit-length centroids.

    :param X_train: symbolic matrix whose columns are samples, or ``None``
        to create a fresh ``T.matrix("X_train")``.
    :param K: number of centroids.
    :param epsilon_whitening: constant added to the singular values before
        the inverse square root is taken.
    :return: symbolic (dimensions, K) matrix of unit-length centroids.
    """
    if X_train is None:
        X_train = T.matrix("X_train")

    ########################
    # Normalize the inputs #
    ########################

    # A constant added to the variance to avoid division by zero
    epsilon_norm = 10

    # Subtract from each training sample (each column in X_train) its mean,
    # THEN divide by its standard deviation. The parentheses matter: without
    # them only the mean would be divided.
    centered = X_train - T.mean(X_train, axis=0)
    X_train = centered / T.sqrt(T.var(X_train, axis=0) + epsilon_norm)

    #####################
    # Whiten the inputs #
    #####################

    sigma = T.dot(X_train, T.transpose(X_train)) / X_train.shape[1]
    U, s, V = linalg.svd(sigma, full_matrices=False)
    whitener = T.dot(U, T.diag(1 / T.sqrt(s + epsilon_whitening)))
    whitener = T.dot(whitener, T.transpose(U))
    X_Whitened = T.dot(whitener, X_train)

    ######################
    # Training the Model #
    ######################

    # Initialization
    dimensions = X_Whitened.shape[0]
    samples = X_Whitened.shape[1]
    srng = RandomStreams(seed=234)

    # We initialize the centroids by sampling them from a normal
    # distribution, and then normalizing them to unit length
    # D \in R^{n \times k}
    D = srng.normal(size=(dimensions, K))
    D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    iterations = 30
    sample_index = T.arange(samples)

    for _ in range(iterations):

        # Initialize new point representations
        # for every pass of the algorithm
        S = T.zeros((K, samples))

        # Each sample is assigned to the centroid with the largest projection.
        projections = T.dot(D.T, X_Whitened)
        winners = T.argmax(projections, axis=0)
        max_values = projections[winners, sample_index]
        S = T.set_subtensor(S[winners, sample_index], max_values)

        # NOTE(review): a centroid that wins no sample becomes a zero column
        # here and the normalisation below then divides by zero -- confirm
        # whether a damped update (adding the previous D) is wanted.
        D = T.dot(X_Whitened, T.transpose(S))
        D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    return D
Exemplo n.º 24
0
        def forward_prop_step(x_t, h_t_prev, c_t_prev):
            """One step of a stack-augmented LSTM.

            The input symbol is classified as push, pop or neither by
            comparing its argmax with the argmax of ``self.PUSH`` and
            ``self.POP``. The hidden state fed to the gates (``h_prime``) is
            the pop candidate on a pop and the previous hidden state
            otherwise.

            Side effect: ``self.stack`` and ``self.ptr_to_top`` are re-bound
            to new symbolic expressions.
            NOTE(review): re-binding attributes inside a scan step may not
            carry the stack across steps -- confirm how this step is driven.

            Weight matrices (``W_x_i``, ``W_h_i``, ...) come from the
            enclosing scope.

            :return: ``[o_t, h_t, c_t]`` -- output distribution, hidden state
                and cell state.
            """

            # Test values for Theano's debugging mode; the (300, 1) shape is hard-coded.
            h_t_prev.tag.test_value = np.random.uniform(0,1, (300,1)).astype('float64')
            c_t_prev.tag.test_value = np.random.uniform(0,1, (300,1)).astype('float64')

            # Classify the input symbol.
            argm_xt = T.argmax(x_t, axis=0)[0]
            argm_push = T.argmax(self.PUSH, axis=0)[0]
            argm_pop = T.argmax(self.POP, axis=0)[0]
            is_push = T.eq(argm_xt, argm_push)
            is_pop = T.eq(argm_xt, argm_pop)

            #candidate_to_push = W_h_push.dot(h_t_prev)
            # A push stores the previous hidden state one slot above the current top.
            candidate_to_push = h_t_prev
            pushed_stack = T.set_subtensor(self.stack[:,:,self.ptr_to_top+1], candidate_to_push)


            # A pop combines the previous hidden state with the current top of the stack.
            top_of_stack = self.stack[:,:,self.ptr_to_top]
            candidate_to_pop = T.tanh( W_h_prev_pop.dot(h_t_prev) + W_h_stack_pop.dot(top_of_stack) )


            # Only a push changes the stack contents; both inner branches are identical.
            self.stack = ifelse( is_push,
                            pushed_stack,
                            ifelse( is_pop,
                                    self.stack,
                                    self.stack
                                    )
                            )

            # The pointer moves up on push, down on pop, and stays otherwise.
            self.ptr_to_top = ifelse( is_push,
                                 self.ptr_to_top+1,
                                 ifelse( is_pop,
                                         self.ptr_to_top-1,
                                         self.ptr_to_top
                                         )
                                 )

            h_prime = ifelse( is_push,
                              h_t_prev,
                              ifelse( is_pop,
                                      candidate_to_pop,
                                      h_t_prev
                                      )
                              )

            # LSTM gates: input, output, forget and candidate.
            i = T.nnet.hard_sigmoid( W_x_i.dot(x_t) + W_h_i.dot(h_prime) )
            o = T.nnet.hard_sigmoid( W_x_o.dot(x_t) + W_h_o.dot(h_prime) )
            f = T.nnet.hard_sigmoid( W_x_f.dot(x_t) + W_h_f.dot(h_prime) )
            g = T.tanh( W_x_g.dot(x_t) + W_h_g.dot(h_prime) )

            c_t = f*c_t_prev + i*g
            h_t = o*T.tanh(c_t)

            # softmax normalises along rows, hence the transpose before and after
            o_t = T.transpose( T.nnet.softmax( T.transpose(W_hy.dot(h_t)) ) )

            #theano.printing.debugprint(o_t)

            return [o_t, h_t, c_t]
Exemplo n.º 25
0
        def forward_prop_step(x_t, h_t_prev, h_t_2_prev, c_t_2_prev, c_t_prev):
            """One step of a two-layer stack-augmented LSTM.

            Each layer keeps its own stack and pointer. The input symbol is
            classified as push, pop or neither once, and that decision drives
            both layers. Layer 2 takes the hidden state of layer 1 as input.

            Side effect: ``self.stack``, ``self.ptr_to_top``, ``self.stack_2``
            and ``self.ptr_to_top_2`` are re-bound to new symbolic
            expressions.

            NOTE(review): the parameter order is (..., c_t_2_prev, c_t_prev)
            but the return order is (..., c_t, c_t_2). If this step is driven
            by ``theano.scan`` with recurrent outputs matched by position,
            the two cell states would be fed to the wrong layer on the next
            step -- confirm against the scan call.

            :return: ``[o_t, h_t, h_t_2, c_t, c_t_2]``.
            """

            # h_t_prev.tag.test_value = np.random.uniform(0,1, (self.hidden_dim,self.minibatch_size)).astype('float64')
            # c_t_prev.tag.test_value = np.random.uniform(0,1, (self.hidden_dim,self.minibatch_size)).astype('float64')


            # Map input to {push,pop,internal}
            argm_xt = T.argmax(x_t, axis=0)[0]
            argm_push = T.argmax(self.PUSH, axis=0)[0]
            argm_pop = T.argmax(self.POP, axis=0)[0]
            is_push = T.eq(argm_xt, argm_push)
            is_pop = T.eq(argm_xt, argm_pop)


            # Layer 1
            # A push stores the previous hidden state one slot above the top;
            # a pop combines the previous hidden state with the current top.
            candidate_to_push = h_t_prev
            pushed_stack = T.set_subtensor(self.stack[:,:,self.ptr_to_top+1], candidate_to_push)
            top_of_stack = self.stack[:,:,self.ptr_to_top]
            candidate_to_pop = T.tanh( self.W_h_prev_pop.dot(h_t_prev) + self.W_h_stack_pop.dot(top_of_stack) )
            # Only a push changes the stack contents; both inner branches are identical.
            self.stack = ifelse(is_push, pushed_stack, ifelse( is_pop,self.stack,self.stack))
            self.ptr_to_top = ifelse(is_push, self.ptr_to_top+1, ifelse( is_pop, self.ptr_to_top-1, self.ptr_to_top))
            h_prime = ifelse(is_push,h_t_prev, ifelse( is_pop, candidate_to_pop, h_t_prev))


            # LSTM gates of layer 1: input, output, forget and candidate.
            i = T.nnet.hard_sigmoid( self.W_x_i.dot(x_t) + self.W_h_i.dot(h_prime) )
            o = T.nnet.hard_sigmoid( self.W_x_o.dot(x_t) + self.W_h_o.dot(h_prime) )
            f = T.nnet.hard_sigmoid( self.W_x_f.dot(x_t) + self.W_h_f.dot(h_prime) )
            g = T.tanh( self.W_x_g.dot(x_t) + self.W_h_g.dot(h_prime) )

            c_t = f*c_t_prev + i*g
            h_t = o*T.tanh(c_t)


            # Layer 2
            # Same stack logic as layer 1, on the second stack and pointer.
            candidate_to_push_2 = h_t_2_prev
            pushed_stack_2 = T.set_subtensor(self.stack_2[:,:,self.ptr_to_top_2+1], candidate_to_push_2)
            top_of_stack_2 = self.stack_2[:,:,self.ptr_to_top_2]
            candidate_to_pop_2 = T.tanh( self.W_h_prev_pop_2.dot(h_t_2_prev) + self.W_h_stack_pop_2.dot(top_of_stack_2) )
            self.stack_2 = ifelse(is_push, pushed_stack_2, ifelse(is_pop, self.stack_2, self.stack_2))
            self.ptr_to_top_2 = ifelse(is_push, self.ptr_to_top_2+1, ifelse(is_pop, self.ptr_to_top_2-1, self.ptr_to_top_2))
            h_prime_2 = ifelse(is_push, h_t_2_prev, ifelse(is_pop, candidate_to_pop_2, h_t_2_prev))

            # The gates of layer 2 read layer 1's hidden state h_t as their input.
            i_2 = T.nnet.hard_sigmoid( self.W_x_i_2.dot(h_t) + self.W_h_i_2.dot(h_prime_2) )
            o_2 = T.nnet.hard_sigmoid( self.W_x_o_2.dot(h_t) + self.W_h_o_2.dot(h_prime_2) )
            f_2 = T.nnet.hard_sigmoid( self.W_x_f_2.dot(h_t) + self.W_h_f_2.dot(h_prime_2) )
            g_2 = T.tanh( self.W_x_g_2.dot(h_t) + self.W_h_g_2.dot(h_prime_2) )

            c_t_2 = f_2*c_t_2_prev + i_2*g_2
            h_t_2 = o_2*T.tanh(c_t_2)


            # Output
            # softmax normalises along rows, hence the transpose before and after
            o_t = T.transpose( T.nnet.softmax( T.transpose(self.W_hy.dot(h_t_2)) ) )


            return [o_t, h_t, h_t_2, c_t, c_t_2]
Exemplo n.º 26
0
 def _build_conditional(self, Xnew, X, f, cov_total, mean_total):
     """Build the conditional mean and covariance at ``Xnew`` given ``f`` at ``X``.

     ``cov_total`` / ``mean_total`` are used for the training-side terms,
     ``self.cov_func`` / ``self.mean_func`` for the prediction-side terms.
     Returns ``(mu, cov)``.
     """
     train_cov = cov_total(X)
     cross_cov = self.cov_func(X, Xnew)
     chol = cholesky(stabilize(train_cov))
     # Both solves reuse the same Cholesky factor.
     projected_cross = solve_lower(chol, cross_cov)
     projected_resid = solve_lower(chol, f - mean_total(X))
     mu = self.mean_func(Xnew) + tt.dot(tt.transpose(projected_cross), projected_resid)
     prior_cov = self.cov_func(Xnew)
     explained = tt.dot(tt.transpose(projected_cross), projected_cross)
     return mu, prior_cov - explained
Exemplo n.º 27
0
 def square_dist(self, X, Xs):
     """Return the matrix of pairwise squared Euclidean distances.

     Uses the expansion ``|x - y|^2 = |x|^2 - 2 x.y + |y|^2``. Entry
     ``(i, j)`` is the squared distance between row ``i`` of ``X`` and row
     ``j`` of ``Xs`` (or of ``X`` itself when ``Xs`` is ``None``).

     :param X: matrix whose rows are points.
     :param Xs: second matrix of points, or ``None`` to compare ``X`` with
         itself.
     :return: matrix of squared distances, clipped at zero so that round-off
         cannot yield small negative values.
     """
     X2 = tt.sum(tt.square(X), 1)
     if Xs is None:
         Xs, Xs2 = X, X2
     else:
         Xs2 = tt.sum(tt.square(Xs), 1)
     # The column term must come from X and the row term from Xs; using the
     # norms of Xs for both gives wrong distances (and a shape mismatch when
     # the two inputs have different numbers of rows).
     sqd = (-2.0 * tt.dot(X, tt.transpose(Xs))
            + (tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1))))
     return tt.clip(sqd, 0.0, np.inf)
Exemplo n.º 28
0
 def __init__(self, input, iNeuronNum, oNeuronNum, activateType, train):
     """Create a fully connected layer and its symbolic (pre-activation) output.

     The weight is a shared (oNeuronNum, iNeuronNum) matrix of standard
     normal draws scaled by 1/sqrt(iNeuronNum); the bias is a shared vector
     of standard normal draws. ``activateType`` and ``train`` are only stored.
     """
     self._input = input

     # Weight is drawn before bias, matching the order of the random calls.
     initial_weight = np.random.randn(oNeuronNum, iNeuronNum) / (iNeuronNum ** 0.5)
     self._weight = theano.shared(initial_weight)
     initial_bias = np.random.randn(oNeuronNum)
     self._bias = theano.shared(initial_bias)
     self._activateType = activateType

     # Computed as W . input^T + b (bias broadcast over columns), then transposed back.
     column_bias = self._bias.dimshuffle(0, "x")
     affine = T.dot(self._weight, T.transpose(self._input)) + column_bias
     self._output = T.transpose(affine)

     self._parameter = [self._weight, self._bias]
     self._iNeuronNum = iNeuronNum
     self._oNeuronNum = oNeuronNum
     self._train = train
Exemplo n.º 29
0
def softmax_segm(x):
    '''
    Apply ``softmax`` over axis 1 of a 4-D tensor.

    Axis 1 is moved last, the tensor is flattened to a 2-D matrix with
    ``shape[1]`` columns, ``softmax`` (defined elsewhere) is applied, and
    the original 4-D axis order is restored.
    '''
    dims = T.shape(x)
    axis1_last = T.transpose(x, (0, 2, 3, 1))
    flat = T.reshape(axis1_last, (-1, dims[1]))
    probs = T.reshape(softmax(flat), (dims[0], dims[2], dims[3], dims[1]))
    return T.transpose(probs, (0, 3, 1, 2))
def my_siamese_loss(y_true, y_pred):
    """Loss over consecutive (even, odd) row pairs of ``y_pred``.

    Rows at even indices are paired with the following odd rows. The
    label of each pair is read from the even rows of ``y_true``. The
    module-level ``margin`` (defined elsewhere) bounds the second term.
    """
    even_pred, odd_pred = y_pred[0::2], y_pred[1::2]
    pair_label = y_true[0::2]
    # Squared distance between the two members of each pair.
    sq_dist = T.sum(T.square(even_pred - odd_pred), axis=1)
    label_term = T.transpose(pair_label) * sq_dist
    margin_term = T.transpose(1 - pair_label) * T.maximum(margin - sq_dist, 0)
    return T.mean(label_term + margin_term)
Exemplo n.º 31
0

#===================================================================================
#==========================Theano Function definitions==============================
#===================================================================================
# Symbolic placeholders for the compiled scoring function below.
# NOTE(review): ECTemp and E1ECTemp are declared here but are not used by
# the scoring expression in this section.
ATemp = T.matrix('ATemp')
BTemp = T.tensor3('BTemp')
UTemp = T.matrix('UTemp')
E1Temp = T.vector('E1Temp')
E2Temp = T.vector('E2Temp')
E1E2Temp = T.vector('E1E2Temp')
ECTemp = T.vector('ECTemp')
E1ECTemp = T.vector('E1ECTemp')

#Calculate scoring function
# score = UTemp . tanh(E1Temp . BTemp . E2Temp + ATemp . E1E2Temp)
# NOTE(review): E1E2Temp is presumably the concatenation of the two entity
# vectors -- confirm against the caller.
temp1 = E1Temp.dot(BTemp).dot(T.transpose(E2Temp))
temp2 = ATemp.dot(E1E2Temp)
temp3 = temp1 + temp2
temp4 = T.tanh(temp3)
score = UTemp.dot(temp4)
# Compiled callable taking (A, B, U, e1, e2, e1e2) and returning the score.
scoringFunction = theano.function(
    [ATemp, BTemp, UTemp, E1Temp, E2Temp, E1E2Temp], score)


#=======================================================================
#Function Name : loadEntityVectors
#Input : relation name
#Output : return NN params (A,B,U) for input relation
#Functionality : Returns NN parameters for specific relation
#        Function reads parameters from text files dumped while training
#=======================================================================
Exemplo n.º 32
0
def transpose(x):
    """Return the transpose of tensor ``x`` (delegates to ``T.transpose``)."""
    transposed = T.transpose(x)
    return transposed
Exemplo n.º 33
0
    def __init__(self,
                 cooccurrence,
                 z_k,
                 opt,
                 initializer,
                 pz_weight_regularizer=None,
                 pz_regularizer=None,
                 initial_pz=None,
                 initial_b=None,
                 eps=1e-8):
        """Build the training/validation graph for a co-occurrence model.

        Args:
            cooccurrence: array of co-occurrence counts; cast to float32.
                Its first dimension defines ``x_k``.
            z_k: number of buckets (columns of ``pz_weight``).
            opt: optimizer exposing ``get_updates(params=..., loss=...)``
                and a ``weights`` list.
            initializer: callable mapping a shape tuple to initial values.
            pz_weight_regularizer: optional callable applied to the raw
                ``pz_weight`` variable; its result is added to the loss.
            pz_regularizer: optional callable applied to the softmaxed
                ``p_z``; its result is added to the loss.
            initial_pz: optional initial value for ``pz_weight``; defaults
                to ``initializer((x_k, z_k))``.
            initial_b: optional initial value for the bias; defaults to
                ``initializer((x_k,))``.
            eps: small constant added inside divisions and logs.

        Sets ``val_fun``, ``encodings_fun`` and ``train_fun`` (compiled
        Theano functions taking no inputs) and ``weights``.
        """
        cooccurrence = cooccurrence.astype(np.float32)
        self.cooccurrence = cooccurrence
        self.z_k = z_k
        self.opt = opt
        x_k = cooccurrence.shape[0]
        self.x_k = x_k
        self.pz_weight_regularizer = pz_weight_regularizer
        self.pz_regularizer = pz_regularizer

        # cooccurrence matrix, normalised to sum to one
        n = np.sum(cooccurrence, axis=None)
        _co = cooccurrence / n
        co = T.constant(_co, name="co")  # (x_k, x_k)
        _co_m = np.sum(_co, axis=1, keepdims=True)
        co_m = T.constant(_co_m, name="co_m")  # (x_k,1)
        _co_c = _co / (eps + _co_m)
        _co_h = np.sum(_co * -np.log(eps + _co_c), axis=1, keepdims=True)  # (x_k, 1)
        # Call form of print: identical output on Python 2, valid on Python 3.
        print("COh: {}".format(np.sum(_co_h)))
        co_h = T.constant(_co_h, name="co_h")

        # parameters
        # P(z|x)
        if initial_pz is None:
            initial_pz = initializer((x_k, z_k))
        pz_weight = K.variable(initial_pz, name="pz_weight")  # (x_k, z_k)
        initial_w = initializer((z_k, x_k))
        w = K.variable(initial_w, name="w")
        if initial_b is None:
            initial_b = initializer((x_k,))
        b = K.variable(initial_b, name="b")
        params = [pz_weight, w, b]

        # loss
        p_z = softmax_nd(pz_weight)  # (x_k, z_k)
        bucketprobs = softmax_nd(w + b)  # (z_k, x_k)
        bucketnll = -T.log(eps + bucketprobs)  # (z_k, x_k)
        lossparts = T.dot(co, T.transpose(bucketnll, (1, 0)))  # (x_k, z_k)
        nll = T.sum(p_z * lossparts)

        # val loss: hard-assign each row to its argmax bucket
        enc = T.argmax(pz_weight, axis=1)
        oh = tensor_one_hot(enc, k=z_k)  # (x_k, z_k)
        p_b = T.dot(T.transpose(oh, (1, 0)), co)  # (z_k, x_k)
        marg = T.sum(p_b, axis=1, keepdims=True)  # (z_k, 1)
        cond = p_b / (marg + eps)  # (z_k, x_k)
        val_nll = T.sum(p_b * -T.log(eps + cond), axis=None)  # scalar

        # utilization: number of buckets that received at least one row
        utilization = T.sum(T.gt(T.sum(oh, axis=0), 0), axis=0)  # scalar

        reg_loss = T.constant(0.)
        self.regularize = False
        if pz_weight_regularizer:
            reg_loss += pz_weight_regularizer(pz_weight)
            self.regularize = True
        if pz_regularizer:
            reg_loss += pz_regularizer(p_z)
            self.regularize = True
        total_loss = nll + reg_loss

        self.val_fun = theano.function([], [nll, reg_loss, total_loss, val_nll, utilization])
        self.encodings_fun = theano.function([], enc)

        updates = opt.get_updates(params=params, loss=total_loss)
        self.train_fun = theano.function([], [nll, reg_loss, total_loss], updates=updates)
        self.weights = params + opt.weights
    def __init__(self, context, V, K, num_sub_tags, feature_matrix_values, context_sz, rng):
        """
        Set up the shared parameters and symbolic outputs of the language model.

        Creates embedding matrices ``R`` and ``Q`` of shape (V, K), the
        context weight tensor ``C`` of shape (context_sz, K, K), the tag
        matrix ``Tag`` of shape (num_sub_tags, K) and the bias ``b`` of
        length V, all drawn from ``rng``, then builds the word and tag
        softmax outputs.
        """
        float_type = theano.config.floatX

        def _as_shared(values, name):
            # Every parameter is stored as a borrowed shared variable.
            return theano.shared(value=values, name=name, borrow=True)

        # training contexts
        self.context = context

        # context word embeddings R, shape (V, K); first two columns zeroed
        # TODO: parameterize initialization
        r_init = np.asarray(rng.uniform(-0.01, 0.01, size=(V, K)),
                            dtype=float_type)
        r_init[:, 0:2] = np.zeros((V, 2))
        self.R = _as_shared(r_init, 'R')

        # target word embeddings Q, shape (V, K); first two columns zeroed
        q_init = np.asarray(rng.uniform(-0.01, 0.01, size=(V, K)),
                            dtype=float_type)
        q_init[:, 0:2] = np.zeros((V, 2))
        self.Q = _as_shared(q_init, 'Q')

        # context weight tensor C, shape (context_sz, K, K)
        c_init = np.asarray(
            rng.normal(0, math.sqrt(0.1), size=(context_sz, K, K)),
            dtype=float_type)
        self.C = _as_shared(c_init, 'C')

        # tag matrix, shape (num_sub_tags, K)
        tag_init = np.asarray(
            rng.normal(-0.01, 0.01, size=(num_sub_tags, K)),
            dtype=float_type)
        self.Tag = _as_shared(tag_init, 'Tag')

        # bias vector, length V
        b_init = np.asarray(rng.normal(0, math.sqrt(0.1), size=(V,)),
                            dtype=float_type)
        self.b = _as_shared(b_init, 'b')

        # embeddings of the context words
        self.r_w = self.R[context]
        # predicted representation of the target word
        self.q_hat = T.tensordot(self.C, self.r_w, axes=[[0, 1], [1, 2]])
        # score of the prediction against every target word, plus bias
        biased_scores = T.dot(self.Q, self.q_hat) + T.reshape(self.b, (V, 1))
        self.s = T.transpose(biased_scores)
        self.p_w_given_h = T.nnet.softmax(self.s)

        self.feature_matrix = theano.shared(value=feature_matrix_values,
                                            name="feature_matrix",
                                            borrow=True)

        # Tag scores:
        #   feature_matrix : Tag Size x Sub Tag Size
        #   Tag            : Sub Tag Size x K
        #   Q.T            : K x V
        #   s_tag          : Tag Size x V
        tag_embeddings = T.dot(self.feature_matrix, self.Tag)
        self.s_tag = T.dot(tag_embeddings, T.transpose(self.Q))
        self.p_t_given_w = T.nnet.softmax(self.s_tag)

        # trainable parameters of the model
        self.params = [self.R, self.Q, self.C, self.b, self.Tag]
Exemplo n.º 35
0
    def __init__(self,
                 n_in,
                 n_hidden,
                 x=T.tensor3("x"),
                 xc=T.tensor3("xc"),
                 mask=T.matrix("mask"),
                 maskc=T.matrix("maskx"),
                 prefix=""):
        """Run one shared recurrent cell over two masked batches.

        The same weights are scanned over ``(x, mask)`` and ``(xc, maskc)``
        using ``self.recurrent_fn``; ``self.nn_out`` is the difference of
        the two final hidden states.

        Args:
            n_in: input feature dimension passed to ``init_weight``.
            n_hidden: hidden dimension.
            x, xc: 3-D symbolic inputs; axes 0 and 1 are swapped before
                scanning. ``None`` creates a fresh variable.
            mask, maskc: matching 2-D masks; ``None`` creates a fresh
                variable.
            prefix: string prepended to parameter names.

        Note: the default arguments are built once at definition time, so
        instances created without explicit inputs share the same symbolic
        variables.
        """
        self.params = []
        self.x = x if x is not None else T.tensor3("x")
        self.xc = xc if xc is not None else T.tensor3("xc")
        self.mask = mask if mask is not None else T.matrix("mask")
        self.maskc = maskc if maskc is not None else T.matrix("maskc")

        # Swap the first two axes so scan iterates over axis 1 of the inputs.
        nmask = T.transpose(self.mask, axes=(1, 0))
        nx = T.transpose(self.x, axes=(1, 0, 2))

        nmaskc = T.transpose(self.maskc, axes=(1, 0))
        nxc = T.transpose(self.xc, axes=(1, 0, 2))

        # Input-to-hidden weights and biases (kept in creation order).
        wz_x, bz = init_weight(n_in, n_hidden, pre="%s_lstm_f_x_" % prefix)
        self.params += [wz_x, bz]

        wr_x, br = init_weight(n_in, n_hidden, pre="%s_lstm_i_x_" % prefix)
        self.params += [wr_x, br]

        wc_x, bc = init_weight(n_in, n_hidden, pre="%s_lstm_c_x_" % prefix)
        self.params += [wc_x, bc]

        # Hidden-to-hidden weights; the bias returned with them is unused.
        wz_h, _ = init_weight(n_hidden,
                              n_hidden,
                              pre="%s_lstm_f_h_" % prefix)
        self.params += [wz_h]

        wr_h, _ = init_weight(n_hidden,
                              n_hidden,
                              pre="%s_lstm_i_h_" % prefix)
        self.params += [wr_h]

        wc_h, _ = init_weight(n_hidden,
                              n_hidden,
                              pre="%s_lstm_c_h_" % prefix)
        self.params += [wc_h]

        # Zero initial states. Read shapes from self.x / self.xc, not the
        # raw arguments, so that x=None / xc=None works as intended.
        h_t_0 = T.alloc(0., self.x.shape[0], n_hidden)
        h_t_0_c = T.alloc(0., self.xc.shape[0], n_hidden)

        shared_weights = [wz_x, wz_h, bz, wr_x, wr_h, br, wc_x, wc_h, bc]

        h, _ = theano.scan(
            self.recurrent_fn,
            sequences=[nx, nmask],
            outputs_info=[h_t_0],
            non_sequences=shared_weights)

        hc, _ = theano.scan(
            self.recurrent_fn,
            sequences=[nxc, nmaskc],
            outputs_info=[h_t_0_c],
            non_sequences=shared_weights)

        # Hidden states with the first two axes swapped back.
        self.all_hiddenx = T.transpose(h, axes=(1, 0, 2))
        self.nn_outx = h[-1]

        self.all_hiddenc = T.transpose(hc, axes=(1, 0, 2))
        self.nn_outc = hc[-1]

        self.nn_out = h[-1] - hc[-1]
Exemplo n.º 36
0
    def __init__(self, n_hidden, embedding_dimention=50):
        """Build the attention network and compile its Theano functions.

        Two GRUs encode the text before/after the zero pronoun ("zp");
        three batch encoders encode each candidate ("np"). An attention
        distribution over candidates is computed, used to form ``new_zp``,
        and recomputed once more ("hop 1"). ``self.out`` is the hop-1
        attention; ``self.train_step`` trains with adadelta.

        Args:
            n_hidden: hidden size of the recurrent encoders.
            embedding_dimention: input embedding size (default 50).
        """

        ## n_in: input dimension of the sequence LSTM
        ## n_hidden: hidden-layer dimension of the LSTM for candidates and zp
        ## n_hidden_sequence: hidden dimension of the sequence LSTM; because it is
        ##                   dotted with the combined zp output it must be twice
        ##                   n_hidden, i.e. n_hidden_sequence = 2 * n_hidden
        self.params = []

        self.zp_x_pre = T.matrix("zp_x_pre")
        self.zp_x_post = T.matrix("zp_x_post")

        #self.zp_x_pre_dropout = _dropout_from_layer(self.zp_x_pre)
        #self.zp_x_post_dropout = _dropout_from_layer(self.zp_x_post)

        zp_nn_pre = GRU(embedding_dimention, n_hidden, self.zp_x_pre)
        #zp_nn_pre = LSTM(embedding_dimention,n_hidden,self.zp_x_pre_dropout)
        self.params += zp_nn_pre.params

        zp_nn_post = GRU(embedding_dimention, n_hidden, self.zp_x_post)
        #zp_nn_post = LSTM(embedding_dimention,n_hidden,self.zp_x_post_dropout)
        self.params += zp_nn_post.params

        self.zp_out = T.concatenate((zp_nn_pre.nn_out, zp_nn_post.nn_out))

        # NOTE(review): ZP_layer's own parameters are not appended to
        # self.params in this method -- confirm whether that is intended.
        self.ZP_layer = Layer(n_hidden * 2, n_hidden * 2, self.zp_out, ReLU)

        self.zp_out_output = self.ZP_layer.output

        #self.zp_out_dropout = _dropout_from_layer(T.concatenate((zp_nn_pre.nn_out,zp_nn_post.nn_out)))

        self.get_zp_out = theano.function(
            inputs=[self.zp_x_pre, self.zp_x_post],
            outputs=[self.ZP_layer.output])

        ### get sequence output for NP ###
        # All three inputs share the variable name "np_x" (and the three
        # masks below share "mask"); they are still distinct variables.
        self.np_x = T.tensor3("np_x")
        self.np_x_post = T.tensor3("np_x")
        self.np_x_pre = T.tensor3("np_x")

        #self.np_x_dropout = _dropout_from_layer(self.np_x)

        self.mask = T.matrix("mask")
        self.mask_pre = T.matrix("mask")
        self.mask_post = T.matrix("mask")

        self.np_nn_x = RNN_batch(embedding_dimention, n_hidden, self.np_x,
                                 self.mask)
        self.params += self.np_nn_x.params
        self.np_nn_pre = GRU_batch(embedding_dimention, n_hidden,
                                   self.np_x_pre, self.mask_pre)
        self.params += self.np_nn_pre.params
        self.np_nn_post = GRU_batch(embedding_dimention, n_hidden,
                                    self.np_x_post, self.mask_post)
        self.params += self.np_nn_post.params

        #self.np_nn_out = LSTM_batch(embedding_dimention,n_hidden*2,self.np_x,self.mask)
        #self.np_nn_out = LSTM_batch(embedding_dimention,n_hidden*2,self.np_x_dropout,self.mask)
        #self.params += self.np_nn_out.params

        #self.np_out = self.np_nn.nn_out
        # Candidate representation: mean of all hidden states of the
        # np_x encoder plus the final outputs of the post/pre encoders.
        self.np_nn_x_output = (self.np_nn_x.all_hidden).mean(axis=1)
        self.np_nn_post_output = self.np_nn_post.nn_out
        self.np_nn_pre_output = self.np_nn_pre.nn_out

        self.np_out = T.concatenate(
            (self.np_nn_x_output, self.np_nn_post_output,
             self.np_nn_pre_output),
            axis=1)

        # NOTE(review): as with ZP_layer, NP_layer's parameters are not
        # appended to self.params here.
        self.NP_layer = Layer(n_hidden * 3, n_hidden * 2, self.np_out, ReLU)

        self.np_out_output = self.NP_layer.output

        # Last slice along axis 1 of np_x (after swapping axes 0 and 1).
        self.np_x_head = T.transpose(self.np_x, axes=(1, 0, 2))[-1]

        self.get_np_head = theano.function(inputs=[self.np_x],
                                           outputs=[self.np_x_head])
        self.get_np = theano.function(inputs=[
            self.np_x, self.np_x_pre, self.np_x_post, self.mask, self.mask_pre,
            self.mask_post
        ],
                                      outputs=[self.np_out])
        self.get_np_out = theano.function(inputs=[
            self.np_x, self.np_x_pre, self.np_x_post, self.mask, self.mask_pre,
            self.mask_post
        ],
                                          outputs=[self.np_out_output])

        # Attention weights. Only the first bias (b_attention) is kept as a
        # parameter; the bias returned with w_attention_np is discarded.
        # NOTE(review): the `pre` name prefixes ("attention_hidden" for the
        # zp weight, "attention_zp" for the np weight) look mismatched with
        # the variable names -- confirm.
        w_attention_zp, b_attention = init_weight(n_hidden * 2,
                                                  1,
                                                  pre="attention_hidden",
                                                  ones=False)
        self.params += [w_attention_zp, b_attention]

        w_attention_np, b_u = init_weight(n_hidden * 2,
                                          1,
                                          pre="attention_zp",
                                          ones=False)
        self.params += [w_attention_np]

        self.calcu_attention = tanh(
            T.dot(self.np_out_output, w_attention_np) +
            T.dot(self.zp_out_output, w_attention_zp) + b_attention)
        # Transpose so the candidates lie along the softmax axis, then take
        # the single resulting row.
        self.attention = softmax(T.transpose(self.calcu_attention,
                                             axes=(1, 0)))[0]
        self.get_attention = theano.function(inputs=[
            self.zp_x_pre, self.zp_x_post, self.np_x, self.np_x_pre,
            self.np_x_post, self.mask, self.mask_pre, self.mask_post
        ],
                                             outputs=[self.attention])

        # Attention-weighted sum of the candidate head vectors.
        new_zp = T.sum(self.attention[:, None] * self.np_x_head, axis=0)
        self.get_new_zp = theano.function(inputs=[
            self.zp_x_pre, self.zp_x_post, self.np_x, self.np_x_pre,
            self.np_x_post, self.mask, self.mask_pre, self.mask_post
        ],
                                          outputs=[new_zp])

        #### *** HOP *** ####
        self.w_hop_zp, self.b_hop_zp = init_weight(n_hidden * 2 +
                                                   embedding_dimention,
                                                   n_hidden * 2,
                                                   pre="hop_")
        self.params += [self.w_hop_zp, self.b_hop_zp]

        ## hop 1 ##
        # Re-encode the zp from the two GRU outputs plus new_zp, then
        # recompute attention with the same attention weights.
        self.zp_hop_1_init = T.concatenate(
            (zp_nn_pre.nn_out, zp_nn_post.nn_out, new_zp))
        self.zp_hop_1 = ReLU(
            T.dot(self.zp_hop_1_init, self.w_hop_zp) + self.b_hop_zp)

        self.calcu_attention_hop_1 = tanh(
            T.dot(self.np_out_output, w_attention_np) +
            T.dot(self.zp_hop_1, w_attention_zp) + b_attention)
        self.attention_hop_1 = softmax(
            T.transpose(self.calcu_attention_hop_1, axes=(1, 0)))[0]
        self.get_attention_hop_1 = theano.function(
            inputs=[
                self.zp_x_pre, self.zp_x_post, self.np_x, self.np_x_pre,
                self.np_x_post, self.mask, self.mask_pre, self.mask_post
            ],
            outputs=[self.attention_hop_1])

        self.out = self.attention_hop_1

        self.get_out = theano.function(inputs=[
            self.zp_x_pre, self.zp_x_post, self.np_x, self.np_x_pre,
            self.np_x_post, self.mask, self.mask_pre, self.mask_post
        ],
                                       outputs=[self.out])

        # NOTE(review): the names are swapped relative to what is computed
        # (sum of squares is stored as "l1", sum of absolute values as
        # "l2"), and neither value nor the lmbda_* constants below is used
        # in the cost.
        l1_norm_squared = sum([(w**2).sum() for w in self.params])
        l2_norm_squared = sum([(abs(w)).sum() for w in self.params])

        lmbda_l1 = 0.0
        #lmbda_l2 = 0.001
        lmbda_l2 = 0.0

        # t is the target vector; the cost is the negative log of the
        # attention mass placed on entries where t is non-zero.
        t = T.bvector()
        cost = -(T.log((self.out * t).sum()))
        #cost = -(T.log((self.out_dropout*t).sum()))
        #cost = 1-((self.out*t).sum())

        # lr is accepted by train_step but is not passed to adadelta below;
        # on_unused_input='warn' keeps compilation from failing on it.
        lr = T.scalar()
        #grads = T.grad(cost, self.params)
        #updates = [(param, param-lr*grad)
        #    for param, grad in zip(self.params, grads)]

        #updates = lasagne.updates.sgd(cost, self.params, lr)
        updates = lasagne.updates.adadelta(cost, self.params)

        self.train_step = theano.function(inputs=[
            self.zp_x_pre, self.zp_x_post, self.np_x, self.np_x_pre,
            self.np_x_post, self.mask, self.mask_pre, self.mask_post, t, lr
        ],
                                          outputs=[cost],
                                          on_unused_input='warn',
                                          updates=updates)
Exemplo n.º 37
0
    def __init__(self, n_hidden, embedding_dimention=50, feature_dimention=61):
        """Build the feature-augmented attention network and compile it.

        Two LSTMs encode the text before/after the zero pronoun ("zp") and
        attend over each other's hidden states; two ``sub_LSTM_batch``
        encoders represent each candidate ("np"); a ``Layer`` projects the
        hand-crafted features. ``self.out`` is a softmax attention over
        candidates; ``self.train_step`` trains with SGD.

        Args:
            n_hidden: hidden size of the recurrent encoders.
            embedding_dimention: input embedding size (default 50).
            feature_dimention: size of the feature vector (default 61).
        """

        ## n_in: input dimension of the sequence LSTM
        ## n_hidden: hidden-layer dimension of the LSTM for candidates and zp

        #repre_active = ReLU
        repre_active = linear

        self.params = []

        self.zp_x_pre = T.matrix("zp_x_pre")
        self.zp_x_post = T.matrix("zp_x_post")

        zp_nn_pre = LSTM(embedding_dimention, n_hidden, self.zp_x_pre)
        self.params += zp_nn_pre.params

        zp_nn_post = LSTM(embedding_dimention, n_hidden, self.zp_x_post)
        self.params += zp_nn_post.params

        # Cross attention: the final output of one side scores every hidden
        # state of the other side (product summed over axis 1).
        attention_pre_on_post = softmax(
            (zp_nn_pre.nn_out * zp_nn_post.all_hidden).sum(axis=1))[0]
        attention_post_on_pre = softmax(
            (zp_nn_post.nn_out * zp_nn_pre.all_hidden).sum(axis=1))[0]

        # Attention-weighted sums of the hidden states.
        zp_post = T.sum(attention_pre_on_post[:, None] * zp_nn_post.all_hidden,
                        axis=0)
        zp_pre = T.sum(attention_post_on_pre[:, None] * zp_nn_pre.all_hidden,
                       axis=0)

        #self.zp_out = T.concatenate((zp_nn_pre.nn_out,zp_nn_post.nn_out))
        self.zp_out = T.concatenate((zp_post, zp_pre))

        self.zp_out_output = self.zp_out

        ### get sequence output for NP ###
        # All four inputs share the variable name "np_x" (and the four
        # masks below share "mask"); they are still distinct variables.
        self.np_x_post = T.tensor3("np_x")
        self.np_x_postc = T.tensor3("np_x")

        self.np_x_pre = T.tensor3("np_x")
        self.np_x_prec = T.tensor3("np_x")

        self.mask_pre = T.matrix("mask")
        self.mask_prec = T.matrix("mask")

        self.mask_post = T.matrix("mask")
        self.mask_postc = T.matrix("mask")

        self.np_nn_pre = sub_LSTM_batch(embedding_dimention, n_hidden,
                                        self.np_x_pre, self.np_x_prec,
                                        self.mask_pre, self.mask_prec)
        self.params += self.np_nn_pre.params
        self.np_nn_post = sub_LSTM_batch(embedding_dimention, n_hidden,
                                         self.np_x_post, self.np_x_postc,
                                         self.mask_post, self.mask_postc)
        self.params += self.np_nn_post.params

        self.np_nn_post_output = self.np_nn_post.nn_out
        self.np_nn_pre_output = self.np_nn_pre.nn_out

        self.np_out = T.concatenate(
            (self.np_nn_post_output, self.np_nn_pre_output), axis=1)

        #np_nn_f = LSTM(n_hidden*2,n_hidden*2,self.np_out)
        #self.params += np_nn_f.params
        #np_nn_b = LSTM(n_hidden*2,n_hidden*2,self.np_out[::-1])
        #self.params += np_nn_b.params

        #self.bi_np_out = T.concatenate((np_nn_f.all_hidden,np_nn_b.all_hidden[::-1]),axis=1)

        #self.np_out_output = self.bi_np_out
        #self.get_np_out = theano.function(inputs=[self.np_x_pre,self.np_x_prec,self.np_x_post,self.np_x_postc,self.mask_pre,self.mask_prec,self.mask_post,self.mask_postc],outputs=[self.np_out_output])

        self.feature = T.matrix("feature")
        self.feature_layer = Layer(feature_dimention, n_hidden, self.feature,
                                   repre_active)
        self.params += self.feature_layer.params

        # Attention weights. Only the first bias (b_attention) is kept as a
        # parameter; the biases returned with the other weights (b_u) are
        # discarded.
        w_attention_zp, b_attention = init_weight(n_hidden * 2,
                                                  1,
                                                  pre="attention_zp",
                                                  ones=False)
        self.params += [w_attention_zp, b_attention]

        w_attention_np, b_u = init_weight(n_hidden * 2,
                                          1,
                                          pre="attention_np",
                                          ones=False)
        self.params += [w_attention_np]

        #w_attention_np_rnn,b_u = init_weight(n_hidden*4,1,pre="attention_np_rnn",ones=False)
        #self.params += [w_attention_np_rnn]

        w_attention_feature, b_u = init_weight(n_hidden,
                                               1,
                                               pre="attention_feature",
                                               ones=False)
        self.params += [w_attention_feature]

        # Score = tanh(zp term + candidate term + feature term + bias).
        self.calcu_attention = tanh(
            T.dot(self.zp_out_output, w_attention_zp) +
            T.dot(self.np_out, w_attention_np) +
            T.dot(self.feature_layer.output, w_attention_feature) +
            b_attention)
        #self.calcu_attention = tanh(T.dot(self.np_out_output,w_attention_np_rnn) + T.dot(self.zp_out_output,w_attention_zp) + T.dot(self.np_out,w_attention_np) + T.dot(self.feature_layer.output,w_attention_feature) + b_attention)
        #self.calcu_attention = tanh(T.dot(self.np_out_output,w_attention_np_rnn) + T.dot(self.zp_out_output,w_attention_zp) + T.dot(self.np_out,w_attention_np) + b_attention)

        # Transpose so the candidates lie along the softmax axis, then take
        # the single resulting row.
        self.attention = softmax(T.transpose(self.calcu_attention,
                                             axes=(1, 0)))[0]

        self.out = self.attention

        self.get_out = theano.function(inputs=[
            self.zp_x_pre, self.zp_x_post, self.np_x_pre, self.np_x_prec,
            self.np_x_post, self.np_x_postc, self.mask_pre, self.mask_prec,
            self.mask_post, self.mask_postc, self.feature
        ],
                                       outputs=[self.out],
                                       on_unused_input='warn')

        # NOTE(review): the names are swapped relative to what is computed
        # (sum of squares is stored as "l1", sum of absolute values as
        # "l2"), and neither value nor the lmbda_* constants below is used
        # in the cost.
        l1_norm_squared = sum([(w**2).sum() for w in self.params])
        l2_norm_squared = sum([(abs(w)).sum() for w in self.params])

        lmbda_l1 = 0.0
        #lmbda_l2 = 0.001
        lmbda_l2 = 0.0

        # t is the target vector; the cost is the negative log of the
        # attention mass placed on entries where t is non-zero.
        t = T.bvector()
        cost = -(T.log((self.out * t).sum()))

        # Learning rate, supplied per call to train_step.
        lr = T.scalar()

        updates = lasagne.updates.sgd(cost, self.params, lr)
        #updates = lasagne.updates.adadelta(cost, self.params)

        self.train_step = theano.function(inputs=[
            self.zp_x_pre, self.zp_x_post, self.np_x_pre, self.np_x_prec,
            self.np_x_post, self.np_x_postc, self.mask_pre, self.mask_prec,
            self.mask_post, self.mask_postc, self.feature, t, lr
        ],
                                          outputs=[cost],
                                          on_unused_input='warn',
                                          updates=updates)
def logNormalPDF(X, Mu, XChol):
    """Return the log-density of ``X`` under a Gaussian with mean ``Mu``.

    The covariance is ``XChol . XChol^T``; its inverse is formed explicitly
    and used for both the quadratic term and the log-determinant term.
    ``X.shape[0]`` is used as the dimensionality in the constant term.
    """
    precision = Tla.matrix_inverse(T.dot(XChol, T.transpose(XChol)))
    centered = X - Mu
    quadratic = -0.5 * T.dot(centered, T.dot(precision, T.transpose(centered)))
    log_det = 0.5 * T.log(Tla.det(precision))
    normaliser = 0.5 * np.log(2 * np.pi) * X.shape[0]
    return quadratic + log_det - normaliser
Exemplo n.º 39
0
    def __init__(self,
                 n_actions,
                 replay_memory,
                 build_network,
                 updates,
                 screen_size,
                 initial_weights_file=None):
        """Set hyper-parameters, build both networks and compile functions.

        Args:
            n_actions: number of discrete actions.
            replay_memory: replay buffer object, stored as-is.
            build_network: callable ``(n_actions, input_var, screen_size)``
                returning a lasagne network; called twice (live and stale).
            updates: callable ``(loss, params)`` returning the update rules
                for training.
            screen_size: passed through to ``build_network``.
            initial_weights_file: optional ``.npz`` path whose ``arr_<i>``
                entries are loaded into the live network.
        """
        self.screen_size = screen_size
        self.mood_q = None
        self.last_q = 0
        self.n_parameter_updates = 0
        self.alpha = 0.00025
        # update frequency ?
        # gradient momentum ? 0.95
        # squared gradient momentum ? 0.95
        # min squared gradient ? 0.01
        self.save_every_n_frames = 100000  # ~ once per hour

        self.final_exploration_frame = 1000000
        self.replay_start_size = 50000
        self.i_action = 0

        self.state = None
        self.initial_epsilon = 1
        self.final_epsilon = 0.1
        self.epsilon = self.initial_epsilon
        self.gamma = 0.99
        self.replay_memory = replay_memory

        self.log_frequency = 1

        self.minibatch_size = 32
        # self.replay_memory_size = 1000000

        self.target_network_update_frequency = 10000

        # Symbolic inputs: states before/after the action, the action
        # (int8 matrix), the reward (int16 column) and an int8 column
        # flag for whether the future reward is included.
        s0_var = T.tensor4("s0", dtype=theano.config.floatX)
        a0_var = T.bmatrix("a0")
        r0_var = T.wcol("r0")
        s1_var = T.tensor4("s1", dtype=theano.config.floatX)
        future_reward_indicator_var = T.bcol("future_reward_indicator")

        self.n_actions = n_actions
        # Identity matrix used as a one-hot lookup table for actions.
        self.a_lookup = np.eye(self.n_actions, dtype=np.int8)

        # Live network: inputs are cast to float32 and divided by 256.
        self.network = build_network(n_actions=self.n_actions,
                                     input_var=T.cast(s0_var, 'float32') /
                                     np.float32(256),
                                     screen_size=self.screen_size)
        print("Compiling forward.")
        self.forward = theano.function([s0_var],
                                       lasagne.layers.get_output(
                                           self.network, deterministic=True))

        # Stale (target) network: same architecture, fed with s1.
        self.network_stale = build_network(
            n_actions=self.n_actions,
            input_var=T.cast(s1_var, 'float32') / np.float32(256),
            screen_size=self.screen_size)
        print("Compiling forward_stale.")
        self.forward_stale = theano.function([s1_var],
                                             lasagne.layers.get_output(
                                                 self.network_stale,
                                                 deterministic=True))

        if initial_weights_file is not None:
            with np.load(initial_weights_file) as initial_weights:
                param_values = [
                    initial_weights['arr_%d' % i]
                    for i in range(len(initial_weights.files))
                ]
                lasagne.layers.set_all_param_values(self.network, param_values)
            # NOTE(review): starting the action counter at
            # -replay_start_size presumably delays counter-driven schedules
            # while the replay memory refills -- confirm against the caller.
            self.i_action -= self.replay_start_size

        # Must run after any weight loading above (method defined elsewhere).
        self._update_network_stale()

        out = lasagne.layers.get_output(self.network)
        out_stale = lasagne.layers.get_output(self.network_stale)
        self.loss, self.err, __y, __q = build_loss(
            out=out,
            out_stale=out_stale,
            a0_var=a0_var,
            r0_var=r0_var,
            future_reward_indicator_var=future_reward_indicator_var,
            gamma=self.gamma)

        params = lasagne.layers.get_all_params(self.network, trainable=True)

        print("Compiling train_fn.")
        # Returns loss, error, transposed y and q, and both raw network
        # outputs, applying the updates on each call.
        self.train_fn = theano.function(
            [s0_var, a0_var, r0_var, s1_var, future_reward_indicator_var], [
                self.loss, self.err,
                T.transpose(__y),
                T.transpose(__q), out, out_stale
            ],
            updates=updates(self.loss, params))
        print("Compiling loss_fn.")
        # Same inputs as train_fn but only evaluates the loss, no updates.
        self.loss_fn = theano.function(
            [s0_var, a0_var, r0_var, s1_var, future_reward_indicator_var],
            self.loss)

        self.test_mode = False
    def __init__(
            self,
            numpy_rng,
            theano_rng=None,
            input1_v=None,
            input2_v=None,
            input3_v=None,
            input1_c=None,
            input2_c=None,
            input3_c=None,
            n_visible1_v=4096,
            n_visible2_v=4096,
            n_visible1_c=3529,
            n_visible2_c=3529,
            n_hidden_v=None,
            n_hidden_c=None,
            W1_c=None,
            bhid1_c=None,
            bvis1_c=None,
            W2_c=None,
            bhid2_c=None,
            bvis2_c=None,
            W1_v=None,
            bhid1_v=None,
            bvis1_v=None,
            W2_v=None,
            bhid2_v=None,
            bvis2_v=None,
            lamda=None,
            mu=None,
            beta=None,
            theta=None,
            momentum=0.9
    ):
        """Wire up four tied-weight autoencoder branches ("v" and "c").

        Each of the "v" and "c" groups has two encoders (W1, W2). Input 1
        goes through W1; inputs 2 and 3 both go through W2. Decoders reuse
        the transposed encoder weights with separate visible biases.

        Args:
            numpy_rng: NumPy random generator, used only to seed
                ``theano_rng`` when none is given.
            theano_rng: optional Theano ``RandomStreams``.
            input*_v, input*_c: symbolic inputs. If ``input1_v`` is None,
                all six are replaced by fresh float32 matrices.
            n_visible*, n_hidden*: layer sizes, stored as attributes only.
            W*, bhid*, bvis*: weight and bias variables. Although they
                default to None, they are used immediately (transposed and
                squared), so callers must supply all of them.
            lamda, mu, beta, theta, momentum: hyper-parameters, stored only.
        """
        self.n_visible1_v = n_visible1_v
        self.n_visible2_v = n_visible2_v
        self.n_hidden_v = n_hidden_v
        self.n_visible1_c = n_visible1_c
        self.n_visible2_c = n_visible2_c
        self.n_hidden_c = n_hidden_c
        self.lamda = lamda
        self.mu = mu
        self.beta = beta
        self.theta = theta
        self.momentum = momentum
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # Encoder weights.
        self.W1_v = W1_v
        self.W2_v = W2_v
        self.W1_c = W1_c
        self.W2_c = W2_c

        # Hidden biases.
        self.b1_v = bhid1_v
        self.b2_v = bhid2_v
        self.b1_c = bhid1_c
        self.b2_c = bhid2_c

        # Visible (reconstruction) biases.
        self.b1_prime_v = bvis1_v
        self.b2_prime_v = bvis2_v
        self.b1_prime_c = bvis1_c
        self.b2_prime_c = bvis2_c

        # Tied weights: each decoder is the transpose of its encoder.
        self.W1_prime_v = T.transpose(self.W1_v)
        self.W2_prime_v = T.transpose(self.W2_v)
        self.W1_prime_c = T.transpose(self.W1_c)
        self.W2_prime_c = T.transpose(self.W2_c)

        self.theano_rng = theano_rng
        # Sum of the mean squared value of every weight and bias.
        self.L2_sqr = (
            (self.W1_v ** 2).mean() + (self.W2_v ** 2).mean() + (self.W1_c ** 2).mean() + (self.W2_c ** 2).mean()
            + (self.b1_v ** 2).mean() + (self.b2_v ** 2).mean() + (self.b1_c ** 2).mean() + (self.b2_c ** 2).mean()
            + (self.b1_prime_v ** 2).mean() + (self.b2_prime_v ** 2).mean() + (self.b1_prime_c ** 2).mean() + (self.b2_prime_c ** 2).mean()
        )
        # if no input is given, generate a variable representing the input
        if input1_v is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row.
            # T.dmatrix is a fixed float64 type and rejects a dtype keyword
            # (TypeError); T.matrix accepts the requested float32 dtype.
            self.x1_v = T.matrix(name='input1_v', dtype='float32')
            self.x2_v = T.matrix(name='input2_v', dtype='float32')
            self.x3_v = T.matrix(name='input3_v', dtype='float32')
            self.x1_c = T.matrix(name='input1_c', dtype='float32')
            self.x2_c = T.matrix(name='input2_c', dtype='float32')
            self.x3_c = T.matrix(name='input3_c', dtype='float32')

        else:
            self.x1_v = input1_v
            self.x2_v = input2_v
            self.x3_v = input3_v
            self.x1_c = input1_c
            self.x2_c = input2_c
            self.x3_c = input3_c

        self.params = [self.W1_v, self.b1_v, self.b1_prime_v,
                       self.W2_v, self.b2_v, self.b2_prime_v,
                       self.W1_c, self.b1_c, self.b1_prime_c,
                       self.W2_c, self.b2_c, self.b2_prime_c
                       ]
        # end-snippet-1
        # Hidden codes: input 1 uses encoder 1, inputs 2 and 3 share encoder 2.
        self.output1_v = T.nnet.hard_sigmoid (T.dot(self.x1_v, self.W1_v) + self.b1_v)
        self.output2_v = T.nnet.hard_sigmoid (T.dot(self.x2_v, self.W2_v) + self.b2_v)
        self.output3_v = T.nnet.hard_sigmoid (T.dot(self.x3_v, self.W2_v) + self.b2_v)
        self.output1_c = T.nnet.hard_sigmoid (T.dot(self.x1_c, self.W1_c) + self.b1_c)
        self.output2_c = T.nnet.hard_sigmoid (T.dot(self.x2_c, self.W2_c) + self.b2_c)
        self.output3_c = T.nnet.hard_sigmoid (T.dot(self.x3_c, self.W2_c) + self.b2_c)

        # Transposed codes.
        self.output1t_v = T.transpose(self.output1_v)
        self.output2t_v = T.transpose(self.output2_v)
        self.output3t_v = T.transpose(self.output3_v)
        self.output1t_c = T.transpose(self.output1_c)
        self.output2t_c = T.transpose(self.output2_c)
        self.output3t_c = T.transpose(self.output3_c)

        # Reconstructions through the tied decoders.
        self.rec1_v = T.nnet.hard_sigmoid (T.dot(self.output1_v, self.W1_prime_v) + self.b1_prime_v)
        self.rec2_v = T.nnet.hard_sigmoid (T.dot(self.output2_v, self.W2_prime_v) + self.b2_prime_v)
        self.rec3_v = T.nnet.hard_sigmoid (T.dot(self.output3_v, self.W2_prime_v) + self.b2_prime_v)
        self.rec1_c = T.nnet.hard_sigmoid (T.dot(self.output1_c, self.W1_prime_c) + self.b1_prime_c)
        self.rec2_c = T.nnet.hard_sigmoid (T.dot(self.output2_c, self.W2_prime_c) + self.b2_prime_c)
        self.rec3_c = T.nnet.hard_sigmoid (T.dot(self.output3_c, self.W2_prime_c) + self.b2_prime_c)
# Call weights() (defined elsewhere) with 18 independent length-1 arrays
# drawn uniformly from [0, 1); the return value is not kept here.
weights(np.random.rand(1), np.random.rand(1), np.random.rand(1),
        np.random.rand(1), np.random.rand(1), np.random.rand(1),
        np.random.rand(1), np.random.rand(1), np.random.rand(1),
        np.random.rand(1), np.random.rand(1), np.random.rand(1),
        np.random.rand(1), np.random.rand(1), np.random.rand(1),
        np.random.rand(1), np.random.rand(1), np.random.rand(1))

#W1real=T.set_subtensor(W[0:3],W[0:3])
#W2real=T.set_subtensor(W[3:6],W[3:6])
#W3real=T.set_subtensor(W[6:9],W[6:9])
#W1imag=T.set_subtensor(W[9:12],W[9:12])
#W2imag=T.set_subtensor(W[12:15],W[12:15])
#W3imag=T.set_subtensor(W[15:18],W[15:18])

# Cost built from W, xreal, ximag and l (all defined elsewhere).
# NOTE(review): going by the commented-out names above, W[0:9] presumably
# holds the real parts and W[9:18] the imaginary parts of three complex
# weight vectors, and each "a.xreal - b.ximag" / "a.ximag + b.xreal" pair
# is the real / imaginary part of a complex product -- confirm.
# Terms, in order:
#   squared real and imaginary parts of product 1 (W[0:3], W[9:12]),
#   squared real and imaginary parts of product 2 (W[3:6], W[12:15]),
#   squared imaginary part of product 3 (W[6:9], W[15:18]),
#   l * (squared real part of product 3 - 1)^2.
cost = T.sqr(
    T.sum(T.transpose(W[0:3]) * xreal) -
    T.sum(T.transpose(W[9:12]) * ximag)) + T.sqr(
        T.sum(T.transpose(W[0:3]) * ximag) +
        T.sum(T.transpose(W[9:12]) * xreal)) + T.sqr(
            T.sum(T.transpose(W[3:6]) * xreal) -
            T.sum(T.transpose(W[12:15]) * ximag)) + T.sqr(
                T.sum(T.transpose(W[3:6]) * ximag) +
                T.sum(T.transpose(W[12:15]) * xreal)) + T.sqr(
                    T.sum(T.transpose(W[6:9]) * ximag) +
                    T.sum(T.transpose(W[15:18]) * xreal)) + l * T.sqr(
                        T.sqr(
                            T.sum(T.transpose(W[6:9]) * xreal) -
                            T.sum(T.transpose(W[15:18]) * ximag)) - 1)

# Empty list, filled elsewhere if at all.
loss = []
# Symbolic gradient of the cost with respect to W (a one-element list).
gradients = theano.tensor.grad(cost, [W])
Exemplo n.º 42
0
def errors(ypred, ytrue):
    """Return the mean misclassification rate of ``ypred`` against ``ytrue``.

    ``ypred`` is indexed with four shape entries, so it must be a 4-D
    symbolic tensor. It is reshaped to ``(shp[1], shp[0] * shp[2] * shp[3])``
    and transposed, so every row of the result holds ``shp[1]`` scores. The
    arg-max of each row is compared element-wise with ``ytrue`` and the
    fraction of mismatches is returned as a symbolic scalar.

    NOTE(review): presumably axis 1 of ``ypred`` is the class axis and
    ``ytrue`` holds one integer label per row of the reshaped matrix --
    confirm against the caller.
    """
    shp = ypred.shape
    rypred = T.transpose(ypred.reshape((shp[1], shp[0] * shp[2] * shp[3])))
    # Fixed: the original read the misspelled name ``ryped`` here, which
    # raised NameError as soon as the function was called.
    preds = T.argmax(rypred, axis=1)
    return T.mean(T.neq(preds, ytrue))
 def log_loss(self, y):
     """Return the negated diagonal of ``log(p_y_given_x) . y^T``.

     The product of ``log(self.p_y_given_x)`` with the transpose of ``y`` is
     formed, and entry ``(i, i)`` is picked for every ``i`` in
     ``range(y.shape[0])``. The result is one value per row of ``y``.
     """
     log_probs = T.log(self.p_y_given_x)
     pairwise = T.dot(log_probs, T.transpose(y))
     row_idx = T.arange(y.shape[0])
     col_idx = T.arange(y.shape[0])
     diagonal = pairwise[row_idx, col_idx]
     return -diagonal
Exemplo n.º 44
0
    def __init__(self,
                 n_in,
                 n_hidden,
                 x=T.tensor3("x"),
                 xc=T.tensor3("xc"),
                 mask=T.matrix("mask"),
                 maskc=T.matrix("maskx"),
                 prefix=""):
        """Run one LSTM over two masked sequences and expose their difference.

        The same weights are applied to ``x`` (with ``mask``) and to ``xc``
        (with ``maskc``) through two ``theano.scan`` loops that both call
        ``self.lstm_recurrent_fn``.

        Args:
            n_in: first dimension passed to ``init_weight`` for the
                input-to-hidden weights.
            n_hidden: hidden size; also the second dimension of the zero
                initial states.
            x, xc: 3-D symbolic inputs. They are transposed with axes
                ``(1, 0, 2)`` before scanning -- presumably
                ``(batch, time, n_in)``; confirm against the caller.
                ``None`` creates a fresh ``T.tensor3``.
            mask, maskc: 2-D symbolic masks, transposed with axes ``(1, 0)``
                before scanning. ``None`` creates a fresh ``T.matrix``.
            prefix: string prepended to the parameter names given to
                ``init_weight``.

        Attributes set:
            params: input weights and biases of the f/i/c/o gates, followed
                by the four hidden-to-hidden weights.
            all_hiddenx, all_hiddenc: every scan step's hidden state,
                transposed back with axes ``(1, 0, 2)``.
            nn_outx, nn_outc: hidden state of the last scan step.
            nn_out: ``nn_outx - nn_outc``.

        NOTE(review): the default arguments are symbolic variables created
        once, when the ``def`` is evaluated, and therefore shared by every
        instance that relies on the defaults.
        """

        self.params = []
        # Fall back to fresh symbolic variables when the caller passes None.
        self.x = x if x is not None else T.tensor3("x")
        self.xc = xc if xc is not None else T.tensor3("xc")
        self.mask = mask if mask is not None else T.matrix("mask")
        self.maskc = maskc if maskc is not None else T.matrix("maskc")

        # Transpose so that theano.scan, which iterates over the leading
        # axis of its sequences, steps along the original axis 1.
        nmask = T.transpose(self.mask, axes=(1, 0))
        nx = T.transpose(self.x, axes=(1, 0, 2))

        nmaskc = T.transpose(self.maskc, axes=(1, 0))
        nxc = T.transpose(self.xc, axes=(1, 0, 2))

        # Input-to-hidden weights and biases, one pair per gate.
        wf_x, bf = init_weight(n_in, n_hidden, pre="%s_lstm_f_x_" % prefix)
        self.params += [wf_x, bf]

        wi_x, bi = init_weight(n_in, n_hidden, pre="%s_lstm_i_x_" % prefix)
        self.params += [wi_x, bi]

        wc_x, bc = init_weight(n_in, n_hidden, pre="%s_lstm_c_x_" % prefix)
        self.params += [wc_x, bc]

        wo_x, bo = init_weight(n_in, n_hidden, pre="%s_lstm_o_x_" % prefix)
        self.params += [wo_x, bo]

        # Hidden-to-hidden weights; the bias returned alongside is discarded.
        wf_h, _ = init_weight(n_hidden,
                              n_hidden,
                              pre="%s_lstm_f_h_" % prefix)
        self.params += [wf_h]

        wi_h, _ = init_weight(n_hidden,
                              n_hidden,
                              pre="%s_lstm_i_h_" % prefix)
        self.params += [wi_h]

        wc_h, _ = init_weight(n_hidden,
                              n_hidden,
                              pre="%s_lstm_c_h_" % prefix)
        self.params += [wc_h]

        wo_h, _ = init_weight(n_hidden,
                              n_hidden,
                              pre="%s_lstm_o_h_" % prefix)
        self.params += [wo_h]

        # Zero initial hidden/cell states. Fixed: the original indexed the raw
        # arguments ``x`` / ``xc``, which raised AttributeError when ``None``
        # was passed even though ``None`` is explicitly supported above.
        h_t_0 = T.alloc(0., self.x.shape[0], n_hidden)
        c_t_0 = T.alloc(0., self.x.shape[0], n_hidden)

        h_t_0_c = T.alloc(0., self.xc.shape[0], n_hidden)
        c_t_0_c = T.alloc(0., self.xc.shape[0], n_hidden)

        # Shared by both scans so the two sequences use identical weights.
        # NOTE(review): the c-gate pair is listed as (wc_h, wc_x) while the
        # other gates are (w*_x, w*_h); the order is kept as in the original
        # and must match the signature of ``lstm_recurrent_fn`` -- confirm.
        gate_params = [
            wf_x, wf_h, bf, wi_x, wi_h, bi, wc_h, wc_x, bc, wo_x, wo_h, bo
        ]

        [h, c], _ = theano.scan(self.lstm_recurrent_fn,
                                sequences=[nx, nmask],
                                outputs_info=[h_t_0, c_t_0],
                                non_sequences=gate_params)

        [hc, cc], _ = theano.scan(self.lstm_recurrent_fn,
                                  sequences=[nxc, nmaskc],
                                  outputs_info=[h_t_0_c, c_t_0_c],
                                  non_sequences=gate_params)

        self.all_hiddenx = T.transpose(h, axes=(1, 0, 2))
        self.nn_outx = h[-1]

        self.all_hiddenc = T.transpose(hc, axes=(1, 0, 2))
        self.nn_outc = hc[-1]

        self.nn_out = h[-1] - hc[-1]
Exemplo n.º 45
0
def nll(ypred, ytrue):
    """Return the mean negative log of the entries of ``ypred`` picked by ``ytrue``.

    ``ypred`` (4-D, since four shape entries are read) is reshaped to
    ``(shape[1], shape[0] * shape[2] * shape[3])`` and transposed. In row
    ``i`` of the result, the entry at column ``ytrue[i]`` is selected, its
    logarithm taken, and the negated mean over all rows returned.
    """
    dims = ypred.shape
    flattened = ypred.reshape((dims[1], dims[0] * dims[2] * dims[3]))
    per_row = T.transpose(flattened)
    row_index = T.arange(per_row.shape[0])
    picked_log = T.log(per_row)[row_index, ytrue]
    return -T.mean(picked_log)
Exemplo n.º 46
0
    def __init__(self, n_hidden, embedding_dimention=50, feature_dimention=61):
        """Build the symbolic graph and compile ``get_out`` and ``train_step``.

        The graph has three parts:

        * a "zp" encoder: two ``LSTM`` instances over embedded ``zp_x_pre`` /
          ``zp_x_post`` indices, each followed by an 8-row attention matrix
          over the LSTM's ``all_hidden``;
        * an "np" encoder: two ``sub_LSTM_batch`` instances whose ``nn_out``
          are concatenated and fed to a forward and a reversed ``LSTM``;
        * an attention score combining both, normalised with ``softmax`` and
          stored in ``self.out``.

        Args:
            n_hidden: hidden size handed to every LSTM and to the attention
                weight initialisers.
            embedding_dimention: input size handed to the LSTM constructors.
                The reshapes of the embedded inputs use
                ``args.embedding_dimention`` (a module-level object not shown
                here) instead.
            feature_dimention: only referenced by commented-out code.

        Side effects: compiles two Theano functions, ``self.get_out`` and
        ``self.train_step`` (plain SGD via ``lasagne.updates.sgd``).
        """

        ## n_in: input dimension of the sequence LSTM
        ## n_hidden: hidden-layer dimension of the LSTMs for candidates and zp

        self.params = []

        self.w_embedding = init_weight_file(args.embedding,
                                            args.embedding_dimention)
        self.params.append(self.w_embedding)

        self.zp_x_pre_index = T.imatrix("zp_x_pre")
        self.zp_x_post_index = T.imatrix("zp_x_post")

        # Look up one embedding row per flattened index, then reshape.
        # NOTE(review): the target shape keeps only the first axis of the
        # index matrix, so the reshape can only succeed when its second axis
        # has length 1 -- confirm against the data feeding this model.
        zp_x_pre_newshape = (T.shape(self.zp_x_pre_index)[0],
                             args.embedding_dimention)
        self.embedding_sub_zp_pre = self.w_embedding[
            self.zp_x_pre_index.flatten()]
        self.zp_x_pre = T.reshape(self.embedding_sub_zp_pre, zp_x_pre_newshape)

        zp_x_post_newshape = (T.shape(self.zp_x_post_index)[0],
                              args.embedding_dimention)
        self.embedding_sub_zp_post = self.w_embedding[
            self.zp_x_post_index.flatten()]
        self.zp_x_post = T.reshape(self.embedding_sub_zp_post,
                                   zp_x_post_newshape)

        zp_nn_pre = LSTM(embedding_dimention, n_hidden, self.zp_x_pre)
        self.params += zp_nn_pre.params

        zp_nn_post = LSTM(embedding_dimention, n_hidden, self.zp_x_post)
        self.params += zp_nn_post.params

        # 8x8 identity matrix, subtracted from A.A^T below.
        danwei = theano.shared(np.eye(8, dtype=theano.config.floatX))

        H_pre = zp_nn_pre.all_hidden
        H_post = zp_nn_post.all_hidden

        # The second value returned by init_weight is discarded (`heihei`).
        Ws1_pre, heihei = init_weight(n_hidden,
                                      n_hidden,
                                      pre="Ws1_pre_zp",
                                      ones=False)
        Ws2_pre, heihei = init_weight(8,
                                      n_hidden,
                                      pre="Ws2_pre_zp",
                                      ones=False)
        self.params += [Ws1_pre, Ws2_pre]

        # Attention matrix with 8 rows (first dimension of Ws2_pre).
        A_pre = softmax(T.dot(Ws2_pre, T.dot(Ws1_pre, T.transpose(H_pre))))

        # Penalty: sum of squared entries of (A.A^T - I).
        P_pre = T.dot(A_pre, T.transpose(A_pre)) - danwei
        #norm_pre, _ = theano.scan(lambda i, tmp: T.dot(P_pre[i], P_pre[i]) + tmp,
        #          sequences = T.arange(P_pre.shape[0]),
        #          outputs_info = np.asarray(0., dtype=theano.config.floatX))
        #f_norm_pre = T.sum(norm_pre[-1])
        f_norm_pre = (P_pre**2).sum()
        # Average the 8 attention-weighted summaries of H_pre.
        zp_out_pre = T.mean(T.dot(A_pre, H_pre), axis=0)

        Ws1_post, heihei = init_weight(n_hidden,
                                       n_hidden,
                                       pre="Ws1_post_zp",
                                       ones=False)
        Ws2_post, heihei = init_weight(8,
                                       n_hidden,
                                       pre="Ws2_post_zp",
                                       ones=False)
        self.params += [Ws1_post, Ws2_post]
        A_post = softmax(T.dot(Ws2_post, T.dot(Ws1_post, T.transpose(H_post))))

        P_post = T.dot(A_post, T.transpose(A_post)) - danwei
        #norm_post, _ = theano.scan(lambda i, tmp: T.dot(P_post[i], P_post[i]) + tmp,
        #          sequences = T.arange(P_post.shape[0]),
        #          outputs_info = np.asarray(0., dtype=theano.config.floatX))
        #f_norm_post = T.sum(norm_post[-1])
        f_norm_post = (P_post**2).sum()

        zp_out_post = T.mean(T.dot(A_post, H_post), axis=0)

        # Combined penalty; added to the training cost below.
        f_norm = f_norm_pre + f_norm_post

        #self.zp_out = T.concatenate((zp_nn_pre.nn_out,zp_nn_post.nn_out))
        self.zp_out = T.concatenate((zp_out_pre, zp_out_post))

        self.zp_out_output = self.zp_out

        ### get sequence output for NP ###
        # All four index tensors carry the same display name "np_x"; they are
        # nevertheless distinct symbolic variables.
        self.np_x_post_index = T.itensor3("np_x")
        self.np_x_postc_index = T.itensor3("np_x")
        self.np_x_pre_index = T.itensor3("np_x")
        self.np_x_prec_index = T.itensor3("np_x")

        # NOTE(review): as with the zp inputs, the target shape drops the
        # last index axis, so the reshape requires that axis to have length
        # 1 -- confirm.
        np_x_post_newshape = (T.shape(self.np_x_post_index)[0],
                              T.shape(self.np_x_post_index)[1],
                              args.embedding_dimention)
        self.embedding_sub_np_x_post = self.w_embedding[
            self.np_x_post_index.flatten()]
        self.np_x_post = T.reshape(self.embedding_sub_np_x_post,
                                   np_x_post_newshape)

        np_x_postc_newshape = (T.shape(self.np_x_postc_index)[0],
                               T.shape(self.np_x_postc_index)[1],
                               args.embedding_dimention)
        self.embedding_sub_np_x_postc = self.w_embedding[
            self.np_x_postc_index.flatten()]
        self.np_x_postc = T.reshape(self.embedding_sub_np_x_postc,
                                    np_x_postc_newshape)

        np_x_pre_newshape = (T.shape(self.np_x_pre_index)[0],
                             T.shape(self.np_x_pre_index)[1],
                             args.embedding_dimention)
        self.embedding_sub_np_x_pre = self.w_embedding[
            self.np_x_pre_index.flatten()]
        self.np_x_pre = T.reshape(self.embedding_sub_np_x_pre,
                                  np_x_pre_newshape)

        np_x_prec_newshape = (T.shape(self.np_x_prec_index)[0],
                              T.shape(self.np_x_prec_index)[1],
                              args.embedding_dimention)
        self.embedding_sub_np_x_prec = self.w_embedding[
            self.np_x_prec_index.flatten()]
        self.np_x_prec = T.reshape(self.embedding_sub_np_x_prec,
                                   np_x_prec_newshape)

        # Four distinct mask variables that share the display name "mask".
        self.mask_pre = T.matrix("mask")
        self.mask_prec = T.matrix("mask")

        self.mask_post = T.matrix("mask")
        self.mask_postc = T.matrix("mask")

        self.np_nn_pre = sub_LSTM_batch(embedding_dimention, n_hidden,
                                        self.np_x_pre, self.np_x_prec,
                                        self.mask_pre, self.mask_prec)
        self.params += self.np_nn_pre.params
        self.np_nn_post = sub_LSTM_batch(embedding_dimention, n_hidden,
                                         self.np_x_post, self.np_x_postc,
                                         self.mask_post, self.mask_postc)
        self.params += self.np_nn_post.params

        self.np_nn_post_output = self.np_nn_post.nn_out
        self.np_nn_pre_output = self.np_nn_pre.nn_out

        self.np_out = T.concatenate(
            (self.np_nn_post_output, self.np_nn_pre_output), axis=1)

        # Forward LSTM over np_out and a second LSTM over np_out reversed
        # along its first axis; the second one's hidden states are reversed
        # back before concatenation.
        np_nn_f = LSTM(n_hidden * 2, n_hidden * 2, self.np_out)
        self.params += np_nn_f.params
        np_nn_b = LSTM(n_hidden * 2, n_hidden * 2, self.np_out[::-1])
        self.params += np_nn_b.params

        self.bi_np_out = T.concatenate(
            (np_nn_f.all_hidden, np_nn_b.all_hidden[::-1]), axis=1)

        self.np_out_output = self.bi_np_out
        #self.get_np_out = theano.function(inputs=[self.np_x_pre,self.np_x_prec,self.np_x_post,self.np_x_postc,self.mask_pre,self.mask_prec,self.mask_post,self.mask_postc],outputs=[self.np_out_output])

        #self.feature = T.matrix("feature")
        #self.feature_layer = Layer(feature_dimention,n_hidden,self.feature,repre_active)
        #self.params += self.feature_layer.params

        w_attention_zp, b_attention = init_weight(n_hidden * 2,
                                                  1,
                                                  pre="attention_zp",
                                                  ones=False)
        self.params += [w_attention_zp, b_attention]

        # Created but neither trained nor used: its only consumers are the
        # commented-out expressions below.
        w_attention_np, b_u = init_weight(n_hidden * 2,
                                          1,
                                          pre="attention_np",
                                          ones=False)
        #self.params += [w_attention_np]

        w_attention_np_rnn, b_u = init_weight(n_hidden * 4,
                                              1,
                                              pre="attention_np_rnn",
                                              ones=False)
        self.params += [w_attention_np_rnn]

        #np_out_dropout = _dropout_from_layer(self.np_out_output)
        #zp_out_dropout = _dropout_from_layer(self.zp_out_output)
        #np_dropout = _dropout_from_layer(self.np_out)

        #self.calcu_attention_dropout = tanh(T.dot(np_out_dropout,w_attention_np_rnn) + T.dot(zp_out_dropout,w_attention_zp) + T.dot(np_dropout,w_attention_np) + b_attention)

        #self.calcu_attention = tanh(T.dot(self.np_out_output,w_attention_np_rnn) + T.dot(self.zp_out_output,w_attention_zp) + T.dot(self.np_out,w_attention_np) + b_attention)
        self.calcu_attention = tanh(
            T.dot(self.np_out_output, w_attention_np_rnn) +
            T.dot(self.zp_out_output, w_attention_zp) + b_attention)

        # Transpose the score column into a row, normalise it and keep that
        # single row.
        self.attention = softmax(T.transpose(self.calcu_attention,
                                             axes=(1, 0)))[0]
        #self.attention_dropout = softmax(T.transpose(self.calcu_attention_dropout,axes=(1,0)))[0]

        self.out = self.attention
        #self.out_dropout = self.attention_dropout

        self.get_out = theano.function(inputs=[
            self.zp_x_pre_index, self.zp_x_post_index, self.np_x_pre_index,
            self.np_x_prec_index, self.np_x_post_index, self.np_x_postc_index,
            self.mask_pre, self.mask_prec, self.mask_post, self.mask_postc
        ],
                                       outputs=[self.out],
                                       on_unused_input='warn')

        # NOTE(review): the two names look swapped -- `l1_norm_squared` sums
        # squares while `l2_norm_squared` sums absolute values. Neither they
        # nor `lmbda_l1` / `lmbda_l2` enter the cost below.
        l1_norm_squared = sum([(w**2).sum() for w in self.params])
        l2_norm_squared = sum([(abs(w)).sum() for w in self.params])

        lmbda_l1 = 0.0
        #lmbda_l2 = 0.001
        lmbda_l2 = 0.0

        # `t` is multiplied element-wise with the attention vector;
        # presumably a 0/1 indicator of the correct candidates -- confirm.
        t = T.bvector()
        cost = -(T.log((self.out * t).sum())) + f_norm
        #cost = -(T.log((self.out_dropout*t).sum()))

        # Learning rate, supplied as an input on every train_step call.
        lr = T.scalar()

        updates = lasagne.updates.sgd(cost, self.params, lr)
        #updates = lasagne.updates.adadelta(cost, self.params)

        self.train_step = theano.function(inputs=[
            self.zp_x_pre_index, self.zp_x_post_index, self.np_x_pre_index,
            self.np_x_prec_index, self.np_x_post_index, self.np_x_postc_index,
            self.mask_pre, self.mask_prec, self.mask_post, self.mask_postc, t,
            lr
        ],
                                          outputs=[cost],
                                          on_unused_input='warn',
                                          updates=updates)
Exemplo n.º 47
0
Arquivo: NTN.py Projeto: vjadhav702/IS
#==========================Theano Function definitions==============================
#===================================================================================

# Symbolic placeholders shared by the scoring and loss expressions.
ATemp = T.matrix('ATemp')
BTemp = T.tensor3('BTemp')
UTemp = T.matrix('UTemp')
E1Temp = T.vector('E1Temp')
E2Temp = T.vector('E2Temp')
E1E2Temp = T.vector('E1E2Temp')
ECTemp = T.vector('ECTemp')
E1ECTemp = T.vector('E1ECTemp')

# Score of a triplet: U . tanh(e1 . B . e2^T + A . [e1; e2]).
# NOTE(review): E1E2Temp is presumably the concatenation of E1Temp and E2Temp
# (and E1ECTemp that of E1Temp and ECTemp) -- confirm against the caller.
score = UTemp.dot(
    T.tanh(E1Temp.dot(BTemp).dot(T.transpose(E2Temp)) + ATemp.dot(E1E2Temp)))
scoringFunction = theano.function(
    inputs=[ATemp, BTemp, UTemp, E1Temp, E2Temp, E1E2Temp], outputs=score)

# Same score with the second entity replaced by the corrupting one.
scoreCorrupted = UTemp.dot(
    T.tanh(E1Temp.dot(BTemp).dot(T.transpose(ECTemp)) + ATemp.dot(E1ECTemp)))

# Margin loss: largest(0, 1 - score + scoreCorrupted + regularisation), where
# the regularisation is regparam times the summed squared entries of A, B and
# U, divided by 3. `regparam` is defined outside this excerpt.
loss = T.largest(
    0,
    (1 - score) + scoreCorrupted +
    regparam * (T.sum(ATemp**2) + T.sum(BTemp**2) + T.sum(UTemp**2)) / 3)
Exemplo n.º 48
0
    def __init__(self, phase, config, vocabulary_size=1295, hidden_ndim=512):
        """Build the Lasagne network and the phase-specific losses/functions.

        Args:
            phase: selects what is built after the network. The branches
                handled are 'pretrain', 'ctc', 'extract_feature',
                'get_prediction' and 'top_k_prediction'; any other value only
                builds the network.
            config: object whose ``items`` mapping is read for 'lr' and, in
                the 'top_k_prediction' phase, for 'top_k'.
            vocabulary_size: ``self.nClasses`` is ``vocabulary_size + 1``.
            hidden_ndim: ``num_units`` of each of the four LSTM layers.

        Side effects: writes and reloads 'dummy.pkl' through
        ``self.save_model`` / ``self.load_model`` and then deletes it with a
        shell command.
        """
        # need to be same voca_size and hidde_ndim so as to load same shape params
        # self.log_self()
        size = 101

        # model paras
        self.config = config
        # Plain config value, used by the momentum optimiser in 'pretrain';
        # the 'ctc' phase uses the shared variable self.learning_rate instead.
        learning_rate = self.config.items['lr']
        # alpha and eps are stored but not read anywhere in this method.
        self.alpha = np.array(1e-3, dtype=np.float32)
        self.eps = np.array(1e-6, dtype=np.float32)
        self.learning_rate = theano.shared(np.float32(config.items['lr']))

        self.nClasses = vocabulary_size + 1
        self.vocabulary_size = vocabulary_size

        # variables
        image = T.tensor4(
            'image')  # (2*nb*len, 3, 101, 101) or (2*nb*3, 3, 101, 101)
        mask = T.matrix('mask')  # (nb, max_hlen)
        token = T.imatrix('token')  # (nb, max_vlen)

        # Symbolic sizes derived from the inputs.
        # NOTE(review): `/` on symbolic integers is true division under
        # Python 3 -- confirm which interpreter this targets.
        self.nb = mask.shape[0]
        self.max_xlen = image.shape[0] / 2 / self.nb
        self.max_hlen = mask.shape[1]

        net = {}

        # RGB modal
        net['image'] = InputLayer(shape=(None, 3, size,
                                         size))  # (2*nb*len, 3, 101, 101)

        # both hand
        net['conv1'] = Conv2DLayer(incoming=net['image'],
                                   num_filters=96,
                                   filter_size=7,
                                   stride=2)
        net['norm1'] = LocalResponseNormalization2DLayer(incoming=net['conv1'])
        net['pool1'] = MaxPool2DLayer(incoming=net['norm1'], pool_size=3)

        net['conv2'] = Conv2DLayer(incoming=net['pool1'],
                                   num_filters=256,
                                   filter_size=5)
        net['pool2'] = MaxPool2DLayer(incoming=net['conv2'], pool_size=2)

        net['conv3'] = Conv2DLayer(incoming=net['pool2'],
                                   num_filters=512,
                                   filter_size=3,
                                   pad=1)
        net['conv4'] = Conv2DLayer(incoming=net['conv3'],
                                   num_filters=512,
                                   filter_size=3,
                                   pad=1)
        net['conv5'] = Conv2DLayer(incoming=net['conv4'],
                                   num_filters=512,
                                   filter_size=3,
                                   pad=1)

        # modal fusion
        net['pool5'] = MaxPool2DLayer(incoming=net['conv5'],
                                      pool_size=3)  # (2nb*len, 512, 2, 2)
        net['fc6'] = DenseLayer(
            incoming=net['pool5'],
            num_units=1024)  # (2nb*len, 1024) or (nb*3, 1024)

        # dropout should be shared among timestep, or triplets
        # Despite its key, 'drop6' is only a reshape; the DropoutLayer that
        # would follow is commented out.
        net['drop6'] = ReshapeLayer(incoming=net['fc6'],
                                    shape=(2 * self.nb, -1, 1024))
        # net['drop6'] = DropoutLayer(incoming=net['pre_drop6'], p=0.2, shared_axes=(1,))
        net['fc7'] = DenseLayer(ReshapeLayer(net['drop6'], shape=(-1, 1024)),
                                num_units=256,
                                nonlinearity=identity)  # (3*nb, 256)

        # encoding network for image features
        net['mask'] = InputLayer(shape=(None, None),
                                 name='mask')  # (nb, max_hlen)
        net['pre_conv1d'] = DimshuffleLayer(net['drop6'],
                                            (0, 2, 1))  # (nb, 1024, max_xlen)
        net['conv1d_1'] = Conv1DLayer(net['pre_conv1d'],
                                      num_filters=1024,
                                      filter_size=3,
                                      pad='same')
        net['pool1d_1'] = MaxPool1DLayer(net['conv1d_1'],
                                         pool_size=2)  #(nb, 1024, max_xlen/2)
        net['drop1d_1'] = DropoutLayer(net['pool1d_1'],
                                       p=0.1,
                                       shared_axes=(2, ))

        net['conv1d_2'] = Conv1DLayer(net['drop1d_1'],
                                      num_filters=1024,
                                      filter_size=3,
                                      pad='same')
        net['pool1d_2'] = MaxPool1DLayer(net['conv1d_2'],
                                         pool_size=2)  #(nb, 1024, max_hlen)
        net['drop1d_2'] = DropoutLayer(net['pool1d_2'],
                                       p=0.1,
                                       shared_axes=(2, ))

        # LSTM, input shape=(nb, max_hlen, 1024)
        # two LSTM, one for fusion, one for right hand
        net['lstm_input'] = DimshuffleLayer(
            net['drop1d_2'], (0, 2, 1))  # (2*nb, max_hlen, 1024)

        # right hand lstm
        # Keeps the first half of the leading axis.
        net['lstm_input_right'] = ExpressionLayer(
            net['lstm_input'],
            function=lambda x: x[:x.shape[0] / 2],
            output_shape='auto')
        net['lstm_frw_right'] = LSTMLayer(
            incoming=net['lstm_input_right'],
            mask_input=net['mask'],
            forgetgate=Gate(b=lasagne.init.Constant(1.0)),
            num_units=hidden_ndim)  # (nb, max_hlen, hidden_ndim)
        net['lstm_bck_right'] = LSTMLayer(
            incoming=net['lstm_input_right'],
            mask_input=net['mask'],
            forgetgate=Gate(b=lasagne.init.Constant(1.0)),
            num_units=hidden_ndim,
            backwards=True)
        net['lstm_shp_right'] = ReshapeLayer(
            ConcatLayer((net['lstm_frw_right'], net['lstm_bck_right']),
                        axis=2),
            shape=(-1, 2 * hidden_ndim))  # (nb*max_hlen, 2*hidden_ndim)

        # fusion lstm
        # Concatenates the two halves of the leading axis along the feature
        # axis and halves the values.
        net['lstm_input_fusion'] = ExpressionLayer(
            net['lstm_input'],
            function=lambda x: T.concatenate(
                [x[:x.shape[0] / 2], x[x.shape[0] / 2:]], axis=2) / 2.0,
            output_shape='auto')
        net['lstm_frw_fusion'] = LSTMLayer(
            incoming=net['lstm_input_fusion'],
            mask_input=net['mask'],
            forgetgate=Gate(b=lasagne.init.Constant(1.0)),
            num_units=hidden_ndim)  # (nb, max_hlen, hidden_ndim)
        net['lstm_bck_fusion'] = LSTMLayer(
            incoming=net['lstm_input_fusion'],
            mask_input=net['mask'],
            forgetgate=Gate(b=lasagne.init.Constant(1.0)),
            num_units=hidden_ndim,
            backwards=True)
        net['lstm_shp_fusion'] = ReshapeLayer(
            ConcatLayer((net['lstm_frw_fusion'], net['lstm_bck_fusion']),
                        axis=2),
            shape=(-1, 2 * hidden_ndim))  # (nb*max_hlen, 2*hidden_ndim)

        net['lstm_shp'] = ConcatLayer(
            [net['lstm_shp_right'], net['lstm_shp_fusion']], axis=1)
        # net['lstm_shp'] = net['lstm_shp_right']

        # Linear (pre-softmax) class scores, reshaped back to one sequence
        # per batch element.
        net['out'] = DenseLayer(
            net['lstm_shp'], self.nClasses,
            nonlinearity=identity)  # (nb*max_hlen, nClasses)
        net['out_lin'] = ReshapeLayer(net['out'],
                                      shape=(self.nb, -1, self.nClasses))

        self.net = net

        # try save load model
        # Round-trip the freshly built parameters through a throw-away file to
        # catch serialisation problems early.
        dummy_save_file = 'dummy.pkl'
        glog.info('try save load dummy model to: %s...' % dummy_save_file)
        self.save_model(dummy_save_file)
        self.load_model(dummy_save_file)
        os.system('rm -rf dummy.pkl')
        glog.info(
            'dummy save load success, remove it and start calculate outputs...'
        )

        if phase == 'pretrain':
            # for triplet pretrain use
            # NOTE(review): `opflow` is not defined anywhere in this method;
            # unless it is a module-level name this branch raises NameError.
            self.params_feat = get_all_params(net['fc7'])
            regular_feat = lasagne.regularization.apply_penalty(
                self.params_feat, lasagne.regularization.l2) * np.array(
                    5e-4 / 2, dtype=np.float32)

            ## triplet train loss
            triplet_loss_train = self.get_triplet_loss(image,
                                                       opflow,
                                                       deterministic=False)
            loss_train_feat = triplet_loss_train + regular_feat

            ## triplet valid loss
            triplet_loss_valid = self.get_triplet_loss(image,
                                                       opflow,
                                                       deterministic=True)
            loss_valid_feat = triplet_loss_valid + regular_feat

            self.updates = lasagne.updates.momentum(
                loss_train_feat,
                self.params_feat,
                learning_rate=learning_rate,
                momentum=0.9)
            self.inputs = [image, opflow]
            self.train_outputs = [loss_train_feat, triplet_loss_train]
            self.valid_outputs = [loss_valid_feat, triplet_loss_valid]

        elif phase == 'ctc':
            # for ctc loss
            self.params_full = lasagne.layers.get_all_params(
                self.net['out_lin'], trainable=True)
            self.regular_params = lasagne.layers.get_all_params(
                self.net['out_lin'], regularizable=True)
            regular_full = lasagne.regularization.apply_penalty(
                self.regular_params, lasagne.regularization.l2) * np.array(
                    5e-4 / 2, dtype=np.float32)

            # full train loss
            # NOTE(review): the keyword is spelled `deteministic`; it must
            # match the parameter name in get_ctc_loss, which is not shown.
            ctc_loss_train, pred_train = self.get_ctc_loss(image,
                                                           mask,
                                                           token,
                                                           deteministic=False)
            loss_train_full = ctc_loss_train + regular_full

            # full valid loss
            ctc_loss_valid, pred_valid = self.get_ctc_loss(image,
                                                           mask,
                                                           token,
                                                           deteministic=True)
            loss_valid_full = ctc_loss_valid + regular_full

            self.updates = lasagne.updates.adam(
                loss_train_full,
                self.params_full,
                learning_rate=self.learning_rate)
            self.inputs = [image, mask, token]
            self.train_outputs = [loss_train_full, ctc_loss_train, pred_train]
            self.valid_outputs = [loss_valid_full, ctc_loss_valid, pred_valid]

        elif phase == 'extract_feature':
            pass
            # # for feature extraction
            # fc6 = get_output(self.net['fc6'], data, deterministic = True)
            # self.feature_func = theano.function(inputs=[data], outputs=fc6)

        elif phase == 'get_prediction':
            # NOTE(review): this branch reads net keys 'fusion_2', 'opflow'
            # and 'coord' and the names `opflow`, `coord` and `data`, none of
            # which are created in this method -- it looks stale; confirm
            # before use.
            embeding = get_output(self.net['fusion_2'], {
                self.net['image']: image,
                self.net['opflow']: opflow,
                self.net['coord']: coord
            },
                                  deterministic=True)  # (nb, 1280, len_m)
            output_lin = get_output(
                self.net['out_lin'], {
                    self.net['lstm_input']: T.transpose(embeding, (0, 2, 1)),
                    self.net['mask']: mask
                },
                deterministic=True)

            output_softmax = Softmax(output_lin)  # (nb, max_hlen, nClasses)
            output_trans = T.transpose(output_softmax,
                                       (1, 0, 2))  # (max_hlen, nb, nClasses)

            best_path_loss, best_path = best_right_path_cost(
                output_trans, mask, token)
            ctc_loss = ctc_cost(output_trans, T.sum(mask,
                                                    axis=1,
                                                    dtype='int32'), token)

            # (nb, max_hlen, voca_size+1)
            self.predict_func = theano.function(
                inputs=[data, mask, token],
                outputs=[best_path_loss, best_path, ctc_loss])

        elif phase == 'top_k_prediction':
            # NOTE(review): same undefined names as in 'get_prediction'
            # ('fusion_2', 'opflow', 'coord', `data`) -- confirm before use.
            embeding = get_output(self.net['fusion_2'], {
                self.net['image']: image,
                self.net['opflow']: opflow,
                self.net['coord']: coord
            },
                                  deterministic=True)  # (nb, 1280, len_m)
            output_lin = get_output(
                self.net['out_lin'], {
                    self.net['lstm_input']: T.transpose(embeding, (0, 2, 1)),
                    self.net['mask']: mask
                },
                deterministic=True)

            output_softmax = Softmax(output_lin)  # (nb, max_hlen, nClasses)
            output_trans = T.transpose(output_softmax,
                                       (1, 0, 2))  # (max_hlen, nb, nClasses)

            top_k_path_loss, top_k_path = top_k_right_path_cost(
                output_trans, mask, token, k=config.items['top_k'])
            ctc_loss = ctc_cost(output_trans, T.sum(mask,
                                                    axis=1,
                                                    dtype='int32'), token)

            # (nb, max_hlen, voca_size+1)
            self.predict_func = theano.function(
                inputs=[data, mask, token],
                outputs=[output_lin, top_k_path_loss, top_k_path, ctc_loss])

        glog.info('Model built, phase = %s' % phase)
Exemplo n.º 49
0
# Hyper-parameters of the ranking objective.
margin = 0.10
lambda_ = 1.0

# Row-wise Euclidean norms of the prediction, the target and the negative
# sample. `prediction`, `target_var`, `neg_var` and `eps` are defined outside
# this excerpt.
norm_in = T.sqrt((prediction * prediction).sum(axis=1))
norm_tar = T.sqrt((target_var * target_var).sum(axis=1))
norm_neg = T.sqrt((neg_var * neg_var).sum(axis=1))

# Row-wise dot products of the prediction with target and negative.
prod_xy_unnorm = T.sum(prediction * target_var, axis=1)
prod_xneg_unnorm = T.sum(prediction * neg_var, axis=1)

# Products of norms; eps keeps the divisions below away from zero.
norm = norm_in * norm_tar + eps
norm_xneg = norm_in * norm_neg + eps

# Cosine similarities prediction/target and prediction/negative.
prod_xy = prod_xy_unnorm / T.transpose(norm)
prod_xneg = prod_xneg_unnorm / T.transpose(norm_xneg)

# Hinge ranking loss: the target should beat the negative by `margin`.
rank_loss = T.maximum(margin - prod_xy + prod_xneg, 0)
rank_loss_m = T.mean(rank_loss, axis=0)

# Cosine distance to the target.
dist = 1 - prod_xy
dist_m = T.mean(dist, axis=0)
lr, mtm = 0.01, 0.9

# Train every trainable parameter of `network` with Adagrad on the combined
# objective.
params = lasagne.layers.get_all_params(network, trainable=True)
loss = dist_m + lambda_ * rank_loss_m
updates = lasagne.updates.adagrad(loss, params, learning_rate=lr)
train_fn = theano.function([input_var, target_var, neg_var],
                           [loss, prediction],
Exemplo n.º 50
0
def cosine_similarity(A, B):
    """Return the cosine similarity of ``A`` and ``B``.

    Computes ``A . B^T / sqrt((A . A^T) * (B . B^T))``. The two self
    products are squared Euclidean norms, so the square root is required to
    obtain ``|A| * |B|`` in the denominator; without it the result would be
    the cosine divided once more by ``|A| * |B|``.

    Args:
        A: tensor accepted by ``T.dot`` / ``T.transpose``.
        B: tensor with a shape compatible with ``A`` for the products above.
            NOTE(review): the formula is a cosine for vectors or single-row
            matrices; confirm callers do not pass multi-row matrices.

    Returns:
        The symbolic quotient described above.
    """
    dot_ab = T.dot(A, T.transpose(B))
    sq_norm_a = T.dot(A, T.transpose(A))
    sq_norm_b = T.dot(B, T.transpose(B))
    return dot_ab / T.sqrt(sq_norm_a * sq_norm_b)
Exemplo n.º 51
0
 def get_upds(self, inp):
     """Build the weight, hidden and visible update terms for ``inp``.

     Returns a 3-tuple:
       * ``inp^T . fprop(inp)`` scaled by ``1 / self.MB_size``,
       * the axis-0 mean of ``fprop(inp)``,
       * the axis-0 mean of ``inp``.
     """
     inp_t = T.transpose(inp)
     correlation = T.dot(inp_t, self.fprop(inp))
     inv_batch = 1.0 / self.MB_size
     weights_term = correlation * inv_batch
     # fprop is deliberately invoked a second time, as in the original graph.
     hidden_term = T.mean(self.fprop(inp), axis=0)
     visible_term = T.mean(inp, axis=0)
     return weights_term, hidden_term, visible_term
Exemplo n.º 52
0
    def get_light_curve(self,
                        orbit=None,
                        r=None,
                        t=None,
                        texp=None,
                        return_num_eval=False,
                        light_delay=False,
                        **kwargs):
        """Get the light curve for an orbit at a set of times

        Args:
            orbit: An object with a ``get_relative_position`` method that
                takes a tensor of times and returns a list of Cartesian
                coordinates of a set of bodies relative to the central source.
                This method should return three tensors (one for each
                coordinate dimension) and each tensor should have the shape
                ``append(t.shape, r.shape)`` or ``append(t.shape, oversample,
                r.shape)`` when ``texp`` is given. The first two coordinate
                dimensions are treated as being in the plane of the sky and the
                third coordinate is the line of sight with positive values
                pointing *away* from the observer. For an example, take a look
                at :class:`orbits.KeplerianOrbit`.
            r (tensor): The radius of the transiting body in the same units as
                ``r_star``. This should have a shape that is consistent with
                the coordinates returned by ``orbit``. In general, this means
                that it should probably be a scalar or a vector with one entry
                for each body in ``orbit``.
            t (tensor): The times where the light curve should be evaluated.
            texp (Optional[tensor]): The exposure time of each observation.
                This can be a scalar or a tensor with the same shape as ``t``.
                If ``texp`` is provided, ``t`` is assumed to indicate the
                timestamp at the *middle* of an exposure of length ``texp``.
            return_num_eval (Optional[bool]): Only consulted when ``texp`` is
                given. If true, the second output of the integrated op is
                returned alongside the light curve.
            light_delay (Optional[bool]): Forwarded to
                ``orbit.get_relative_position``. Only used when ``texp`` is
                ``None``; the exposure-integrated path does not read it.
            **kwargs: Forwarded to ``IntegratedLimbDarkOp`` when ``texp`` is
                given. ``circular`` is always overwritten here and ``Nc``
                defaults to ``self.num_cl`` when not supplied.

        Returns:
            The first output of ``limbdark`` when ``texp`` is ``None``.
            Otherwise the first output of ``IntegratedLimbDarkOp``, or the
            tuple ``(res[0], res[1])`` when ``return_num_eval`` is true.

        Raises:
            ValueError: If ``orbit``, ``r`` or ``t`` is ``None``.

        """
        if orbit is None:
            raise ValueError("missing required argument 'orbit'")
        if r is None:
            raise ValueError("missing required argument 'r'")
        if t is None:
            raise ValueError("missing required argument 't'")

        # Flatten r to a 1-D tensor with r.size entries.
        r = tt.as_tensor_variable(r)
        r = tt.reshape(r, (r.size, ))
        t = tt.as_tensor_variable(t)

        # pad is currently the identity: the broadcasting logic it used to
        # apply is left commented out below.
        def pad(arg):
            return arg
            # return tt.shape_padleft(arg, t.ndim) + tt.shape_padright(
            #     tt.zeros_like(t), arg.ndim
            # )

        rgrid = pad(r)
        if texp is None:
            # Instantaneous path: evaluate positions directly at t.
            coords = orbit.get_relative_position(t, light_delay=light_delay)
            # Distance in the plane spanned by the first two coordinates.
            b = tt.sqrt(coords[0]**2 + coords[1]**2)
            b = tt.reshape(b, rgrid.shape)
            los = tt.reshape(coords[2], rgrid.shape)
            # b and r are passed in units of the stellar radius.
            return limbdark(self.c_norm, b / orbit.r_star,
                            rgrid / orbit.r_star, los)[0]

        # Exposure-integrated path: collect the orbital elements for the op.
        n = pad(orbit.n)
        sini = pad(orbit.sin_incl)
        cosi = pad(orbit.cos_incl)
        # texp = tt.as_tensor_variable(texp) + tt.zeros_like(rgrid)

        if orbit.ecc is None:
            # No eccentricity on the orbit: use zeros and flag the op as
            # circular.
            aome2 = pad(-orbit.a)
            e = 0.0
            sinw = 0.0
            cosw = 0.0
            kwargs["circular"] = True
        else:
            # aome2 = -a * (1 - e^2)
            aome2 = pad(-orbit.a * (1 - orbit.ecc**2))
            e = pad(orbit.ecc)
            sinw = pad(orbit.sin_omega)
            cosw = pad(orbit.cos_omega)
            kwargs["circular"] = False

        # Apply the time integrated op
        tgrid = tt.transpose(orbit._warp_times(t) - orbit.tref)
        # Adding zeros_like(tgrid) broadcasts texp against tgrid.
        texp = tt.as_tensor_variable(texp) + tt.zeros_like(tgrid)
        kwargs["Nc"] = kwargs.get("Nc", self.num_cl)
        op = IntegratedLimbDarkOp(**kwargs)
        res = op(
            self.c_norm,
            texp,
            tgrid,
            rgrid / orbit.r_star,
            n,
            aome2,
            sini,
            cosi,
            e,
            sinw,
            cosw,
        )
        if return_num_eval:
            return res[0], res[1]
        return res[0]
Exemplo n.º 53
0
def apply_global_transform(pose_params, positions):
    """Rotate, scale and translate ``positions`` according to ``pose_params``.

    ``pose_params[0]`` is turned into a matrix by
    ``angle_axis_to_rotation_matrix``, that matrix is multiplied by
    ``pose_params[1]`` broadcast along a new leading axis, and
    ``pose_params[2]`` is added to the transformed positions.
    """
    rotation = angle_axis_to_rotation_matrix(pose_params[0])
    scale = pose_params[1]
    rotation *= scale[np.newaxis, :]
    translation = pose_params[2]
    transformed = T.dot(rotation, T.transpose(positions))
    return T.transpose(transformed) + translation
Exemplo n.º 54
0
 def compute_OD(idx, zS, zD, zAA, zBB):
     """Return ``zS[-idx - 1]^T . zD[idx]``.

     ``zS`` is indexed from the end while ``zD`` is indexed from the start.
     ``zAA`` and ``zBB`` are accepted but not used.
     """
     mirrored = zS[-idx - 1]
     return T.dot(T.transpose(mirrored), zD[idx])
Exemplo n.º 55
0
    def build_decoder(self, query_tokens, query_token_embed, query_token_embed_mask):
        """Build the single-step decoding graph and compile two Theano functions.

        Sets two attributes and returns nothing:

        * ``self.decoder_func_init``: maps ``query_tokens`` to the encoder
          output (``query_embed``) and ``query_token_embed_mask``.
        * ``self.decoder_func_next_step``: runs one decoder step from the
          previous state/cell, history and action inputs, and outputs the next
          state and cell plus the rule, generate-action, vocabulary and copy
          probabilities.

        The encoder and decoder LSTMs are both called with ``train=False``.

        Args:
            query_tokens: symbolic variable used as the only input of
                ``decoder_func_init``.
            query_token_embed: passed to ``self.query_encoder_lstm``.
            query_token_embed_mask: mask passed to the encoder, the decoder
                LSTM and the pointer network.
        """
        # logging.info('building decoder ...')

        # (batch_size, decoder_state_dim)
        decoder_prev_state = ndim_tensor(2, name='decoder_prev_state')

        # (batch_size, decoder_state_dim)
        decoder_prev_cell = ndim_tensor(2, name='decoder_prev_cell')

        # (batch_size, n_timestep, decoder_state_dim)
        hist_h = ndim_tensor(3, name='hist_h')

        # (batch_size, decoder_state_dim)
        prev_action_embed = ndim_tensor(2, name='prev_action_embed')

        # (batch_size)
        node_id = T.ivector(name='node_id')

        # (batch_size, node_embed_dim)
        node_embed = self.node_embedding[node_id]

        # (batch_size)
        par_rule_id = T.ivector(name='par_rule_id')

        # (batch_size, decoder_state_dim)
        # A negative parent rule id selects an all-zero embedding instead of a
        # row of rule_embedding_W.
        par_rule_embed = T.switch(par_rule_id[:, None] < 0,
                                  T.alloc(0., 1, config.rule_embed_dim),
                                  self.rule_embedding_W[par_rule_id])

        # ([time_step])
        time_steps = T.ivector(name='time_steps')

        # (batch_size)
        parent_t = T.ivector(name='parent_t')

        # (batch_size, 1)
        parent_t_reshaped = T.shape_padright(parent_t)

        query_embed = self.query_encoder_lstm(query_token_embed, mask=query_token_embed_mask,
                                              dropout=config.dropout, train=False)

        # The dimshuffles below insert a length-1 axis at position 1 so the
        # decoder LSTM receives a one-step sequence.
        # (batch_size, 1, decoder_state_dim)
        prev_action_embed_reshaped = prev_action_embed.dimshuffle((0, 'x', 1))

        # (batch_size, 1, node_embed_dim)
        node_embed_reshaped = node_embed.dimshuffle((0, 'x', 1))

        # (batch_size, 1, node_embed_dim)
        par_rule_embed_reshaped = par_rule_embed.dimshuffle((0, 'x', 1))

        # Disabled feeds are zeroed rather than dropped, so the concatenated
        # input keeps the same width.
        if not config.frontier_node_type_feed:
            node_embed_reshaped *= 0.

        if not config.parent_action_feed:
            par_rule_embed_reshaped *= 0.

        decoder_input = T.concatenate([prev_action_embed_reshaped, node_embed_reshaped, par_rule_embed_reshaped], axis=-1)

        # (batch_size, 1, decoder_state_dim)
        # (batch_size, 1, decoder_state_dim)
        # (batch_size, 1, field_token_encode_dim)
        decoder_next_state_dim3, decoder_next_cell_dim3, ctx_vectors = self.decoder_lstm(decoder_input,
                                                                                         init_state=decoder_prev_state,
                                                                                         init_cell=decoder_prev_cell,
                                                                                         hist_h=hist_h,
                                                                                         context=query_embed,
                                                                                         context_mask=query_token_embed_mask,
                                                                                         parent_t_seq=parent_t_reshaped,
                                                                                         dropout=config.dropout,
                                                                                         train=False,
                                                                                         time_steps=time_steps)

        # flatten(2) collapses everything after the first axis into one axis.
        decoder_next_state = decoder_next_state_dim3.flatten(2)
        # decoder_output = decoder_next_state * (1 - DECODER_DROPOUT)

        decoder_next_cell = decoder_next_cell_dim3.flatten(2)

        decoder_next_state_trans_rule = self.decoder_hidden_state_W_rule(decoder_next_state)
        decoder_next_state_trans_token = self.decoder_hidden_state_W_token(T.concatenate([decoder_next_state, ctx_vectors.flatten(2)], axis=-1))

        # Rule scores are dot products with the rule embedding matrix itself.
        rule_prob = softmax(T.dot(decoder_next_state_trans_rule, T.transpose(self.rule_embedding_W)) + self.rule_embedding_b)

        gen_action_prob = self.terminal_gen_softmax(decoder_next_state)

        # Vocabulary scores likewise reuse the vocabulary embedding matrix.
        vocab_prob = softmax(T.dot(decoder_next_state_trans_token, T.transpose(self.vocab_embedding_W)) + self.vocab_embedding_b)

        ptr_net_decoder_state = T.concatenate([decoder_next_state_dim3, ctx_vectors], axis=-1)

        copy_prob = self.src_ptr_net(query_embed, query_token_embed_mask, ptr_net_decoder_state)

        copy_prob = copy_prob.flatten(2)

        # Function 1: run the encoder once for a query.
        inputs = [query_tokens]
        outputs = [query_embed, query_token_embed_mask]

        self.decoder_func_init = theano.function(inputs, outputs)

        # Function 2: one decoding step. query_embed and the mask are inputs
        # here, so they are supplied by the caller rather than recomputed.
        inputs = [time_steps, decoder_prev_state, decoder_prev_cell, hist_h, prev_action_embed,
                  node_id, par_rule_id, parent_t,
                  query_embed, query_token_embed_mask]

        outputs = [decoder_next_state, decoder_next_cell,
                   rule_prob, gen_action_prob, vocab_prob, copy_prob]

        self.decoder_func_next_step = theano.function(inputs, outputs)
Exemplo n.º 56
0
def work(mode, data_name, test_dataname, pooling_mode="average_exc_pad"):
    print "mode: ", mode
    print "data_name: ", data_name
    print "pooling_mode: ", pooling_mode
    print "Started!"
    rng = numpy.random.RandomState(23455)
    docSentenceCount = T.ivector("docSentenceCount")
    sentenceWordCount = T.ivector("sentenceWordCount")
    corpus = T.matrix("corpus")
    docLabel = T.ivector('docLabel')

    # for list-type data
    layer0 = DocEmbeddingNN(corpus, docSentenceCount, sentenceWordCount, rng, \
                wordEmbeddingDim=249, \
                 sentenceLayerNodesNum=50, \
                 sentenceLayerNodesSize=[5, 249], \
                 docLayerNodesNum=10, \
                 docLayerNodesSize=[3, 50],
                 pooling_mode=pooling_mode)

    layer1 = HiddenLayer(rng,
                         input=layer0.output,
                         n_in=layer0.outputDimension,
                         n_out=10,
                         activation=T.tanh)

    layer2 = LogisticRegression(input=layer1.output, n_in=10, n_out=2)

    # construct the parameter array.
    params = layer2.params + layer1.params + layer0.params

    # Load the parameters last time, optionally.

    # 	data_name = "car"

    para_path = "data/" + data_name + "/model/multi_input_mergeinput" + pooling_mode + ".model"
    traintext = "data/" + data_name + "/train/text"
    trainlabel = "data/" + data_name + "/train/label"
    testtext = "data/" + test_dataname + "/test/text"
    testlabel = "data/" + test_dataname + "/test/label"

    loadParamsVal(para_path, params)

    if (mode == "train" or mode == "test"):
        learning_rate = 0.1
        error = layer2.errors(docLabel)
        cost = layer2.negative_log_likelihood(docLabel)

        grads = T.grad(cost, params)

        updates = [(param_i, param_i - learning_rate * grad_i)
                   for param_i, grad_i in zip(params, grads)]

        print "Loading test data."
        cr_test = CorpusReader(minDocSentenceNum=5,
                               minSentenceWordNum=5,
                               dataset=testtext,
                               labelset=testlabel)
        validDocMatrixes, validDocSentenceNums, validSentenceWordNums, validIds, validLabels, _, validPosList = cr_test.getCorpus(
            [0, 1000])

        # 		print "Right answer: "
        # 		print zip(validIds, validLabels)

        validDocMatrixes = numpy.column_stack((validDocMatrixes, validPosList))
        validDocMatrixes = transToTensor(validDocMatrixes,
                                         theano.config.floatX)
        # 		validPosList = transToTensor(validPosList, theano.config.floatX)
        validDocSentenceNums = transToTensor(validDocSentenceNums, numpy.int32)
        validSentenceWordNums = transToTensor(validSentenceWordNums,
                                              numpy.int32)
        validLabels = transToTensor(validLabels, numpy.int32)
        print "Data loaded."

        valid_model = theano.function(
            [], [
                cost, error, layer2.y_pred, docLabel,
                T.transpose(layer2.p_y_given_x)[1]
            ],
            givens={
                corpus: validDocMatrixes,
                docSentenceCount: validDocSentenceNums,
                sentenceWordCount: validSentenceWordNums,
                docLabel: validLabels
            },
            allow_input_downcast=True)

        # ####Validate the model####
        costNum, errorNum, pred_label, real_label, pred_prob = valid_model()
        print "Valid current model:"
        print "Cost: ", costNum
        print "Error: ", errorNum
        # 		print "Valid Pred: ", pred_label
        # 		print "pred_prob: ", pred_prob

        fpr, tpr, _ = roc_curve(real_label, pred_prob)
        if mode == "test":
            print "tpr_all: ", tpr
            print "fpr_all: ", fpr
        roc_auc = auc(fpr, tpr)
        print "data_name: ", data_name
        print "test_dataname: ", test_dataname
        print "ROC: ", roc_auc

        fpr, tpr, threshold = roc_curve(real_label, pred_label)

        index_of_one = list(threshold).index(1)
        ar = (tpr[index_of_one] + 1 - fpr[index_of_one]) / 2
        print "TPR: ", tpr[index_of_one]
        print "FPR: ", fpr[index_of_one]
        print "AR: ", ar
        print "threshold: ", threshold[index_of_one]
        if mode == "test":
            valid_model.free()
            return errorNum, roc_auc, tpr[index_of_one], fpr[index_of_one], ar

        print "Loading train data."
        cr_train = CorpusReader(minDocSentenceNum=5,
                                minSentenceWordNum=5,
                                dataset=traintext,
                                labelset=trainlabel)
        docMatrixes, docSentenceNums, sentenceWordNums, ids, labels, _, posList = cr_train.getCorpus(
            [0, 100000])

        # 		print "Right answer: "
        # 		print zip(ids, labels)

        docMatrixes = numpy.column_stack((docMatrixes, posList))
        docMatrixes = transToTensor(docMatrixes, theano.config.floatX)
        # 		posList = transToTensor(posList, theano.config.floatX)
        docSentenceNums = transToTensor(docSentenceNums, numpy.int32)
        sentenceWordNums = transToTensor(sentenceWordNums, numpy.int32)
        labels = transToTensor(labels, numpy.int32)

        # 	valid_cr = CorpusReader(minDocSentenceNum=5, minSentenceWordNum=5, dataset="data/valid/split", labelset="data/valid/label.txt")
        print
        index = T.lscalar("index")
        batchSize = 10
        n_batches = (len(docSentenceNums.get_value()) - 1 - 1) / batchSize + 1
        print
        print "Train set size is ", len(docMatrixes.get_value())
        print "Validating set size is ", len(validDocMatrixes.get_value())
        print "Batch size is ", batchSize
        print "Number of training batches  is ", n_batches

        print "Compiling computing graph."

        # for list-type data
        train_model = theano.function(
            [index], [cost, error, layer2.y_pred, docLabel],
            updates=updates,
            givens={
                corpus:
                docMatrixes,
                docSentenceCount:
                docSentenceNums[index * batchSize:(index + 1) * batchSize + 1],
                sentenceWordCount:
                sentenceWordNums,
                docLabel:
                labels[index * batchSize:(index + 1) * batchSize],
            },
            allow_input_downcast=True)

        print "Compiled."
        print "Start to train."
        epoch = 0
        n_epochs = 10
        ite = 0

        while (epoch < n_epochs):
            epoch = epoch + 1
            #######################
            for i in range(n_batches):
                # for list-type data
                print ".",
                costNum, errorNum, pred_label, real_label = train_model(i)
                print ".",
                ite = ite + 1
                # for padding data
                # 			costNum, errorNum = train_model(docMatrixes, labels)
                # 			del docMatrixes, docSentenceNums, sentenceWordNums, labels
                # print ".",
                if (ite % 10 == 0):
                    print
                    print "@iter: ", ite
                    print "Cost: ", costNum
                    print "Error: ", errorNum

            # Validate the model
            costNum, errorNum, pred_label, real_label, pred_prob = valid_model(
            )
            print "Valid current model:"
            print "Cost: ", costNum
            print "Error: ", errorNum
            # 			print "pred_prob: ", pred_prob
            # 			print "Valid Pred: ", pred_label

            fpr, tpr, _ = roc_curve(real_label, pred_prob)
            roc_auc = auc(fpr, tpr)
            print "data_name: ", data_name
            print "test_dataname: ", test_dataname
            print "ROC: ", roc_auc

            fpr, tpr, threshold = roc_curve(real_label, pred_label)
            index_of_one = list(threshold).index(1)
            print "TPR: ", tpr[index_of_one]
            print "FPR: ", fpr[index_of_one]
            print "AR: ", (tpr[index_of_one] + 1 - fpr[index_of_one]) / 2
            print "threshold: ", threshold[index_of_one]
            # Save model
            print "Saving parameters."
            saveParamsVal(para_path, params)
            print "Saved."
        valid_model.free()
        train_model.free()
    elif (mode == "deploy"):
        print "Compiling computing graph."
        output_model = theano.function(
            [corpus, docSentenceCount, sentenceWordCount], [layer2.y_pred])
        print "Compiled."
        cr = CorpusReader(minDocSentenceNum=5,
                          minSentenceWordNum=5,
                          dataset="data/train_valid/split")
        count = 21000
        while (count <= 21000):
            docMatrixes, docSentenceNums, sentenceWordNums, ids = cr.getCorpus(
                [count, count + 100])
            docMatrixes = numpy.matrix(docMatrixes, dtype=theano.config.floatX)
            docSentenceNums = numpy.array(docSentenceNums, dtype=numpy.int32)
            sentenceWordNums = numpy.array(sentenceWordNums, dtype=numpy.int32)
            print "start to predict."
            pred_y = output_model(docMatrixes, docSentenceNums,
                                  sentenceWordNums)
            print "End predicting."
            print "Writing resfile."
            # 		print zip(ids, pred_y[0])
            f = file("data/test/res/res" + str(count), "w")
            f.write(str(zip(ids, pred_y[0])))
            f.close()
            print "Written." + str(count)
            count += 100
Exemplo n.º 57
0
    def build(self):
        """Build the training graph and compile the model's Theano functions.

        Declares the symbolic inputs, runs the query encoder and the decoder
        LSTM over the target action sequence, combines the rule, vocabulary
        and copy probabilities into a per-action target probability, and
        compiles ``self.train_func`` (inputs: ``train_inputs`` below; output:
        ``[loss]``) with the updates produced by the configured optimizer.
        Finally calls ``self.build_decoder`` to compile the decoding
        functions. Returns nothing.
        """
        # (batch_size, max_example_action_num, action_type)
        tgt_action_seq = ndim_itensor(3, 'tgt_action_seq')

        # (batch_size, max_example_action_num, action_type)
        tgt_action_seq_type = ndim_itensor(3, 'tgt_action_seq_type')

        # (batch_size, max_example_action_num)
        tgt_node_seq = ndim_itensor(2, 'tgt_node_seq')

        # (batch_size, max_example_action_num)
        tgt_par_rule_seq = ndim_itensor(2, 'tgt_par_rule_seq')

        # (batch_size, max_example_action_num)
        tgt_par_t_seq = ndim_itensor(2, 'tgt_par_t_seq')

        # (batch_size, max_example_action_num, symbol_embed_dim)
        # tgt_node_embed = self.node_embedding(tgt_node_seq, mask_zero=False)
        tgt_node_embed = self.node_embedding[tgt_node_seq]

        # (batch_size, max_query_length)
        query_tokens = ndim_itensor(2, 'query_tokens')

        # (batch_size, max_query_length, query_token_embed_dim)
        # (batch_size, max_query_length)
        query_token_embed, query_token_embed_mask = self.query_embedding(query_tokens, mask_zero=True)

        # if WORD_DROPOUT > 0:
            # logging.info('used word dropout for source, p = %f', WORD_DROPOUT)
        #     query_token_embed, query_token_embed_intact = WordDropout(WORD_DROPOUT, self.srng)(query_token_embed, False)

        batch_size = tgt_action_seq.shape[0]
        max_example_action_num = tgt_action_seq.shape[1]

        # previous action embeddings
        # (batch_size, max_example_action_num, action_embed_dim)
        # Channel 0 > 0 selects the rule embedding, otherwise the vocabulary
        # embedding indexed by channel 1 is used.
        tgt_action_seq_embed = T.switch(T.shape_padright(tgt_action_seq[:, :, 0] > 0),
                                        self.rule_embedding_W[tgt_action_seq[:, :, 0]],
                                        self.vocab_embedding_W[tgt_action_seq[:, :, 1]])

        tgt_action_seq_embed_tm1 = tensor_right_shift(tgt_action_seq_embed)

        # parent rule application embeddings
        # A negative parent rule id selects an all-zero embedding.
        tgt_par_rule_embed = T.switch(tgt_par_rule_seq[:, :, None] < 0,
                                      T.alloc(0., 1, config.rule_embed_dim),
                                      self.rule_embedding_W[tgt_par_rule_seq])

        # Disabled feeds are zeroed rather than dropped, so the concatenated
        # decoder input keeps the same width.
        if not config.frontier_node_type_feed:
            tgt_node_embed *= 0.

        if not config.parent_action_feed:
            tgt_par_rule_embed *= 0.

        # (batch_size, max_example_action_num, action_embed_dim + symbol_embed_dim + action_embed_dim)
        decoder_input = T.concatenate([tgt_action_seq_embed_tm1, tgt_node_embed, tgt_par_rule_embed], axis=-1)

        # (batch_size, max_query_length, query_embed_dim)
        query_embed = self.query_encoder_lstm(query_token_embed, mask=query_token_embed_mask,
                                              dropout=config.dropout, srng=self.srng)

        # (batch_size, max_example_action_num)
        # A position is unmasked when any of its action-type flags is set.
        tgt_action_seq_mask = T.any(tgt_action_seq_type, axis=-1)

        # decoder_hidden_states: (batch_size, max_example_action_num, lstm_hidden_state)
        # ctx_vectors: (batch_size, max_example_action_num, encoder_hidden_dim)
        decoder_hidden_states, _, ctx_vectors = self.decoder_lstm(decoder_input,
                                                                  context=query_embed,
                                                                  context_mask=query_token_embed_mask,
                                                                  mask=tgt_action_seq_mask,
                                                                  parent_t_seq=tgt_par_t_seq,
                                                                  dropout=config.dropout,
                                                                  srng=self.srng)

        # if DECODER_DROPOUT > 0:
            # logging.info('used dropout for decoder output, p = %f', DECODER_DROPOUT)
        #     decoder_hidden_states = Dropout(DECODER_DROPOUT, self.srng)(decoder_hidden_states)

        # ====================================================
        # apply additional non-linearity transformation before
        # predicting actions
        # ====================================================

        decoder_hidden_state_trans_rule = self.decoder_hidden_state_W_rule(decoder_hidden_states)
        decoder_hidden_state_trans_token = self.decoder_hidden_state_W_token(T.concatenate([decoder_hidden_states, ctx_vectors], axis=-1))

        # (batch_size, max_example_action_num, rule_num)
        rule_predict = softmax(T.dot(decoder_hidden_state_trans_rule, T.transpose(self.rule_embedding_W)) + self.rule_embedding_b)

        # (batch_size, max_example_action_num, 2)
        terminal_gen_action_prob = self.terminal_gen_softmax(decoder_hidden_states)

        # (batch_size, max_example_action_num, target_vocab_size)
        vocab_predict = softmax(T.dot(decoder_hidden_state_trans_token, T.transpose(self.vocab_embedding_W)) + self.vocab_embedding_b)

        # (batch_size, max_example_action_num, lstm_hidden_state + encoder_hidden_dim)
        ptr_net_decoder_state = T.concatenate([decoder_hidden_states, ctx_vectors], axis=-1)

        # (batch_size, max_example_action_num, max_query_length)
        copy_prob = self.src_ptr_net(query_embed, query_token_embed_mask, ptr_net_decoder_state)

        # The three lookups below use broadcast advanced indexing to pick, for
        # every (example, step), the probability assigned to the target index.
        # (batch_size, max_example_action_num)
        rule_tgt_prob = rule_predict[T.shape_padright(T.arange(batch_size)),
                                     T.shape_padleft(T.arange(max_example_action_num)),
                                     tgt_action_seq[:, :, 0]]

        # (batch_size, max_example_action_num)
        vocab_tgt_prob = vocab_predict[T.shape_padright(T.arange(batch_size)),
                                       T.shape_padleft(T.arange(max_example_action_num)),
                                       tgt_action_seq[:, :, 1]]

        # (batch_size, max_example_action_num)
        copy_tgt_prob = copy_prob[T.shape_padright(T.arange(batch_size)),
                                  T.shape_padleft(T.arange(max_example_action_num)),
                                  tgt_action_seq[:, :, 2]]


        # (batch_size, max_example_action_num)
        # The action-type flags gate which of the three probabilities count;
        # the vocabulary and copy terms are weighted by the generate/copy
        # switch from terminal_gen_action_prob.
        tgt_prob = tgt_action_seq_type[:, :, 0] * rule_tgt_prob + \
                   tgt_action_seq_type[:, :, 1] * terminal_gen_action_prob[:, :, 0] * vocab_tgt_prob + \
                   tgt_action_seq_type[:, :, 2] * terminal_gen_action_prob[:, :, 1] * copy_tgt_prob

        # 1e-7 is added only at masked positions so log() never sees zero
        # there; those positions are multiplied out by the mask on the next line.
        likelihood = T.log(tgt_prob + 1.e-7 * (1 - tgt_action_seq_mask))
        loss = - (likelihood * tgt_action_seq_mask).sum(axis=-1) # / tgt_action_seq_mask.sum(axis=-1)
        loss = T.mean(loss)

        # let's build the function!
        train_inputs = [query_tokens, tgt_action_seq, tgt_action_seq_type,
                        tgt_node_seq, tgt_par_rule_seq, tgt_par_t_seq]
        optimizer = optimizers.get(config.optimizer)
        optimizer.clip_grad = config.clip_grad
        updates, grads = optimizer.get_updates(self.params, loss)
        self.train_func = theano.function(train_inputs, [loss],
                                          # [loss, tgt_action_seq_type, tgt_action_seq,
                                          #  rule_tgt_prob, vocab_tgt_prob, copy_tgt_prob,
                                          #  copy_prob, terminal_gen_action_prob],
                                          updates=updates)

        # if WORD_DROPOUT > 0:
        #     self.build_decoder(query_tokens, query_token_embed_intact, query_token_embed_mask)
        # else:
        #     self.build_decoder(query_tokens, query_token_embed, query_token_embed_mask)

        self.build_decoder(query_tokens, query_token_embed, query_token_embed_mask)
Exemplo n.º 58
0
def transpose(x):
    """Return ``x`` transposed by ``T.transpose`` with its default axes."""
    transposed = T.transpose(x)
    return transposed
Exemplo n.º 59
0

# Least-squares mapping between two sets of basis coefficients.
# f - inputs, phi_m - basis; f.shape = Nxl, phi_m.shape = Nxn
f, phi_m = inp.input_var, lb_op.input_var

# Print ops are identity nodes that log the value when the graph runs.
f = T.printing.Print('f')(f)
phi_m = T.printing.Print('phi_m')(phi_m)

# compute A - the input coefficients matrix
A = utils_lasagne.desc_coeff(f, phi_m[:, 0:neigen])
A = T.printing.Print('A')(A)
# compute B - the reference coefficients matrix
# B, At, AtA, AtAi, AtB = ldiv(phi_n[:, 0: neigen], f)
B = utils_lasagne.desc_coeff(f, phi_n[:, 0:neigen])
B = T.printing.Print('B')(B)
# compute C using least-squares: argmin_X( ||X*A - B||^2 )
# (solved on the transposed system, then transposed back)
C = T.transpose(utils_lasagne.ldiv(T.transpose(A), T.transpose(B)))
C = T.printing.Print('C')(C)
# apply the mapping: Br = C * A
Br = T.dot(C, A)
Br = T.printing.Print('Br')(Br)
# compute smoothed mapped functions g
output = T.dot(phi_n[:, 0:neigen], Br)

funcs = dict()
funcs['predict'] = theano.function(
    [inp.input_var, lb_op.input_var],
    [output, A, B, C, Br],  #, At, AtA, AtAi, AtB],
    on_unused_input='warn')

# output_, A_, B_, C_, Br_, gr_, At_, AtA_, AtAi_, AtB_ = funcs['predict'](*x_)
# 'predict' is compiled with exactly five outputs, so exactly five values are
# unpacked; a sixth target would raise ValueError at this line.
output_, A_, B_, C_, Br_ = funcs['predict'](*x_)
Exemplo n.º 60
0
import theano
import theano.tensor as T
from theano import pp
from theano import function
import numpy as np
from ipdb import set_trace

# Symbolic float32 inputs: two 4-D tensors and one 3-D tensor.
conv5 = T.ftensor4()
sim_map = T.ftensor3()
top_diff = T.ftensor4()

# Collapse the last two axes of conv5 and move the channel axis last:
# (batch_size, c, h, w) -> (batch_size, c, h*w) -> (batch_size, h*w, c).
batch_size, c, h, w = conv5.shape
value = T.reshape(conv5, newshape=(batch_size, c, h * w))
value = T.transpose(value, axes=(0, 2, 1))

# Per-example matrix product of sim_map with value.
# NOTE(review): presumably sim_map is (batch_size, h*w, h*w) so the result can
# be reshaped back to conv5's shape below -- confirm against the caller.
context = T.batched_dot(sim_map, value)
context = T.transpose(context, axes=(0, 2, 1))
context = T.reshape(context, newshape=(batch_size, c, h, w))

# Residual-style sum of the context with the original input.
fuse = context + conv5

# Scalar whose gradient w.r.t. conv5 is taken below, with top_diff acting as
# the element-wise weights on fuse.
fuse_sum = T.sum(fuse * top_diff)

forward_theano = theano.function([conv5, sim_map], fuse)
backward_theano = theano.function([conv5, sim_map, top_diff],
                                  T.grad(fuse_sum, conv5))

# Four stacked 3x3 planes filled with 1, 2, 3 and 4: shape (4, 3, 3), float32.
one = np.ones(shape=(3, 3))
np_conv5 = np.stack([one, one + 1, one + 2, one + 3],
                    axis=0).astype(np.float32)