Пример #1
0
    def lecun_lcn(self, X, kernel_size=7, threshold = 1e-4, use_divisor=False):
        """
        Local contrast normalization (after Yann LeCun).

        Subtracts from ``X`` its convolution with the
        ``kernel_size`` x ``kernel_size`` filter returned by
        ``gaussian_filter``.  When ``use_divisor`` is true the centered
        signal is additionally divided by a local norm estimate that is
        floored at ``threshold``.

        Original Theano code by Guillaume Desjardins.
        """
        shape_4d = (1, 1, kernel_size, kernel_size)
        kernel = shared(
            _asarray(gaussian_filter(kernel_size).reshape(shape_4d), dtype=floatX),
            borrow=True)

        local_mean = conv2d(X, filters=kernel, filter_shape=shape_4d,
                            border_mode='full')

        # Offset used to crop the 'full' convolution output on each side.
        half = int(floor(kernel_size/2.))
        centered = X - local_mean[:, :, half:-half, half:-half]

        if not use_divisor:
            return centered

        # Filtered squares of the *input* (not of the centered signal).
        local_sqr = conv2d(T.sqr(T.abs_(X)), filters=kernel,
                           filter_shape=shape_4d, border_mode='full')
        local_norm = T.sqrt(local_sqr[:, :, half:-half, half:-half])

        # Divide by at least the mean norm over axes 2 and 3, and by at
        # least ``threshold``.
        mean_norm = local_norm.mean(axis=[2, 3])
        scale = T.maximum(
            T.largest(mean_norm.dimshuffle(0, 1, 'x', 'x'), local_norm),
            threshold)

        return centered / scale
Пример #2
0
def lcn_3d_input(data, kernel_shape, n_maps):

    """
    Apply local contrast normalization to a batch of 3D volumes.

    The graph is built on a symbolic float32 5-tensor, compiled with
    ``theano.function`` and evaluated once on ``data``.

    :param data: [examples, depth, filters, height, width]
    :param kernel_shape: sequence of three ints, indexed as (depth, height, width)
    :param n_maps: int, size used for the ``filters`` axis of the kernel
    :return: new_x: [examples, depth, filters, height, width]
    """

    # create symbolic variable for the input data
    ftensor5 = T.TensorType('float32', [False] * 5)
    x = ftensor5()

    # create 3d filter that spans across all channels / feature maps
    # todo: kernel is not really in 3d; need 3d implementation instead of 2d repeated across third dimension
    # todo: alternative is to keep 2d kernel and extend short range given data size in z-plane; change first kernel_sh.
    filter_shape = (1, kernel_shape[0], n_maps, kernel_shape[1], kernel_shape[2])
    filters = np.resize(gaussian_filter(kernel_shape[1]), filter_shape)
    filters = filters / np.sum(filters)  # weights sum to one
    filters = sharedX(filters)

    # convolve filter with input signal
    convolution_out = conv3d(
        signals=x,
        filters=filters,
        signals_shape=data.shape,
        filters_shape=filter_shape,
        border_mode='valid'
    )

    # half-widths of the kernel along depth, height and width
    mid_0 = int(np.floor(kernel_shape[0] / 2.))
    mid_1 = int(np.floor(kernel_shape[1] / 2.))
    mid_2 = int(np.floor(kernel_shape[2] / 2.))

    # for each voxel, remove the filtered mean of its neighborhood; the
    # 'valid' output is written into the interior of a zero tensor so the
    # border voxels are left uncentered
    mean = T.tile(convolution_out, (1, 1, n_maps, 1, 1))
    padded_mean = T.zeros_like(x)
    padded_mean = T.set_subtensor(padded_mean[:, mid_0:-mid_0, :, mid_1:-mid_1, mid_2:-mid_2], mean)
    # use the symbolic input (not the concrete array) so the data is not
    # embedded in the compiled graph as a constant
    centered_data = x - padded_mean

    # scale down norm of the neighborhood if norm is bigger than 1
    sum_sqr_xx = conv3d(signals=T.sqr(x), filters=filters)
    denominator = T.tile(T.sqrt(sum_sqr_xx), (1, 1, n_maps, 1, 1))
    padded_denominator = T.ones_like(x)
    padded_denominator = T.set_subtensor(
        padded_denominator[:, mid_0:-mid_0, :, mid_1:-mid_1, mid_2:-mid_2], denominator
    )
    per_img_mean = padded_denominator.mean(axis=[1, 2, 3, 4])
    divisor = T.largest(
        per_img_mean.dimshuffle(0, 'x', 'x', 'x', 'x'),
        padded_denominator
    )
    new_x = centered_data / T.maximum(1., divisor)

    # compile theano function
    f = theano.function([x], new_x)

    return f(data)
Пример #3
0
    def lecun_lcn(self, X, kernel_size=7, threshold=1e-4, use_divisor=True):
        """
        Local contrast normalization (after Yann LeCun), Keras-backend variant.

        Subtracts from ``X`` its 'same'-mode convolution with the filter
        returned by ``self.gaussian_filter``; when ``use_divisor`` is true
        the result is divided by a local norm of the centered signal,
        floored at ``threshold``.

        Original Theano code by Guillaume Desjardins.
        """
        shape_4d = (1, 1, kernel_size, kernel_size)
        kernel = K.variable(self.gaussian_filter(kernel_size).reshape(shape_4d))

        # 'same' border mode keeps the output the size of X, so no cropping.
        local_mean = K.conv2d(X,
                              kernel,
                              filter_shape=shape_4d,
                              border_mode='same')
        centered = X - local_mean

        if not use_divisor:
            return centered

        # Filtered squares of the centered signal.
        local_sqr = K.conv2d(K.pow(K.abs(centered), 2),
                             kernel,
                             filter_shape=shape_4d,
                             border_mode='same')
        local_norm = T.sqrt(local_sqr)

        # Divide by at least the mean norm over axes 2 and 3, then floor.
        mean_norm = local_norm.mean(axis=[2, 3])
        scale = T.largest(mean_norm.dimshuffle(0, 1, 'x', 'x'), local_norm)
        scale = T.maximum(scale, threshold)

        return centered / scale
Пример #4
0
def LCN(data, kernel_shape):
    """
    Build a local-contrast-normalization expression for ``data``.

    ``data`` is convolved with the ``kernel_shape`` x ``kernel_shape``
    filter from ``gaussian_filter``; the filtered mean is subtracted and
    the result divided by ``max(1, local norm)``.  The output is squeezed
    and index 0 is taken along axis 1 before returning.
    """
    kernel = sharedX(
        gaussian_filter(kernel_shape).reshape((1, 1, kernel_shape, kernel_shape)))

    mean_full = conv2d(data, filters=kernel, border_mode='full')

    # Crop the 'full' convolution outputs by the kernel half-width.
    half = int(np.floor(kernel_shape/ 2.))
    centered = data - mean_full[:, :, half:-half, half:-half]

    # Local norm comes from the filtered squares of the raw input.
    sqr_full = conv2d(T.sqr(data), filters=kernel, border_mode='full')
    local_norm = T.sqrt(sqr_full[:, :, half:-half, half:-half])

    mean_norm = local_norm.mean(axis=[2, 3])
    scale = T.largest(mean_norm.dimshuffle(0, 1, 'x', 'x'), local_norm)

    # Only ever scale down: the divisor is never below 1.
    normalized = centered / T.maximum(1., scale)

    normalized = T.extra_ops.squeeze(normalized)  # remove broadcastable dimension
    normalized = normalized[:, 0, :, :]  # TODO: check whether this forced squeeze is good

    return normalized
Пример #5
0
    def apply(self, dataset, can_fit=True):
        """
        Contrast-normalize ``dataset`` in place using a 9x9 box filter.

        The design matrix is reshaped to ``(n,) + self.img_shape``, the 9x9
        neighborhood average is subtracted from every pixel, and the result
        is divided by ``max(1, sqrt(sum of squares over the 9x9 patch))``.
        The flattened result replaces the dataset's design matrix.
        ``can_fit`` is accepted but not used here.
        """
        design = dataset.get_design_matrix()

        flat_in = T.matrix(dtype=design.dtype)
        batch_shape = (len(design),) + self.img_shape
        images = flat_in.reshape(batch_shape)

        box = T.ones((1,1,9,9), dtype=design.dtype)
        box_shape = (1, 1, 9, 9)

        # For each pixel, remove the mean of its 9x9 neighborhood.
        mean_full = conv.conv2d(input=images,
                                filters=box / (9.*9.),
                                image_shape=batch_shape,
                                filter_shape=box_shape,
                                border_mode='full')
        centered = images - mean_full[:, :, 4:-4, 4:-4]

        # Scale down the norm of the 9x9 patch if it is bigger than 1.
        sqr_full = conv.conv2d(input=centered**2,
                               filters=box,
                               image_shape=batch_shape,
                               filter_shape=box_shape,
                               border_mode='full')
        patch_norm = T.sqrt(sqr_full[:, :, 4:-4, 4:-4]).reshape(images.shape)
        normalized = T.flatten(centered / T.largest(1.0, patch_norm), outdim=2)

        normalize_fn = theano.function([flat_in], normalized)
        dataset.set_design_matrix(normalize_fn(design))
Пример #6
0
def _lcn(image, im_shape, fmaps, pool_depth, width, sigma):
    """
    Build a local-contrast-normalized version of the symbolic ``image``.

    The filters come from ``_lcn_filters(fmaps, pool_depth, width, sigma)``.
    The filtered image is subtracted, then the result is divided by the
    larger of the local norm and its mean over axes 2 and 3, plus 1e-6.
    The output is cast to ``theano.config.floatX``.
    """
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv

    pad = width//2
    kernel = _lcn_filters(fmaps, pool_depth, width, sigma)
    kernel_shape = kernel.shape

    def _blur(signal):
        # 'full' convolution cropped by the kernel half-width on each side.
        full = conv.conv2d(input=signal, filters=kernel,
                           image_shape=im_shape, filter_shape=kernel_shape,
                           border_mode='full')
        return full[:, :, pad:-pad, pad:-pad]

    centered = image - _blur(image)

    local_norm = T.sqrt(_blur(T.sqr(centered)))
    map_mean = local_norm.mean(axis=[2, 3])
    # The small constant keeps the divisor strictly positive.
    scale = T.largest(map_mean.dimshuffle(0, 1, 'x', 'x'), local_norm) + 1e-6

    return T.cast(centered/scale, theano.config.floatX)
Пример #7
0
	def setup_theano(self):
		"""
		Create the symbolic inputs and compile the two Theano functions.

		``self.exec_fn`` evaluates ``self.transform_function`` on a sample.
		``self.grad_fn`` returns the gradient of the hinge cost
		``max(0, 1 - sum(f(sample)) + sum(f(corrupt_sample)))`` with respect
		to ``b``, ``W`` and the vocabulary matrix.
		"""
		self.vocab_mat = T.fmatrix('vocab')
		self.sample = T.fmatrix('sample')
		b = T.fvector('b')
		W = T.fmatrix('W')

		def _activations(batch):
			# Same transform for the true and the corrupt sample.
			embedded = self.wordvec_transform(batch, self.vocab_mat)
			return self.transform_function(W, b, embedded)

		f = _activations(self.sample)
		true_score = T.sum(f)

		self.corrupt_sample = T.fmatrix('corrupt-sample')
		corrupt_score = T.sum(_activations(self.corrupt_sample))

		# Hinge cost: the true sample should outscore the corrupt one by 1.
		J = T.largest(0, 1 - true_score + corrupt_score)
		self.grad = theano.grad(J, [b, W, self.vocab_mat])

		grad_inputs = [self.sample, self.corrupt_sample, b, W, self.vocab_mat]
		self.grad_fn = theano.function(
			grad_inputs,
			self.grad,
			allow_input_downcast=True)

		exec_inputs = [self.sample, b, W, self.vocab_mat]
		self.exec_fn = theano.function(
			exec_inputs,
			f,
			allow_input_downcast=True)
Пример #8
0
    def lecun_lcn(self,
                  X,
                  kernel_size=9,
                  threshold=1e-4,
                  use_divisor=True,
                  border=False):
        """
        Yann LeCun's local contrast normalization
        Original code in Theano by: Guillaume Desjardins

        X: symbolic 4D tensor; axes 2 and 3 are the ones filtered.
        kernel_size: side length of the square filter from ``gaussian_filter``.
        threshold: floor applied to the filtered squares and to the divisor.
        use_divisor: when true, divide the centered signal by the local norm.
        border: when true, pad ``X`` by replicating its edge rows/columns
            and use a 'valid' convolution for the mean instead of cropping
            a 'full' one.

        Returns the normalized symbolic tensor (centered only when
        ``use_divisor`` is false).
        """

        filter_shape = (1, 1, kernel_size, kernel_size)
        filters = gaussian_filter(kernel_size).reshape(filter_shape)
        filters = shared(_asarray(filters, dtype=floatX), borrow=True)
        mid = int(floor(kernel_size / 2.))

        if border:
            # Number of rows/columns to replicate on each side.  Floor
            # division keeps this an integer on Python 3, where ``/`` would
            # produce a float repeat count.
            r = (kernel_size - 1) // 2
            up = X[:, :, 0:1, :].repeat(r, axis=2)
            down = X[:, :, -1:, :].repeat(r, axis=2)
            X_ = T.concatenate([up, X, down], axis=2)
            left = X_[:, :, :, 0:1].repeat(r, axis=3)
            right = X_[:, :, :, -1:].repeat(r, axis=3)
            X_ = T.concatenate([left, X_, right], axis=3)

            convout = conv2d(X_,
                             filters=filters,
                             filter_shape=filter_shape,
                             border_mode='valid')
            centered_X = X - convout

        else:
            convout = conv2d(X,
                             filters=filters,
                             filter_shape=filter_shape,
                             border_mode='full')

            # For each pixel, remove mean of kernel_sizexkernel_size neighborhood
            centered_X = X - convout[:, :, mid:-mid, mid:-mid]

        if use_divisor:
            # Scale down norm of kernel_sizexkernel_size patch; the norm is
            # taken from the un-padded, un-centered input in both branches.
            sum_sqr_XX = conv2d(T.sqr(X),
                                filters=filters,
                                filter_shape=filter_shape,
                                border_mode='full')

            sum_sqr_XX = sum_sqr_XX[:, :, mid:-mid, mid:-mid]
            # Floor before the square root as well as after the max below.
            sum_sqr_XX = T.maximum(sum_sqr_XX, threshold)
            denom = T.sqrt(sum_sqr_XX)
            per_img_mean = denom.mean(axis=[2, 3])
            divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
            divisor = T.maximum(divisor, threshold)

            new_X = centered_X / divisor
            return new_X

        else:
            return centered_X
Пример #9
0
    def lecun_lcn(self, X, kernel_size=7, threshold=1e-4, use_divisor=False):
        """
        Local contrast normalization (after Yann LeCun).

        Removes from ``X`` its convolution with the filter returned by
        ``gaussian_filter``.  With ``use_divisor`` the centered signal is
        also divided by a local norm of the centered signal, floored at
        ``threshold``.

        Original Theano code by Guillaume Desjardins.
        """
        kshape = (1, 1, kernel_size, kernel_size)
        weights = gaussian_filter(kernel_size).reshape(kshape)
        weights = shared(_asarray(weights, dtype=floatX), borrow=True)

        # Half-width used to crop each 'full' convolution result.
        crop = int(floor(kernel_size / 2.))

        def _cropped_conv(signal):
            full = conv2d(signal,
                          filters=weights,
                          filter_shape=kshape,
                          border_mode='full')
            return full[:, :, crop:-crop, crop:-crop]

        centered = X - _cropped_conv(X)
        if not use_divisor:
            return centered

        # Norm of each neighborhood of the centered signal.
        local_norm = T.sqrt(_cropped_conv(T.sqr(T.abs_(centered))))
        mean_norm = local_norm.mean(axis=[2, 3])
        scale = T.largest(mean_norm.dimshuffle(0, 1, 'x', 'x'), local_norm)
        scale = T.maximum(scale, threshold)

        return centered / scale
Пример #10
0
	def __init__(self):
		"""
		Build the Theano graphs for scoring a sample and for the ranking gradient.

		``self.exec_fn(x, b, W, vocab)`` returns the element-wise logistic
		activations for ``x``.  ``self.grad_fn(x, x_c, b, W, vocab)`` returns
		the gradients of the hinge cost ``max(0, 1 - sum(f(x)) + sum(f(x_c)))``
		with respect to ``b``, ``W`` and the vocabulary matrix.
		"""
		self.configured = False
		self.vocab_mat = T.fmatrix('vocab')
		# x has size num_samples x (window_size * vec_width)
		self.x = T.fmatrix('x')
		b = T.fvector('b')
		W = T.fmatrix('W')

		def score(batch):
			# One scoring expression shared by the true and corrupt samples.
			# Previously the corrupt branch added ``b`` outside the product
			# with ``W``, so the two samples were scored differently.
			return 1 / (1 + T.exp(-(W * (batch.dot(self.vocab_mat) + b))))

		f = score(self.x)
		s = T.sum(f)

		self.exec_fn = theano.function([self.x, b, W, self.vocab_mat],
			f,
			allow_input_downcast=True)

		self.x_c = T.fmatrix('x_c')
		f_c = score(self.x_c)
		s_c = T.sum(f_c)

		# Hinge cost: the true sample should outscore the corrupt one by 1.
		J = T.largest(0, 1 - s + s_c)
		self.grad = theano.grad(J, [b, W, self.vocab_mat])

		self.grad_fn = theano.function(
			[self.x, self.x_c, b, W, self.vocab_mat],
			self.grad,
			allow_input_downcast=True)
Пример #11
0
    def apply(self, dataset, can_fit=True):
        """
        Contrast-normalize ``dataset`` in place with the 9x9 filter.

        The design matrix is reshaped to ``(n,) + self.img_shape``; the
        filtered 9x9 neighborhood mean is subtracted and the result divided
        by the larger of the local norm and its mean over the image.  The
        flattened result replaces the dataset's design matrix.
        ``can_fit`` is accepted but not used here.
        """
        design = dataset.get_design_matrix()

        flat_in = T.matrix(dtype=design.dtype)
        batch_shape = (len(design),) + self.img_shape
        images = flat_in.reshape(batch_shape)
        kernel = gaussian_filter_9x9().reshape((1,1,9,9))

        def _smooth(signal):
            # 'full' convolution cropped by the 4-pixel half-width.
            full = conv.conv2d(input=signal,
                               filters=kernel,
                               image_shape=batch_shape,
                               filter_shape=(1, 1, 9, 9),
                               border_mode='full')
            return full[:, :, 4:-4, 4:-4]

        # For each pixel, remove the filtered mean of its 9x9 neighborhood.
        centered = images - _smooth(images)

        # Local norm of the centered signal over the same neighborhood.
        local_norm = T.sqrt(_smooth(centered**2))
        mean_norm = T.mean(T.flatten(local_norm, outdim=3), axis=2)
        scale = T.largest(mean_norm.dimshuffle((0,1,'x','x')), local_norm)

        normalized = T.flatten(centered / scale, outdim=2)

        normalize_fn = theano.function([flat_in], normalized)
        dataset.set_design_matrix(normalize_fn(design))
Пример #12
0
	def __init__(self,
				 word_vec_width,
				 batch_size,
				 num_hidden,
				 learning_rate=0.1):
		"""
		Store the hyper-parameters and build the scoring / gradient functions.

		``self.exec_fn(word_onehot, b, W, vocab)`` returns the element-wise
		logistic activations.  ``self.grad_fn(word_onehot, word_onehot_c, b,
		W, vocab)`` returns the gradients of the hinge cost
		``max(0, 1 - sum(f(word_onehot)) + sum(f(word_onehot_c)))`` with
		respect to ``b``, ``W`` and the vocabulary matrix.
		"""
		self.num_hidden = num_hidden
		self.learning_rate = learning_rate
		self.word_vec_width = word_vec_width
		self.batch_size = batch_size

		self.vocab_mat = T.fmatrix('vocab')
		self.word_onehot = T.fmatrix('word_onehot')
		b = T.fvector('b')
		W = T.fmatrix('W')

		def score(batch):
			# One scoring expression shared by the true and corrupt samples.
			# Previously the corrupt branch added ``b`` outside the product
			# with ``W``, so the two samples were scored differently.
			return 1 / (1 + T.exp(-(W * (batch.dot(self.vocab_mat) + b))))

		f = score(self.word_onehot)
		s = T.sum(f)

		self.exec_fn = theano.function(
			[self.word_onehot, b, W, self.vocab_mat],
			f,
			allow_input_downcast=True)

		self.word_onehot_c = T.fmatrix('word_onehot_c')
		f_c = score(self.word_onehot_c)
		s_c = T.sum(f_c)

		# Hinge cost: the true sample should outscore the corrupt one by 1.
		J = T.largest(0, 1 - s + s_c)
		self.grad = theano.grad(J, [b, W, self.vocab_mat])

		self.grad_fn = theano.function(
			[self.word_onehot, self.word_onehot_c, b, W, self.vocab_mat],
			self.grad,
			allow_input_downcast=True)
Пример #13
0
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Local contrast normalization (after Yann LeCun), evaluated on ``input``.

    ``input`` is reshaped to add a trailing singleton axis, the graph is
    built with pylearn2's ``Conv2D`` on a symbolic tensor of shape
    ``(len(input), img_shape[0], img_shape[1], 1)``, compiled, and applied
    to ``input``.  The divisor is floored at ``threshold``.

    Original Theano code by Guillaume Desjardins.
    """
    input = input.reshape(input.shape[0], input.shape[1], input.shape[2], 1)
    X = tensor.matrix(dtype=input.dtype)
    X = X.reshape((len(input), img_shape[0], img_shape[1], 1))

    kernel = sharedX(
        gaussian_filter(kernel_shape).reshape((1, 1, kernel_shape, kernel_shape)))
    space = Conv2DSpace(shape=img_shape, num_channels=1)

    def _full_conv(signal):
        # A fresh transformer per call, as in the two-step formulation.
        op = Conv2D(filters=kernel, batch_size=len(input), input_space=space, border_mode="full")
        return op.lmul(signal)

    mean_full = _full_conv(X)

    # Crop the 'full' outputs by the kernel half-width on axes 1 and 2.
    half = int(numpy.floor(kernel_shape / 2.0))
    centered = X - mean_full[:, half:-half, half:-half, :]

    # Local norm from the filtered squares of the raw input.
    sqr_full = _full_conv(X ** 2)
    local_norm = tensor.sqrt(sqr_full[:, half:-half, half:-half, :])

    mean_norm = local_norm.mean(axis=[1, 2])
    scale = tensor.largest(mean_norm.dimshuffle(0, "x", "x", 1), local_norm)
    scale = tensor.maximum(scale, threshold)

    normalized = tensor.flatten(centered / scale, outdim=3)

    normalize_fn = function([X], normalized)
    return normalize_fn(input)
Пример #14
0
def LCNinput(data, kernel_shape):
    """
    Apply local contrast normalization to ``data`` and return the result.

    A graph is built on a symbolic float32 4-tensor: the input is filtered
    with the ``kernel_shape`` x ``kernel_shape`` filter from
    ``gaussian_filter``, the filtered mean is subtracted, and the result is
    divided by ``max(1, local norm)``.  The compiled function is then
    evaluated on ``data``.
    """
    images = T.ftensor4()
    kernel = sharedX(
        gaussian_filter(kernel_shape).reshape((1, 1, kernel_shape, kernel_shape)))

    mean_full = conv2d(images, filters=kernel, border_mode='full')

    # Index that crops a 'full' convolution by the kernel half-width.
    half = int(np.floor(kernel_shape/ 2.))
    inner = (slice(None), slice(None), slice(half, -half), slice(half, -half))

    # For each pixel, remove the filtered mean of its neighborhood.
    centered = images - mean_full[inner]

    # Scale down the norm of the neighborhood if it is bigger than 1.
    sqr_full = conv2d(T.sqr(images), filters=kernel, border_mode='full')
    local_norm = T.sqrt(sqr_full[inner])
    mean_norm = local_norm.mean(axis=[2, 3])
    scale = T.largest(mean_norm.dimshuffle(0, 1, 'x', 'x'), local_norm)

    normalized = centered / T.maximum(1., scale)

    normalize_fn = theano.function([images], normalized)
    return normalize_fn(data)
Пример #15
0
def lecun_lcn(input, img_shape, kernel_shape):
    """
    Local contrast normalization evaluated on ``input``.

    ``input`` gets a trailing singleton axis, the graph is built with
    pylearn2's ``Conv2D`` on a symbolic tensor of shape
    ``(len(input), img_shape[0], img_shape[1], 1)``, and the compiled
    function is applied to ``input``.  The divisor is not floored in this
    variant.
    """
    input = input.reshape(input.shape[0], input.shape[1], input.shape[2], 1)
    X = T.matrix(dtype=input.dtype)
    X = X.reshape((len(input), img_shape[0], img_shape[1], 1))

    kshape = (1, 1, kernel_shape, kernel_shape)
    kernel = sharedX(gaussian_filter(kernel_shape).reshape(kshape))

    space = Conv2DSpace(shape = img_shape, num_channels = 1)
    conv_kwargs = dict(filters = kernel, batch_size = len(input),
                       input_space = space, border_mode = 'full')

    mean_full = Conv2D(**conv_kwargs).lmul(X)

    # For each pixel, remove the filtered mean of its neighborhood.
    half = int(np.floor(kernel_shape/ 2.))
    centered = X - mean_full[:, half:-half, half:-half, :]

    # Local norm from the filtered squares of the raw input.
    sqr_full = Conv2D(**conv_kwargs).lmul(X**2)
    local_norm = T.sqrt(sqr_full[:, half:-half, half:-half, :])

    mean_norm = local_norm.mean(axis = [1, 2])
    scale = T.largest(mean_norm.dimshuffle(0, 'x', 'x', 1), local_norm)

    normalized = T.flatten(centered / scale, outdim=3)

    normalize_fn = function([X], normalized)
    return normalize_fn(input)
Пример #16
0
def get_train(U_Ot, U_R, lenW, n_facts):
    """
    Compile and return a Theano training function that updates U_Ot and U_R.

    The returned function takes ``[r_t, gamma, L, V] + m + f`` as inputs
    (``m`` is ``x_t`` followed by ``n_facts`` scalars, ``f`` is ``n_facts``
    scalars), outputs ``[cost]`` and applies one gradient step to ``U_Ot``
    and ``U_R``.

    NOTE(review): ``alpha`` (the step size) and ``zeros`` are not defined in
    this function; they must come from the enclosing module -- confirm.
    NOTE(review): uses ``xrange``, so this is Python 2 code as written.
    """
    # The phi_* helpers build feature vectors of length 3*lenW + 3 by placing
    # the flattened row L[x_t] into one of three segments and padding with
    # zeros -- assumes L[x_t] flattens to lenW entries; TODO confirm.
    def phi_x1(x_t, L):
        return T.concatenate([L[x_t].reshape((-1,)), zeros((2*lenW,)), zeros((3,))], axis=0)
    def phi_x2(x_t, L):
        return T.concatenate([zeros((lenW,)), L[x_t].reshape((-1,)), zeros((lenW,)), zeros((3,))], axis=0)
    def phi_y(x_t, L):
        return T.concatenate([zeros((2*lenW,)), L[x_t].reshape((-1,)), zeros((3,))], axis=0)
    # phi_t fills only the last three entries with 0/1 flags for
    # x_t < y_t, x_t < yp_t and y_t < yp_t.
    def phi_t(x_t, y_t, yp_t, L):
        return T.concatenate([zeros(3*lenW,), T.stack(T.switch(T.lt(x_t,y_t), 1, 0), T.switch(T.lt(x_t,yp_t), 1, 0), T.switch(T.lt(y_t,yp_t), 1, 0))], axis=0)
    # s_Ot scans over xs; the first element (t == 0) is embedded with phi_x1,
    # later ones with phi_x2.  Each step is a bilinear form through U_Ot and
    # the per-step results are summed.
    def s_Ot(xs, y_t, yp_t, L):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), U_Ot.T),
                           T.dot(U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(x_t, y_t, yp_t, L)))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()
    # sR has the same structure as s_Ot but goes through U_R and looks the
    # candidate y_t up in V instead of L.
    def sR(xs, y_t, L, V):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), U_R.T),
                                 T.dot(U_R, phi_y(y_t, V))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()

    # Symbolic inputs of the compiled function.
    x_t = T.iscalar('x_t')
    m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
    r_t = T.iscalar('r_t')
    gamma = T.scalar('gamma')
    L = T.fmatrix('L') # list of messages
    V = T.fmatrix('V') # vocab
    r_args = T.stack(*m)

    # Two hinge terms per fact i, each scanned over every row index t of L:
    # max(gamma - s_Ot(m[:i+1], f[i], t), 0) and
    # max(gamma + s_Ot(m[:i+1], t, f[i]), 0); a term is replaced by 0 when
    # the switch condition holds.
    # NOTE(review): ``T.shape(L)-1`` is the whole shape vector minus one, not
    # a scalar; presumably ``T.shape(L)[0]-1`` was intended -- confirm.
    # The lambdas read ``i`` while scan is being built inside the same loop
    # iteration, so each sees the current ``i``.
    cost_arr = [0] * 2 * (len(m)-1)
    updates_arr = [0] * 2 * (len(m)-1)
    for i in xrange(len(m)-1):
        cost_arr[2*i], updates_arr[2*i] = theano.scan(
                lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)-1)), 0, T.largest(gamma - s_Ot(T.stack(*m[:i+1]), f[i], t, L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2*i+1], updates_arr[2*i+1] = theano.scan(
                lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)-1)), 0, T.largest(gamma + s_Ot(T.stack(*m[:i+1]), t, f[i], L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])

    # Response hinge term over every row index t of V:
    # max(gamma - sR(r_t) + sR(t), 0), with 0 where t == r_t.
    cost1, u1 = theano.scan(
        lambda r_bar, t: T.switch(T.eq(r_t, t), 0, T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)),
        sequences=[V, T.arange(T.shape(V)[0])])

    # Total cost is the sum of all hinge terms.
    cost = cost1.sum()
    for c in cost_arr:
        cost += c.sum()

    g_uo, g_ur = T.grad(cost, [U_Ot, U_R])

    # The scan update dictionaries (updates_arr, u1) are not passed on; only
    # the two gradient steps below are applied.
    train = theano.function(
        inputs=[r_t, gamma, L, V] + m + f,
        outputs=[cost],
        updates=[(U_Ot, U_Ot-alpha*g_uo), (U_R, U_R-alpha*g_ur)])
    return train
Пример #17
0
 def update(s, prev_diff, v, reward, tps):
     """
     One scan step: compute the best action value for state ``s``.

     Returns ``((diff, v_update), {})`` where ``diff`` is the larger of
     ``prev_diff`` and ``|v[s] - max_v|`` and ``v_update`` is a zero tensor
     shaped like ``v`` with entry ``s`` set to ``max_v``.  ``n_actions`` and
     ``discount`` are read from the enclosing scope.
     """
     max_v = float("-inf")
     v_template = T.zeros_like(v)
     for a in range(n_actions):
         tp = tps[s, a, :]
         max_v = T.largest(max_v, T.dot(tp, reward + discount * v))
     new_diff = abs(v[s] - max_v)
     # A Python ``if`` cannot branch on a symbolic comparison, so the
     # selection has to be part of the graph.
     diff = T.switch(T.lt(prev_diff, new_diff), new_diff, prev_diff)
     return (diff, T.set_subtensor(v_template[s], max_v)), {}
 def update(s, prev_diff, v, reward, tps):
     """
     One scan step: compute the best action value for state ``s``.

     Returns ``((diff, v_update), {})`` where ``diff`` is the larger of
     ``prev_diff`` and ``|v[s] - max_v|`` and ``v_update`` is a zero tensor
     shaped like ``v`` with entry ``s`` set to ``max_v``.  ``n_actions`` and
     ``discount`` are read from the enclosing scope.
     """
     max_v = float("-inf")
     v_template = T.zeros_like(v)
     for a in range(n_actions):
         tp = tps[s, a, :]
         max_v = T.largest(max_v, T.dot(tp, reward + discount*v))
     new_diff = abs(v[s] - max_v)
     # A Python ``if`` cannot branch on a symbolic comparison, so the
     # selection has to be part of the graph.
     diff = T.switch(T.lt(prev_diff, new_diff), new_diff, prev_diff)
     return (diff, T.set_subtensor(v_template[s], max_v)), {}
Пример #19
0
def _mmd2_and_ratio(K_XX,
                    K_XY,
                    K_YY,
                    unit_diagonal=False,
                    biased=False,
                    min_var_est=_eps):
    """
    Return ``(mmd2, ratio)`` for the given kernel matrices.

    ``mmd2`` and its variance estimate come from ``_mmd2_and_variance``;
    ``ratio`` is ``mmd2 / sqrt(max(var_est, min_var_est))``, so the
    variance is floored before the square root.
    """
    estimates = _mmd2_and_variance(K_XX,
                                   K_XY,
                                   K_YY,
                                   unit_diagonal=unit_diagonal,
                                   biased=biased)
    mmd2, var_est = estimates
    floored_var = T.largest(var_est, min_var_est)
    return mmd2, mmd2 / T.sqrt(floored_var)
Пример #20
0
        def predict_odim(Lmm, Amm, beta_sp, hyp, X_sp, x):
            """
            Return ``(mean, variance)`` for input ``x`` and one output dimension.

            The kernel is ``cov.Sum`` bound to the hyper-parameters split at
            ``idims + 1`` and to ``self.covs``.  The variance is clipped at
            zero and offset by 1e-3.
            """
            split_hyps = (hyp[:idims+1], hyp[idims+1])
            kernel_func = partial(cov.Sum, split_hyps, self.covs)

            # Kernel between the test input and the X_sp inputs.
            k = kernel_func(x, X_sp).flatten()

            kL = solve_lower_triangular(Lmm, k)
            kA = solve_lower_triangular(Amm, Lmm.T.dot(k))

            prior_var = kernel_func(x, all_pairs=False)
            explained = kL.dot(kL) + kA.dot(kA)
            # Clip at zero, then add a small constant offset.
            variance = tt.largest(prior_var - explained, 0.0) + 1e-3

            return k.dot(beta_sp), variance
Пример #21
0
    def apply(self, X):
        """
        Local contrast normalization of ``X`` using this layer's linear map.

        ``self.apply_lin`` followed by a ReLU gives the neighborhood mean
        and, applied to the squared centered signal, the neighborhood
        energy.  The output is the centered signal divided by
        ``max(1, local norm)``.
        """
        smoothed = nnfuns.relu(self.apply_lin(X))  # full convolution

        # Crop by half of the first filter dimension on both spatial axes.
        half = int(np.floor(self.filter_size[0]/2.))
        centered = X - smoothed[:, :, half:-half, half:-half]

        energy = nnfuns.relu(self.apply_lin(centered ** 2))
        local_norm = T.sqrt(energy[:, :, half:-half, half:-half])

        mean_norm = local_norm.mean(axis = [2, 3])
        scale = T.largest(mean_norm.dimshuffle(0, 1, 'x', 'x'), local_norm)

        # Only ever scale down: the divisor is never below 1.
        return centered / T.maximum(1., scale)
Пример #22
0
    def apply(self, X):
        """
        Return the local-contrast-normalized version of ``X``.

        The neighborhood mean is ``relu(self.apply_lin(X))`` cropped by half
        of ``self.filter_size[0]``.  The centered signal is then divided by
        the larger of 1, the local norm, and the mean local norm over axes
        2 and 3.
        """
        def _cropped_response(signal):
            # ReLU of the linear map, cropped by ``mid`` on axes 2 and 3.
            response = nnfuns.relu(self.apply_lin(signal))
            return response[:, :, mid:-mid, mid:-mid]

        # ``_cropped_response`` reads ``mid`` only when called, so the
        # linear map is still applied before the half-width is computed.
        full_mean = nnfuns.relu(self.apply_lin(X))
        mid = int(np.floor(self.filter_size[0] / 2.))
        X_centered = X - full_mean[:, :, mid:-mid, mid:-mid]

        denom = T.sqrt(_cropped_response(X_centered**2))
        floor_per_map = denom.mean(axis=[2, 3]).dimshuffle(0, 1, 'x', 'x')
        divisor = T.maximum(1., T.largest(floor_per_map, denom))

        return X_centered / divisor
Пример #23
0
    def lecun_lcn(self, X, kernel_size=9, threshold = 1e-4, use_divisor=True, border=False):
        """
        Yann LeCun's local contrast normalization
        Original code in Theano by: Guillaume Desjardins

        X: symbolic 4D tensor; axes 2 and 3 are the ones filtered.
        kernel_size: side length of the square filter from ``gaussian_filter``.
        threshold: floor applied to the filtered squares and to the divisor.
        use_divisor: when true, divide the centered signal by the local norm.
        border: when true, pad ``X`` by replicating its edge rows/columns
            and use a 'valid' convolution for the mean instead of cropping
            a 'full' one.

        Returns the normalized symbolic tensor (centered only when
        ``use_divisor`` is false).
        """

        filter_shape = (1, 1, kernel_size, kernel_size)
        filters = gaussian_filter(kernel_size).reshape(filter_shape)
        filters = shared(_asarray(filters, dtype=floatX), borrow=True)
        mid = int(floor(kernel_size/2.))

        if border:
            # Number of rows/columns to replicate on each side.  Floor
            # division keeps this an integer on Python 3, where ``/`` would
            # produce a float repeat count.
            r = (kernel_size-1)//2
            up = X[:,:,0:1,:].repeat(r,axis=2)
            down = X[:,:,-1:,:].repeat(r,axis=2)
            X_ = T.concatenate([up,X,down],axis=2)
            left = X_[:,:,:,0:1].repeat(r,axis=3)
            right = X_[:,:,:,-1:].repeat(r,axis=3)
            X_ = T.concatenate([left,X_,right],axis=3)

            convout = conv2d(X_, filters=filters, filter_shape=filter_shape, 
                                border_mode='valid')
            centered_X = X - convout

        else:
            convout = conv2d(X, filters=filters, filter_shape=filter_shape, 
                                border_mode='full')

            # For each pixel, remove mean of kernel_sizexkernel_size neighborhood
            centered_X = X - convout[:,:,mid:-mid,mid:-mid]

        if use_divisor:
            # Scale down norm of kernel_sizexkernel_size patch; the norm is
            # taken from the un-padded, un-centered input in both branches.
            sum_sqr_XX = conv2d(T.sqr(X), filters=filters, 
                                filter_shape=filter_shape, border_mode='full')

            sum_sqr_XX = sum_sqr_XX[:,:,mid:-mid,mid:-mid]
            # Floor before the square root as well as after the max below.
            sum_sqr_XX = T.maximum(sum_sqr_XX, threshold)
            denom = T.sqrt(sum_sqr_XX)
            per_img_mean = denom.mean(axis=[2,3])
            divisor = T.largest(per_img_mean.dimshuffle(0,1,'x','x'), denom)
            divisor = T.maximum(divisor, threshold)

            new_X = centered_X / divisor
            return new_X

        else: 
            return centered_X
def gen_fcn(batch_size,
            img_shape,
            kernel_size,
            data_type='float32',
            threshold=1e-4):
    '''
    Build and return a compiled Theano function that applies LeCun local
    contrast normalization for a fixed batch size, image shape and kernel
    size.  Modified from lecun_lcn in pylearn2.datasets.preprocessing.

    Currently data_type can only be float32; otherwise an error is reported
    saying that input and kernel should be the same type (the kernel type is
    float32).
    '''
    images = tensor.matrix(dtype=data_type)
    images = images.reshape((batch_size, img_shape[0], img_shape[1], 1))

    kshape = (1, 1, kernel_size, kernel_size)
    kernel = sharedX(gaussian_filter(kernel_size).reshape(kshape))
    space = Conv2DSpace(shape=img_shape, num_channels=1)

    def _make_conv():
        # Each convolution gets its own transformer instance.
        return Conv2D(filters=kernel,
                      batch_size=batch_size,
                      input_space=space,
                      border_mode='full')

    mean_full = _make_conv().lmul(images)

    # For each pixel, remove the filtered mean of its neighborhood.
    half = int(np.floor(kernel_size / 2.))
    inner = (slice(None), slice(half, -half), slice(half, -half), slice(None))
    centered = images - mean_full[inner]

    # Local norm from the filtered squares of the raw input.
    sqr_full = _make_conv().lmul(images**2)
    local_norm = tensor.sqrt(sqr_full[inner])

    mean_norm = local_norm.mean(axis=[1, 2])
    scale = tensor.largest(mean_norm.dimshuffle(0, 'x', 'x', 1), local_norm)
    scale = tensor.maximum(scale, threshold)

    normalized = tensor.flatten(centered / scale, outdim=3)

    return function([images], normalized)
Пример #25
0
def make_network(input_p,
                 input_q,
                 dim,
                 criterion='mmd',
                 biased=True,
                 streaming_est=False,
                 linear_kernel=False,
                 log_sigma=0,
                 hotelling_reg=0,
                 opt_log=True,
                 batchsize=None,
                 net_version='nothing'):
    """
    Build the two-sample network and its objective.

    Both inputs go through the architecture selected by ``net_version``.
    The statistic is chosen by ``(criterion, linear_kernel, streaming_est)``;
    unsupported combinations raise ``ValueError``.

    Returns ``(mmd2_pq, obj, rep_p, net_p, net_q, log_sigma)``, where
    ``log_sigma`` is a shared variable for RBF kernels and ``None`` for the
    linear kernel.
    """
    layer_shape = (batchsize, dim)
    in_p = lasagne.layers.InputLayer(shape=layer_shape, input_var=input_p)
    in_q = lasagne.layers.InputLayer(shape=layer_shape, input_var=input_q)
    net_p, net_q, reg = net_versions[net_version](in_p, in_q)
    rep_p, rep_q = lasagne.layers.get_output([net_p, net_q])

    # Keyed by (criterion, linear kernel, streaming).
    estimators = {
        ('mmd', False, False): mmd.rbf_mmd2,
        ('mmd', False, True): mmd.rbf_mmd2_streaming,
        ('mmd', True, False): mmd.linear_mmd2,
        ('ratio', False, False): mmd.rbf_mmd2_and_ratio,
        ('ratio', False, True): mmd.rbf_mmd2_streaming_and_ratio,
        ('ratio', True, False): mmd.linear_mmd2_and_ratio,
        ('hotelling', True, False): mmd.linear_mmd2_and_hotelling,
    }
    try:
        fn = estimators[criterion, linear_kernel, streaming_est]
    except KeyError:
        kernel_name = "linear kernel" if linear_kernel else "rbf kernel"
        mode_name = "streaming" if streaming_est else "not streaming"
        raise ValueError("Bad parameter combo: criterion = {}, {}, {}".format(
            criterion, kernel_name, mode_name))

    # Only pass the keyword arguments the chosen estimator is given above.
    kwargs = {}
    if not linear_kernel:
        log_sigma = theano.shared(make_floatX(log_sigma), name='log_sigma')
        kwargs['sigma'] = T.exp(log_sigma)
    else:
        log_sigma = None
    if not streaming_est:
        kwargs['biased'] = biased
    if criterion == 'hotelling':
        kwargs['reg'] = hotelling_reg

    mmd2_pq, stat = fn(rep_p, rep_q, **kwargs)

    # The objective is the negated statistic (optionally its log, with the
    # statistic floored at 1e-6) plus the regularizer.
    score = T.log(T.largest(stat, 1e-6)) if opt_log else stat
    obj = -score + reg
    return mmd2_pq, obj, rep_p, net_p, net_q, log_sigma
Пример #26
0
def rbf_mmd2_streaming_and_ratio(X, Y, sigma=0):
    """
    Streaming (paired-sample) RBF MMD^2 estimate and its ratio statistic.

    Rows of ``X`` and ``Y`` are consumed in consecutive even/odd pairs; only
    the first ``2 * (X.shape[0] // 2)`` rows are used.  Returns
    ``(mmd2, ratio)`` where ``ratio = mmd2 / sqrt(max(approx_var, _eps))``.

    The default ``sigma=0`` makes ``gamma`` a division by zero, so callers
    need to pass a non-zero bandwidth.
    """
    # n = (T.smallest(X.shape[0], Y.shape[0]) // 2) * 2
    n = (X.shape[0] // 2) * 2
    gamma = 1 / (2 * sigma**2)
    rbf = lambda A, B: T.exp(-gamma * ((A - B)**2).sum(axis=1))
    h_bits = (rbf(X[:n:2], X[1:n:2]) + rbf(Y[:n:2], Y[1:n:2]) -
              rbf(X[:n:2], Y[1:n:2]) - rbf(X[1:n:2], Y[:n:2]))

    mmd2 = h_bits.mean()

    # variance is 1/2 E_{v, v'} (h(v) - h(v'))^2
    # estimate with even, odd diffs
    m = (n // 2) * 2
    # Written as 0.5 rather than ``1 / 2``: under Python 2 integer division
    # ``1 / 2`` is 0, which would zero the variance estimate.
    approx_var = 0.5 * ((h_bits[:m:2] - h_bits[1:m:2])**2).mean()
    ratio = mmd2 / T.sqrt(T.largest(approx_var, _eps))
    return mmd2, ratio
Пример #27
0
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Apply Yann LeCun's local contrast normalization to a batch of images.

    Original code in Theano by: Guillaume Desjardins

    Parameters
    ----------
    input : array
        Batch indexed as (example, row, column); a trailing channel axis
        of size 1 is appended before processing.
    img_shape : sequence
        ``img_shape[0]`` and ``img_shape[1]`` give the image rows/columns.
    kernel_shape : int
        Side length of the square Gaussian kernel.
    threshold : float
        Lower bound applied to the divisor.

    Returns
    -------
    The value of the compiled Theano function on ``input``: the normalized
    images flattened to three dimensions.
    """
    input = input.reshape((input.shape[0], input.shape[1], input.shape[2], 1))
    n_examples = len(input)
    X = tensor.matrix(dtype=input.dtype).reshape(
        (n_examples, img_shape[0], img_shape[1], 1))

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = sharedX(gaussian_filter(kernel_shape).reshape(filter_shape))
    input_space = Conv2DSpace(shape=img_shape, num_channels=1)

    def make_transformer():
        # A fresh 'full' convolution operator over the Gaussian filters.
        return Conv2D(filters=filters, batch_size=n_examples,
                      input_space=input_space,
                      border_mode='full')

    # Crop that brings a 'full' convolution back to the input size.
    mid = int(numpy.floor(kernel_shape / 2.))
    crop = slice(mid, -mid)

    # Subtract the Gaussian-weighted neighbourhood mean from every pixel.
    local_mean = make_transformer().lmul(X)
    centered_X = X - local_mean[:, crop, crop, :]

    # Gaussian-weighted neighbourhood norm of the (uncentered) input.
    local_sqr = make_transformer().lmul(X ** 2)
    denom = tensor.sqrt(local_sqr[:, crop, crop, :])

    # Divide by the larger of the local norm and its per-image mean,
    # never letting the divisor fall below ``threshold``.
    per_img_mean = denom.mean(axis=[1, 2])
    divisor = tensor.maximum(
        tensor.largest(per_img_mean.dimshuffle(0, 'x', 'x', 1), denom),
        threshold)

    normalized = tensor.flatten(centered_X / divisor, outdim=3)
    return function([X], normalized)(input)
def leaky_beta_asymmetric_fixation_2b(o, t, o2, f1, f2, v, alpha, beta, d,
                                      omega, tau_p, tau_n, theta):
    """
    Forgetful beta model with asymmetric updating and a fixation bonus.

    Both beta-distribution parameters decay by ``(1 - d)`` and are then
    incremented by the weighted outcome of this stimulus and by the
    fixation-weighted outcome of the other stimulus.  ``alpha`` additionally
    receives ``max(0, f1 - f2) * theta``.  Both parameters are floored at
    ``0.1 ** 10`` before the mean and variance are computed.

    Args:
        o: Trial outcome
        t: Time (not used)
        o2: Outcome of the other stimulus
        f1: Fixation duration proportion for this stimulus
        f2: Fixation duration proportion for the other stimulus
        v: Previous trial value estimate (not used)
        alpha: Starting alpha
        beta: Starting beta
        d: Decay (forgetting) rate
        omega: Weight of the other stimulus outcome
        tau_p: Positive update weight
        tau_n: Negative update weight
        theta: Weighting on fixation-dependent bonus to alpha

    Returns:
        Tuple ``(value, alpha, beta, var)``: the beta-distribution mean, the
        updated alpha and beta, and the beta-distribution variance.
    """
    retain = 1 - d
    fixation_bonus = T.largest(0, f1 - f2) * theta

    alpha = retain * alpha + (o * tau_p) + (omega * f2 * o2) + fixation_bonus
    beta = retain * beta + ((1 - o) * tau_n) + (omega * f2 * (1 - o2))

    # Keep both parameters strictly positive.
    floor = T.power(0.1, 10)
    alpha = T.maximum(floor, alpha)
    beta = T.maximum(floor, beta)

    total = alpha + beta
    value = alpha / total
    var = (alpha * beta) / (T.pow(total, 2) * (total + 1))

    return (value, alpha, beta, var)
Пример #29
0
def lecun_lcn(input, kernel_size=9, threshold=1e-4, use_divisor=False):
    """
    Yann LeCun's local contrast normalization.
    Original code in Theano by: Guillaume Desjardins

    :param input: array indexed (example, row, column); a channel axis of
        size 1 is inserted and the data is cast to ``floatX``
    :param kernel_size: side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :param use_divisor: when true, also divide by the local norm
    :return: value of the compiled function on ``input``; the result is
        moved to channel-last order and flattened to three dimensions
    """
    batch_shape = (input.shape[0], 1, input.shape[1], input.shape[2])
    input = input.reshape(batch_shape).astype(floatX)

    X = T.tensor4(dtype=floatX)
    filter_shape = (1, 1, kernel_size, kernel_size)
    kernel = gaussian_filter(kernel_size).reshape(filter_shape)
    filters = shared(_asarray(kernel, dtype=floatX), borrow=True)

    def blur(signal):
        # Same-size Gaussian smoothing ('half' border keeps the input size).
        return conv2d(input=signal,
                      filters=filters,
                      input_shape=input.shape,
                      filter_shape=filter_shape,
                      border_mode='half')

    # Remove the Gaussian-weighted neighbourhood mean.
    new_X = X - blur(X)

    if use_divisor:
        # Scale by the local norm of the centered signal, but never by less
        # than the per-map mean norm or ``threshold``.
        denom = T.sqrt(blur(T.sqr(T.abs_(new_X))))
        per_img_mean = denom.mean(axis=[2, 3])
        divisor = T.maximum(
            T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom),
            threshold)
        new_X = new_X / divisor

    flattened = new_X.dimshuffle(0, 2, 3, 1).flatten(ndim=3)
    return function([X], flattened)(input)
Пример #30
0
def lecun_lcn(input, kernel_size=9, threshold=1e-4, use_divisor=False):
    """
    Yann LeCun's local contrast normalization.
    Original code in Theano by: Guillaume Desjardins

    :param input: array indexed (example, row, column); a channel axis of
        size 1 is inserted and the data is cast to ``floatX``
    :param kernel_size: side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :param use_divisor: when true, also divide by the local norm
    :return: value of the compiled function on ``input``; the result is
        moved to channel-last order and flattened to three dimensions
    """
    batch_shape = (input.shape[0], 1, input.shape[1], input.shape[2])
    input = input.reshape(batch_shape).astype(floatX)

    X = T.tensor4(dtype=floatX)
    filter_shape = (1, 1, kernel_size, kernel_size)
    kernel = gaussian_filter(kernel_size).reshape(filter_shape)
    filters = shared(_asarray(kernel, dtype=floatX), borrow=True)

    def blur(signal):
        # Same-size Gaussian smoothing ('half' border keeps the input size).
        return conv2d(input=signal,
                      filters=filters,
                      input_shape=input.shape,
                      filter_shape=filter_shape,
                      border_mode='half')

    # Remove the Gaussian-weighted neighbourhood mean.
    new_X = X - blur(X)

    if use_divisor:
        # Scale by the local norm of the centered signal, but never by less
        # than the per-map mean norm or ``threshold``.
        denom = T.sqrt(blur(T.sqr(T.abs_(new_X))))
        per_img_mean = denom.mean(axis=[2, 3])
        divisor = T.maximum(
            T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom),
            threshold)
        new_X = new_X / divisor

    flattened = new_X.dimshuffle(0, 2, 3, 1).flatten(ndim=3)
    return function([X], flattened)(input)
Пример #31
0
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Yann LeCun's local contrast normalization
    This is performed per-colorchannel!!!

    http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf

    :param input: array indexed (example, row, column); a channel axis of
        size 1 is inserted at position 1 before processing
    :param img_shape: ``img_shape[0]`` / ``img_shape[1]`` are the image
        rows / columns
    :param kernel_shape: side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :return: value of the compiled function on ``input``, laid out as
        (example, 1, row, column)
    """
    input = input.reshape((input.shape[0], 1, input.shape[1], input.shape[2]))
    X = T.matrix(dtype=input.dtype)
    X = X.reshape((len(input), 1, img_shape[0], img_shape[1]))

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = theano.shared(
        gaussian_filter(kernel_shape).reshape(filter_shape))
    image_shape = (input.shape[0], 1, img_shape[0], img_shape[1])

    convout = conv.conv2d(input=X,
                          filters=filters,
                          image_shape=image_shape,
                          filter_shape=filter_shape,
                          border_mode='full')

    # For each pixel, remove the Gaussian-weighted mean of its
    # kernel_shape x kernel_shape neighbourhood ('full' output is cropped
    # back to the input size).
    mid = int(np.floor(kernel_shape / 2.))
    centered_X = X - convout[:, :, mid:-mid, mid:-mid]

    # Gaussian-weighted norm of each neighbourhood.
    sum_sqr_XX = conv.conv2d(input=T.sqr(X),
                             filters=filters,
                             image_shape=image_shape,
                             filter_shape=filter_shape,
                             border_mode='full')

    denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])
    # The tensor is (example, channel, row, column), so the per-image mean
    # runs over the spatial axes 2 and 3.  Averaging over (1, 2) left a
    # (example, column) result that the dimshuffle below then misplaced on
    # the channel axis.
    per_img_mean = T.mean(denom, axis=(2, 3))
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)

    new_X = centered_X / divisor
    #new_X = theano.tensor.flatten(new_X, outdim=3)

    f = theano.function([X], new_X)
    return f(input)
Пример #32
0
def make_lecun_lcn(input_shape, img_shape, kernel_shape, threshold=1e-4):
    """
    lecun local contrast normalization
    :param input_shape: (batch_size, stack_size, nb_row, nb_col)
    :param img_shape: (nb_row, nb_col) image dimensions
    :param kernel_shape: side length of the square kernel, e.g. 9 for 9x9
    :param threshold: lower bound applied to the divisor
    :return: theano function that computes the local contrast normalized
        image; its output is moved to channel-last order and flattened to
        three dimensions
    """
    X = T.matrix(dtype=theano.config.floatX)
    X = X.reshape(input_shape)

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = gaussian_filter(kernel_shape).reshape(filter_shape)
    # NOTE(review): the convolutions are declared with a single channel,
    # regardless of input_shape[1] -- confirm callers pass stack_size == 1.
    image_shape = (input_shape[0], 1, img_shape[0], img_shape[1])

    convout = conv.conv2d(input=X,
                          filters=filters,
                          image_shape=image_shape,
                          filter_shape=filter_shape,
                          border_mode='full')

    # For each pixel, remove the Gaussian-weighted mean of its
    # kernel_shape x kernel_shape neighbourhood.
    mid = int(np.floor(kernel_shape / 2.))
    centered_X = X - convout[:, :, mid:-mid, mid:-mid]

    # Gaussian-weighted norm of each neighbourhood of the centered image.
    sum_sqr_XX = conv.conv2d(input=centered_X ** 2,
                             filters=filters,
                             image_shape=image_shape,
                             filter_shape=filter_shape,
                             border_mode='full')

    denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])
    # The tensor is (batch, channel, row, col): the per-image mean runs over
    # the spatial axes 2 and 3.  The previous axis=[1, 2] with
    # dimshuffle(0, 'x', 'x', 1) is the channel-last form and produced a
    # per-column mean here.
    per_img_mean = denom.mean(axis=[2, 3])
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)

    new_X = centered_X / divisor
    new_X = new_X.dimshuffle(0, 2, 3, 1)
    new_X = new_X.flatten(ndim=3)

    f = theano.function([X], new_X)
    return f
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Yann LeCun's local contrast normalization
    This is performed per-colorchannel!!!

    http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf

    :param input: array indexed (example, row, column); a channel axis of
        size 1 is inserted at position 1 before processing
    :param img_shape: ``img_shape[0]`` / ``img_shape[1]`` are the image
        rows / columns
    :param kernel_shape: side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :return: value of the compiled function on ``input``, laid out as
        (example, 1, row, column)
    """
    input = input.reshape((input.shape[0], 1, input.shape[1], input.shape[2]))
    X = T.matrix(dtype=input.dtype)
    X = X.reshape((len(input), 1, img_shape[0], img_shape[1]))

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = theano.shared(gaussian_filter(kernel_shape).reshape(filter_shape))
    image_shape = (input.shape[0], 1, img_shape[0], img_shape[1])

    convout = conv.conv2d(input=X,
                          filters=filters,
                          image_shape=image_shape,
                          filter_shape=filter_shape,
                          border_mode='full')

    # For each pixel, remove the Gaussian-weighted mean of its
    # kernel_shape x kernel_shape neighbourhood ('full' output is cropped
    # back to the input size).
    mid = int(np.floor(kernel_shape / 2.))
    centered_X = X - convout[:, :, mid:-mid, mid:-mid]

    # Gaussian-weighted norm of each neighbourhood.
    sum_sqr_XX = conv.conv2d(input=T.sqr(X),
                             filters=filters,
                             image_shape=image_shape,
                             filter_shape=filter_shape,
                             border_mode='full')

    denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])
    # The tensor is (example, channel, row, column), so the per-image mean
    # runs over the spatial axes 2 and 3.  Averaging over (1, 2) left a
    # (example, column) result that the dimshuffle below then misplaced on
    # the channel axis.
    per_img_mean = T.mean(denom, axis=(2, 3))
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)

    new_X = centered_X / divisor
    #new_X = theano.tensor.flatten(new_X, outdim=3)

    f = theano.function([X], new_X)
    return f(input)
Пример #34
0
def lcn_lacombe(data, kernel_shape, n_maps):
    """
    Local contrast normalization with one filter spanning all feature maps.

    :param data: symbolic tensor laid out as [examples, maps, length, width]
    :param kernel_shape: side length of the square Gaussian kernel
    :param n_maps: number of feature maps the filter spans
    :return: symbolic tensor ``centered / max(1, divisor)``
    """
    # One filter of shape [1, maps, kernel, kernel], normalized to sum to 1.
    # todo: don't scale as this makes input much smaller than weights
    filter_shape = (1, n_maps, kernel_shape, kernel_shape)
    kernel = np.resize(gaussian_filter(kernel_shape), filter_shape)
    filters = sharedX(kernel / np.sum(kernel))

    mid = int(np.floor(kernel_shape / 2.))
    tiling = (1, n_maps, 1, 1)

    def blur_cropped(signal):
        # 'full' convolution cropped by ``mid`` on each spatial border.
        full = conv2d(signal, filters=filters, border_mode='full')
        return full[:, :, mid:-mid, mid:-mid]

    # Subtract the neighbourhood mean (a single map, tiled across all maps).
    local_mean = blur_cropped(data)
    centered_X = data - T.tile(local_mean, tiling)

    # Neighbourhood norm of the uncentered data.
    denom = T.sqrt(blur_cropped(T.sqr(data)))
    per_img_mean = denom.mean(axis=[1, 2, 3])
    divisor = T.largest(per_img_mean.dimshuffle(0, 'x', 'x', 'x'),
                        T.tile(denom, tiling))

    # Only scale down: the divisor is never allowed below 1.
    return centered_X / T.maximum(1., divisor)
Пример #35
0
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
    """
    Yann LeCun's local contrast normalization
    Original code in Theano by: Guillaume Desjardins

    :param input: array reshaped to (example, 1, img_shape[0], img_shape[1])
    :param img_shape: ``img_shape[0]`` / ``img_shape[1]`` are the image
        rows / columns
    :param kernel_shape: side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :return: value of the compiled function on ``input``; the result is
        moved to channel-last order and flattened to three dimensions
    """
    input = input.reshape(input.shape[0], 1, img_shape[0], img_shape[1])
    X = T.matrix(dtype=theano.config.floatX)
    X = X.reshape(input.shape)

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    filters = gaussian_filter(kernel_shape).reshape(filter_shape)
    image_shape = (input.shape[0], 1, img_shape[0], img_shape[1])

    convout = conv.conv2d(input=X,
                          filters=filters,
                          image_shape=image_shape,
                          filter_shape=filter_shape,
                          border_mode='full')

    # For each pixel, remove the Gaussian-weighted mean of its
    # kernel_shape x kernel_shape neighbourhood.
    mid = int(np.floor(kernel_shape / 2.))
    centered_X = X - convout[:, :, mid:-mid, mid:-mid]

    # Gaussian-weighted norm of each neighbourhood of the centered image.
    sum_sqr_XX = conv.conv2d(input=centered_X ** 2,
                             filters=filters,
                             image_shape=image_shape,
                             filter_shape=filter_shape,
                             border_mode='full')

    denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])
    # The tensor is (example, channel, row, col): the per-image mean runs
    # over the spatial axes 2 and 3.  The previous axis=[1, 2] with
    # dimshuffle(0, 'x', 'x', 1) is the channel-last form and produced a
    # per-column mean here.
    per_img_mean = denom.mean(axis=[2, 3])
    divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
    divisor = T.maximum(divisor, threshold)

    new_X = centered_X / divisor
    new_X = new_X.dimshuffle(0, 2, 3, 1)
    new_X = new_X.flatten(ndim=3)

    f = theano.function([X], new_X)
    return f(input)
Пример #36
0
Файл: lcn.py Проект: osdf/cv
def lcn_sublayer(image, image_shape, fmaps, pool_depth, width, sigma):
    """
    Build a local contrast normalization sub-layer for ``image``.

    The image is centered by subtracting a filtered copy of itself, then
    divided by the larger of the filtered local norm and that norm's mean
    over axes 2 and 3 (plus 1e-6).

    :param image: symbolic 4-D image tensor
    :param image_shape: shape passed to the convolutions as ``image_shape``
    :param fmaps: forwarded to ``lcn_filters``
    :param pool_depth: forwarded to ``lcn_filters``
    :param width: filter width; ``width // 2`` pixels are cropped from each
        border of the 'full' convolution output
    :param sigma: forwarded to ``lcn_filters``
    :return: normalized image cast to ``theano.config.floatX``
    """
    # Debug output, written so the text is identical on Python 2 and 3
    # (the former print statements were a syntax error on Python 3).
    print(image_shape)
    print("%s %s %s %s" % (fmaps, pool_depth, width, sigma))
    border = width//2
    filters = lcn_filters(fmaps, pool_depth, width, sigma)
    filter_shape = filters.shape
    blurred_mean = conv.conv2d(input=image, filters=filters,
            image_shape=image_shape, filter_shape=filter_shape,
            border_mode='full')
    # Rebinds the local name only; the caller's variable is untouched.
    image = image - blurred_mean[:, :, border:-border, border:-border]

    image_sqr = T.sqr(image)
    blurred_sqr = conv.conv2d(input=image_sqr, filters=filters,
            image_shape=image_shape, filter_shape=filter_shape,
            border_mode='full')

    div = T.sqrt(blurred_sqr[:, :, border:-border, border:-border])
    fm_mean = div.mean(axis=[2, 3])
    # The small constant keeps the divisor strictly positive.
    div = T.largest(fm_mean.dimshuffle(0, 1, 'x', 'x'), div) + 1e-6
    image = image/div
    return T.cast(image, theano.config.floatX)
Пример #37
0
def lecun_lcn(  X, kernel_size=7, threshold = 1e-4, use_divisor=False):
    """
    Symbolic local contrast normalization of ``X``.

    :param X: symbolic 4-D tensor; its last two axes are treated as the
        spatial axes
    :param kernel_size: side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :param use_divisor: when true, also divide by the local norm
    :return: symbolic normalized tensor
    """
    filter_shape = (1, 1, kernel_size, kernel_size)
    kernel = gaussian_filter(kernel_size).reshape(filter_shape)
    filters = theano.shared(np.array(asarray(kernel, dtype='float32')), borrow=True)

    # Crop that brings a 'full' convolution back to the input size.
    mid = int(floor(kernel_size/2.))
    crop = slice(mid, -mid)

    # Subtract the Gaussian-weighted neighbourhood mean.
    local_mean = theano.tensor.nnet.conv2d(X, filters=filters,
                                           filter_shape=filter_shape,
                                           border_mode='full')
    new_X = X - local_mean[:, :, crop, crop]

    if not use_divisor:
        return new_X

    # Local norm of the uncentered input, bounded below by its per-map mean
    # and by ``threshold``.
    local_sqr = conv2d(T.sqr(T.abs_(X)), filters=filters,
                       filter_shape=filter_shape, border_mode='full')
    denom = T.sqrt(local_sqr[:, :, crop, crop])
    per_img_mean = denom.mean(axis=[2,3])
    divisor = T.maximum(
        T.largest(per_img_mean.dimshuffle(0,1,'x','x'), denom),
        threshold)

    return new_X / divisor
Пример #38
0
def lecun_lcn_batch(input, kernel_shape=9, threshold=1e-4):
    """
    Local contrast normalization of a channel-last image batch.

    :param input: array indexed (example, row, column, channel); every
        channel of every example is normalized as its own single-channel
        image
    :param kernel_shape: side length of the square Gaussian kernel
    :param threshold: lower bound applied to the divisor
    :return: evaluated result, restored to (example, row, column, channel)
    """
    input = np.float64(input)
    n_examples, n_rows, n_cols, n_channels = (
        input.shape[0], input.shape[1], input.shape[2], input.shape[3])

    # Fold the channel axis into the batch axis.
    X = input.transpose(3, 0, 1, 2).reshape(
        (n_examples * n_channels, 1, n_rows, n_cols))

    filter_shape = (1, 1, kernel_shape, kernel_shape)
    kernel = gaussian_filter(kernel_shape).reshape(filter_shape)
    filters = theano.shared(
        theano._asarray(kernel, dtype=theano.config.floatX), borrow=True)

    mid = int(np.floor(kernel_shape / 2.))

    def blur_cropped(signal):
        # 'full' convolution cropped back to the input size.
        full = conv.conv2d(input=signal,
                           filters=filters,
                           filter_shape=filter_shape,
                           border_mode='full')
        return full[:, :, mid:-mid, mid:-mid]

    centered_X = X - blur_cropped(X)

    # Local norm of the centered signal, bounded below by its per-image
    # mean and by ``threshold``.
    denom = T.sqrt(blur_cropped(centered_X**2))
    per_img_mean = denom.mean(axis=[2, 3])
    divisor = T.maximum(
        T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom),
        threshold)

    new_X = centered_X / divisor

    # Unfold the channel axis again.
    restored = new_X.eval().reshape(
        (n_channels, n_examples, n_rows, n_cols))
    return restored.transpose(1, 2, 3, 0)
Пример #39
0
def lcn_lacombe(data, kernel_shape, n_maps):
    """
    Local contrast normalization with one filter spanning all feature maps.

    :param data: symbolic tensor laid out as [examples, maps, length, width]
    :param kernel_shape: side length of the square Gaussian kernel
    :param n_maps: number of feature maps the filter spans
    :return: symbolic tensor ``centered / max(1, divisor)``
    """
    # One filter of shape [1, maps, kernel, kernel], normalized to sum to 1.
    # todo: don't scale as this makes input much smaller than weights
    filter_shape = (1, n_maps, kernel_shape, kernel_shape)
    kernel = np.resize(gaussian_filter(kernel_shape), filter_shape)
    filters = sharedX(kernel / np.sum(kernel))

    mid = int(np.floor(kernel_shape / 2.))
    tiling = (1, n_maps, 1, 1)

    def blur_cropped(signal):
        # 'full' convolution cropped by ``mid`` on each spatial border.
        full = conv2d(signal, filters=filters, border_mode='full')
        return full[:, :, mid:-mid, mid:-mid]

    # Subtract the neighbourhood mean (a single map, tiled across all maps).
    local_mean = blur_cropped(data)
    centered_X = data - T.tile(local_mean, tiling)

    # Neighbourhood norm of the uncentered data.
    denom = T.sqrt(blur_cropped(T.sqr(data)))
    per_img_mean = denom.mean(axis=[1, 2, 3])
    divisor = T.largest(per_img_mean.dimshuffle(0, 'x', 'x', 'x'),
                        T.tile(denom, tiling))

    # Only scale down: the divisor is never allowed below 1.
    return centered_X / T.maximum(1., divisor)
Пример #40
0
def main():
    """Fetch the Rouder et al. (2008) data, fit the model, save a trace plot.

    The trial-level CSV is downloaded, collapsed to per-subject/per-set-size
    counts, and used as observations for a PyMC model with three
    subject-level parameters.  The trace plot is written to
    ``../../assets/images/wm-cap.png``.

    """
    # load the data
    url = ("https://raw.githubusercontent.com/PerceptionCognitionLab/"
           "data0/master/wmPNAS2008/lk2clean.csv")
    df = pd.read_csv(urlopen(url), index_col=0)

    def summarize(subj, N, trials):
        # Counts for one subject at one set size.
        is_change = trials.ischange.astype(bool)
        is_same = (1 - trials.ischange).astype(bool)
        return {
            "subj": subj,
            "M": N,
            "H": trials[is_change].resp.sum(),
            "D": trials.ischange.sum(),
            "F": trials[is_same].resp.sum(),
            "S": (1 - trials.ischange).sum(),
        }

    # compress into "binomial" format
    data = pd.DataFrame([
        summarize(subj, N, trials)
        for (subj, N), trials in df.groupby(["sub", "N"])
    ])
    subjects = data.subj.unique()
    n_subjects = len(subjects)

    # design matrix mapping each data row to its subject's parameter
    X = np.asarray(dmatrix("0 + C(subj)", data))

    with pm.Model():

        # capacity: non-centered hierarchical prior, clipped below at zero
        kappa_mu = pm.Cauchy(name=r"$\mu_{(\kappa)}$", alpha=0, beta=5)
        kappa_de = pm.Normal(name=r"$\delta_{\kappa)}$",
                             mu=0,
                             sigma=1,
                             shape=n_subjects)
        kappa_si = pm.HalfCauchy(name=r"$\sigma_{(\kappa)}$", beta=5)
        kappa = pm.Deterministic(r"$\kappa$", kappa_mu + kappa_de * kappa_si)
        capacity = pm.Deterministic(
            r"$k$", tt.largest(kappa, tt.zeros(n_subjects)))
        k = pm.math.dot(X, capacity)

        # guessing parameter, mapped through a sigmoid
        gamma_mu = pm.Cauchy(name=r"$\mu_{(\gamma)}$", alpha=0, beta=5)
        gamma_de = pm.Normal(name=r"$\delta_{\gamma)}$",
                             mu=0,
                             sigma=1,
                             shape=n_subjects)
        gamma_si = pm.HalfCauchy(name=r"$\sigma_{(\gamma)}$", beta=5)
        gamma = pm.Deterministic(r"$\gamma$", gamma_mu + gamma_de * gamma_si)
        guess = pm.Deterministic(r"$g$", pm.math.sigmoid(gamma))
        g = pm.math.dot(X, guess)

        # "does not lapse" parameter, mapped through a sigmoid
        zeta_mu = pm.Cauchy(name=r"$\mu_{(\zeta)}$", alpha=0, beta=5)
        zeta_de = pm.Normal(name=r"$\delta_{\zeta)}$",
                            mu=0,
                            sigma=1,
                            shape=n_subjects)
        zeta_si = pm.HalfCauchy(name=r"$\sigma_{(\zeta)}$", beta=5)
        zeta = pm.Deterministic(r"$\zeta$", zeta_mu + zeta_de * zeta_si)
        no_lapse = pm.Deterministic(r"$z$", pm.math.sigmoid(zeta))
        z = pm.math.dot(X, no_lapse)

        # response probabilities; q is k / M capped at one
        q = tt.smallest(k / data.M, tt.ones(len(data)))
        h = (1 - z) * g + z * q + z * (1 - q) * g
        f = (1 - z) * g + z * (1 - q) * g

        # observed counts
        pm.Binomial(name="$H$", p=h, n=data.D, observed=data.H)
        pm.Binomial(name="$F$", p=f, n=data.S, observed=data.F)

        # sample and plot
        trace = pm.sample(draws=5000, tune=2000, chains=2)
        pm.traceplot(trace, compact=True)
        plt.savefig("../../assets/images/wm-cap.png",
                    bbox_inches=0,
                    transparent=True)
Пример #41
0
    def create_train(self, lenW, n_facts):
        """Build the symbolic training graph and compile the Theano functions.

        Reads ``self.U_Ot``, ``self.U_R`` and ``self.lr``; sets
        ``self.train_model``, ``self.sR`` and ``self.s_Ot``.

        Args:
            lenW: length of one feature segment; the ``phi_*`` helpers pad
                their vectors with zero blocks of ``lenW``, ``2*lenW`` or
                ``3*lenW`` entries plus three trailing entries.
            n_facts: number of ``m_o%d`` and ``f%d_t`` scalar inputs created.
        """
        ONE = theano.shared(np.float32(1))
        ZERO = theano.shared(np.float32(0))

        # Look closely at these three functions: they just pad everything to equal length
        def phi_x1(x_t, L):  # from equation (5)
            return T.concatenate([L[x_t].reshape((-1,)), zeros((2*lenW,)), zeros((3,))], axis=0)
        def phi_x2(x_t, L):  # from equation (5)
            return T.concatenate([zeros((lenW,)), L[x_t].reshape((-1,)), zeros((lenW,)), zeros((3,))], axis=0)
        def phi_y(x_t, L):   # from equation (5)
            return T.concatenate([zeros((2*lenW,)), L[x_t].reshape((-1,)), zeros((3,))], axis=0)


        def phi_t(x_t, y_t, yp_t, L): # from equation (10); shows what yp_t means
            return T.concatenate([zeros(3*lenW,), T.stack(T.switch(T.lt(x_t,y_t), ONE, ZERO), T.switch(T.lt(x_t,yp_t), ONE, ZERO), T.switch(T.lt(y_t,yp_t), ONE, ZERO))], axis=0)
        def s_Ot(xs, y_t, yp_t, L):  # this is s with subscript O_t in the paper, equation (3)
            result, updates = theano.scan(
                lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), self.U_Ot.T),
                               T.dot(self.U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(x_t, y_t, yp_t, L)))),
                sequences=[xs, T.arange(T.shape(xs)[0])])
            return result.sum()
        def sR(xs, y_t, L, V):  # this is s with subscript R in the paper, equation (4)
            result, updates = theano.scan(
                lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), self.U_R.T),
                                     T.dot(self.U_R, phi_y(y_t, V))),
                sequences=[xs, T.arange(T.shape(xs)[0])])
            return result.sum()
            
        x_t = T.iscalar('x_t')
        y_t = T.iscalar('y_t')
        yp_t = T.iscalar('yp_t')
        xs = T.ivector('xs')
        m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]  # m is presumably the memory
        f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]  # from equations (6) and (7)
        r_t = T.iscalar('r_t')  # from equation (8)
        gamma = T.scalar('gamma')
        L = T.fmatrix('L') # list of messages: several vectors, each representing one sentence
        V = T.fmatrix('V') # vocab: several vectors, each representing one word
        r_args = T.stack(*m)

        # Two hinge-style cost terms per fact; scan invokes each lambda while
        # the graph is being built, so i is read at its current loop value.
        cost_arr = [0] * 2 * (len(m)-1)
        for i in xrange(len(m)-1):
            cost_arr[2*i], _ = theano.scan(  # this is equation (6)
                    lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0]-1)), 0, T.largest(gamma - s_Ot(T.stack(*m[:i+1]), # T.stack(*m[:i+1]) should be the previous memories
					f[i], t, L), 0)),
                    sequences=[L, T.arange(T.shape(L)[0])])
            cost_arr[2*i] /= T.shape(L)[0]
            cost_arr[2*i+1], _ = theano.scan(  # this is equation (7)
                    lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)[0]-1)), 0, T.largest(gamma + s_Ot(T.stack(*m[:i+1]), # T.stack(*m[:i+1]) should be the previous memories
					t, f[i], L), 0)),
                    sequences=[L, T.arange(T.shape(L)[0])])
            cost_arr[2*i+1] /= T.shape(L)[0]

        cost1, _ = theano.scan(  # this is equation (8)
            lambda r_bar, t: T.switch(T.eq(r_t, t), 0, T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)),
            sequences=[V, T.arange(T.shape(V)[0])])
        cost1 /= T.shape(V)[0]

        # Total cost: the response term plus every per-fact term.
        cost = cost1.sum()
        for c in cost_arr:
            cost += c.sum()

        updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)
        # print([r_t, gamma, L, V] + m + f) # actually [r_t, gamma, L, V, x_t, m_o0, f0_t]
        self.train_model = theano.function(
            inputs=[r_t, gamma, L, V] + m + f,  # actually [r_t, gamma, L, V, x_t, m_o0, f0_t]
            outputs=[cost],
            updates=updates)

#        theano.printing.pydotprint(self.train_model, outfile="./test.png",
#                                   var_with_name_simple=True)

        # Stand-alone compiled versions of the two scoring functions.
        self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
        self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))
Пример #42
0
    def create_train(self, lenW, n_facts):
        """Build the symbolic training graph and compile the Theano functions.

        Reads ``self.U_Ot``, ``self.U_R`` and ``self.lr``; sets
        ``self.train_model``, ``self.sR`` and ``self.s_Ot``.

        Args:
            lenW: length of one feature segment; the ``phi_*`` helpers pad
                their vectors with zero blocks of ``lenW``, ``2 * lenW`` or
                ``3 * lenW`` entries plus three trailing entries.
            n_facts: number of ``m_o%d`` and ``f%d_t`` scalar inputs created.
        """
        ONE = theano.shared(np.float32(1))
        ZERO = theano.shared(np.float32(0))

        # Look closely at these three functions: they just pad everything to equal length
        def phi_x1(x_t, L):  # from equation (5)
            return T.concatenate(
                [L[x_t].reshape((-1, )),
                 zeros((2 * lenW, )),
                 zeros((3, ))],
                axis=0)

        def phi_x2(x_t, L):  # from equation (5)
            return T.concatenate([
                zeros((lenW, )), L[x_t].reshape((-1, )),
                zeros((lenW, )),
                zeros((3, ))
            ],
                                 axis=0)

        def phi_y(x_t, L):  # from equation (5)
            return T.concatenate(
                [zeros((2 * lenW, )), L[x_t].reshape((-1, )),
                 zeros((3, ))],
                axis=0)

        def phi_t(x_t, y_t, yp_t, L):  # from equation (10); shows what yp_t means
            return T.concatenate([
                zeros(3 * lenW, ),
                T.stack(T.switch(T.lt(x_t, y_t), ONE, ZERO),
                        T.switch(T.lt(x_t, yp_t), ONE, ZERO),
                        T.switch(T.lt(y_t, yp_t), ONE, ZERO))
            ],
                                 axis=0)

        def s_Ot(xs, y_t, yp_t, L):  # this is s with subscript O_t in the paper, equation (3)
            result, updates = theano.scan(
                lambda x_t, t: T.dot(
                    T.dot(
                        T.switch(T.eq(t, 0),
                                 phi_x1(x_t, L).reshape((1, -1)),
                                 phi_x2(x_t, L).reshape(
                                     (1, -1))), self.U_Ot.T),
                    T.dot(self.U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(
                        x_t, y_t, yp_t, L)))),
                sequences=[xs, T.arange(T.shape(xs)[0])])
            return result.sum()

        def sR(xs, y_t, L, V):  # this is s with subscript R in the paper, equation (4)
            result, updates = theano.scan(
                lambda x_t, t: T.dot(
                    T.dot(
                        T.switch(T.eq(t, 0),
                                 phi_x1(x_t, L).reshape((1, -1)),
                                 phi_x2(x_t, L).reshape((1, -1))), self.U_R.T),
                    T.dot(self.U_R, phi_y(y_t, V))),
                sequences=[xs, T.arange(T.shape(xs)[0])])
            return result.sum()

        x_t = T.iscalar('x_t')
        y_t = T.iscalar('y_t')
        yp_t = T.iscalar('yp_t')
        xs = T.ivector('xs')
        m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]  # m is presumably the memory
        f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]  # from equations (6) and (7)
        r_t = T.iscalar('r_t')  # from equation (8)
        gamma = T.scalar('gamma')
        L = T.fmatrix('L')  # list of messages: several vectors, each representing one sentence
        V = T.fmatrix('V')  # vocab: several vectors, each representing one word
        r_args = T.stack(*m)

        # Two hinge-style cost terms per fact; scan invokes each lambda while
        # the graph is being built, so i is read at its current loop value.
        cost_arr = [0] * 2 * (len(m) - 1)
        for i in xrange(len(m) - 1):
            cost_arr[2 * i], _ = theano.scan(  # this is equation (6)
                lambda f_bar, t: T.switch(
                    T.or_(T.eq(t, f[i]), T.eq(t,
                                              T.shape(L)[0] - 1)),
                    0,
                    T.largest(
                        gamma - s_Ot(
                            T.stack(*m[:i + 1]),  # T.stack(*m[:i+1]) should be the previous memories
                            f[i],
                            t,
                            L),
                        0)),
                sequences=[L, T.arange(T.shape(L)[0])])
            cost_arr[2 * i] /= T.shape(L)[0]
            cost_arr[2 * i + 1], _ = theano.scan(  # this is equation (7)
                lambda f_bar, t: T.switch(
                    T.or_(T.eq(t, f[i]), T.eq(t,
                                              T.shape(L)[0] - 1)),
                    0,
                    T.largest(
                        gamma + s_Ot(
                            T.stack(*m[:i + 1]),  # T.stack(*m[:i+1]) should be the previous memories
                            t,
                            f[i],
                            L),
                        0)),
                sequences=[L, T.arange(T.shape(L)[0])])
            cost_arr[2 * i + 1] /= T.shape(L)[0]

        cost1, _ = theano.scan(  # this is equation (8)
            lambda r_bar, t: T.switch(
                T.eq(r_t, t), 0,
                T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V),
                          0)),
            sequences=[V, T.arange(T.shape(V)[0])])
        cost1 /= T.shape(V)[0]

        # Total cost: the response term plus every per-fact term.
        cost = cost1.sum()
        for c in cost_arr:
            cost += c.sum()

        updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)
        # print([r_t, gamma, L, V] + m + f) # actually [r_t, gamma, L, V, x_t, m_o0, f0_t]
        self.train_model = theano.function(
            inputs=[r_t, gamma, L, V] + m +
            f,  # actually [r_t, gamma, L, V, x_t, m_o0, f0_t]
            outputs=[cost],
            updates=updates)

        #        theano.printing.pydotprint(self.train_model, outfile="./test.png",
        #                                   var_with_name_simple=True)

        # Stand-alone compiled versions of the two scoring functions.
        self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
        self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))
Пример #43
0
    def create_train(self, lenW, n_facts):
        """Build the symbolic training graph and compile the Theano functions.

        Sets three attributes:

        * ``self.train_model`` -- takes ``[r_t, gamma, L, V] + m + f`` and
          returns ``[cost]`` while applying the ``sgd`` updates to
          ``self.U_Ot`` and ``self.U_R`` (learning rate ``self.lr``).
        * ``self.sR`` -- compiled response score ``sR(xs, y_t, L, V)``.
        * ``self.s_Ot`` -- compiled memory score ``s_Ot(xs, y_t, yp_t, L)``.

        :param lenW: width of each of the three row-sized slots of a feature
            vector; every feature vector has length ``3 * lenW + 3``.
        :param n_facts: number of ``m_o<i>`` / ``f<i>_t`` scalar inputs.
        """
        one = theano.shared(np.float32(1))
        zero = theano.shared(np.float32(0))

        def phi_x1(x_t, L):
            # Row x_t of L occupies the first slot.
            row = L[x_t].reshape((-1,))
            return T.concatenate([row, zeros((2*lenW,)), zeros((3,))], axis=0)

        def phi_x2(x_t, L):
            # Row x_t of L occupies the second slot.
            row = L[x_t].reshape((-1,))
            return T.concatenate(
                [zeros((lenW,)), row, zeros((lenW,)), zeros((3,))], axis=0)

        def phi_y(x_t, L):
            # Row x_t of L occupies the third slot.
            row = L[x_t].reshape((-1,))
            return T.concatenate([zeros((2*lenW,)), row, zeros((3,))], axis=0)

        def phi_t(x_t, y_t, yp_t, L):
            # Only the last three entries are used: pairwise "index is
            # smaller" indicators.  L is accepted but not read.
            def precedes(a, b):
                return T.switch(T.lt(a, b), one, zero)

            flags = T.stack(precedes(x_t, y_t),
                            precedes(x_t, yp_t),
                            precedes(y_t, yp_t))
            return T.concatenate([zeros(3*lenW), flags], axis=0)

        def phi_x(x_t, t, L):
            # Position 0 of the input sequence uses phi_x1, the rest phi_x2.
            return T.switch(T.eq(t, 0),
                            phi_x1(x_t, L).reshape((1, -1)),
                            phi_x2(x_t, L).reshape((1, -1)))

        def s_Ot(xs, y_t, yp_t, L):
            def step(x_t, t):
                left = T.dot(phi_x(x_t, t, L), self.U_Ot.T)
                right = T.dot(
                    self.U_Ot,
                    (phi_y(y_t, L) - phi_y(yp_t, L)
                     + phi_t(x_t, y_t, yp_t, L)))
                return T.dot(left, right)

            per_input, _ = theano.scan(
                step, sequences=[xs, T.arange(T.shape(xs)[0])])
            return per_input.sum()

        def sR(xs, y_t, L, V):
            def step(x_t, t):
                left = T.dot(phi_x(x_t, t, L), self.U_R.T)
                right = T.dot(self.U_R, phi_y(y_t, V))
                return T.dot(left, right)

            per_input, _ = theano.scan(
                step, sequences=[xs, T.arange(T.shape(xs)[0])])
            return per_input.sum()

        # Symbolic inputs.
        x_t = T.iscalar('x_t')
        y_t = T.iscalar('y_t')
        yp_t = T.iscalar('yp_t')
        xs = T.ivector('xs')
        m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
        f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
        r_t = T.iscalar('r_t')
        gamma = T.scalar('gamma')
        L = T.fmatrix('L') # list of messages
        V = T.fmatrix('V') # vocab
        r_args = T.stack(*m)

        def memory_hinge(margin_at, fact):
            # Hinge term for every row index t of L, zeroed where t equals
            # `fact` or is the last row, then divided by the row count.
            def step(f_bar, t):
                skipped = T.or_(T.eq(t, fact), T.eq(t, T.shape(L)[0]-1))
                return T.switch(skipped, 0, T.largest(margin_at(t), 0))

            terms, _ = theano.scan(
                step, sequences=[L, T.arange(T.shape(L)[0])])
            terms /= T.shape(L)[0]
            return terms

        # Two hinge terms per fact: even slot "gamma - s", odd slot "gamma + s".
        cost_arr = []
        for i in xrange(len(m)-1):
            prefix = m[:i+1]
            fact = f[i]
            cost_arr.append(memory_hinge(
                lambda t: gamma - s_Ot(T.stack(*prefix), fact, t, L), fact))
            cost_arr.append(memory_hinge(
                lambda t: gamma + s_Ot(T.stack(*prefix), t, fact, L), fact))

        def response_step(r_bar, t):
            margin = gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V)
            return T.switch(T.eq(r_t, t), 0, T.largest(margin, 0))

        cost1, _ = theano.scan(
            response_step, sequences=[V, T.arange(T.shape(V)[0])])
        cost1 /= T.shape(V)[0]

        cost = cost1.sum()
        for term in cost_arr:
            cost += term.sum()

        updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)

        self.train_model = theano.function(
            inputs=[r_t, gamma, L, V] + m + f,
            outputs=[cost],
            updates=updates)

        self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
        self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))
Пример #44
0
    def __init__(self, X, image_shape, threshold=1e-4, radius=9, use_divisor=True):
        """
        Build the symbolic graph of a local contrast normalisation (LCN).

        The result is stored in ``self.output``; the Gaussian kernel is kept
        as the shared variable ``self.filters``.

        :type X: theano.tensor.dtensor4
        :param X: symbolic image tensor, of shape image_shape

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type threshold: double
        :param threshold: lower bound applied to the divisor, to avoid
                          division by zero

        :type radius: int
        :param radius: side length of the Gaussian filter patch
                       (default 9, i.e. a 9x9 patch)

        :type use_divisor: Boolean
        :param use_divisor: whether or not to apply divisive normalization
        """
        # One output map; the kernel spans every input feature map.
        filter_shape = (1, image_shape[1], radius, radius)
        self.filters = theano.shared(self.gaussian_filter(filter_shape), borrow=True)

        # Border added on each side by the 'full' convolution.
        mid = int(numpy.floor(filter_shape[2] / 2.))

        def local_average(tensor):
            # Gaussian-weighted neighbourhood sum, cropped back to the
            # spatial size of the input.
            full = conv.conv2d(
                input=tensor,
                filters=self.filters,
                image_shape=image_shape,
                filter_shape=filter_shape,
                border_mode='full'
            )
            return full[:, :, mid:-mid, mid:-mid]

        # Subtractive step: the single filter axis is made broadcastable so
        # the local mean is removed from every feature map.
        centered_X = X - T.addbroadcast(local_average(X), 1)

        if not use_divisor:
            self.output = centered_X
            return

        # Divisive step: local standard deviation from the centred signal.
        denom = T.sqrt(local_average(T.sqr(centered_X)))

        # Never divide by less than the per-image mean deviation ...
        per_img_mean = denom.mean(axis=[2, 3])
        divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)

        # ... nor by less than `threshold`.
        self.output = centered_X / T.maximum(T.addbroadcast(divisor, 1), threshold)
Пример #45
0
# Script fragment: builds a local-contrast-normalisation graph, compiles it
# into `f`, then starts iterating over `paths`.  `gaussian_filter`,
# `kernel_shape`, `filter_shape`, `X`, `orig_X`, `paths` and `base` are defined
# outside this fragment.
filters = sharedX(gaussian_filter(kernel_shape).reshape(filter_shape))

# Move axis 3 to position 1 (presumably channels-last -> channels-first for
# conv2d -- TODO confirm against where X is created).
X = X.dimshuffle(0, 3, 1, 2)

convout = conv2d(X, filters=filters, border_mode='full')

# For each pixel, remove the filtered (local mean) value of its neighbourhood.
# The 'full' convolution output is cropped by `mid` on every side.
mid = int(np.floor(kernel_shape / 2.))
centered_X = X - convout[:, :, mid:-mid, mid:-mid]

# Scale down by the local norm of the patch, but only where it exceeds 1.
# NOTE(review): the squares are taken of X, not of centered_X.
sum_sqr_XX = conv2d(T.sqr(X), filters=filters, border_mode='full')

denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])
per_img_mean = denom.mean(axis=[2, 3])
# Element-wise maximum of the local norm and its per-image, per-map mean.
divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)

# The divisor is floored at 1, so values are never scaled up.
new_X = centered_X / T.maximum(1., divisor)

# Undo the axis permutation applied above.
new_X = new_X.dimshuffle(0, 2, 3, 1)

from theano import function
# Compiled with `orig_X` as input (presumably the variable X was before the
# dimshuffle rebinding -- verify).
f = function([orig_X], new_X)

j = 0
for path in paths:
    # Print the counter whenever it is a multiple of 100.
    if j % 100 == 0:
        print j
    try:
        # Keep the relative path, then prefix it with `base`.
        raw_path = path
        path = base + '/' + path
Пример #46
0
    def create_train(self, lenW, n_facts):
        """Build the symbolic training graph and compile the Theano functions.

        Sets ``self.train_model`` (inputs ``[r_t, gamma, L, V] + m + f``,
        output ``[cost]``, with ``sgd`` updates on ``self.U_Ot`` and
        ``self.U_R``), ``self.sR`` and ``self.s_Ot``.

        Comments marked "author's note" are English translations of the
        original annotations; they have not been re-checked against the paper.

        :param lenW: width of one slot of a feature vector; feature vectors
            have length ``3 * lenW + 3``.
        :param n_facts: number of ``m_o<i>`` / ``f<i>_t`` scalar inputs.
        """
        one = theano.shared(np.float32(1))
        zero = theano.shared(np.float32(0))

        def phi_x1(x_t, L):
            # Author's note: phi_x for the case where x is the question (the
            # actual input x).  The vector for x_t is placed in the first
            # lenW positions; the result has length 3*lenW + 3.
            # reshape((-1,)) flattens to 1-D (the -1 dimension is inferred).
            row = L[x_t].reshape((-1, ))
            return T.concatenate(
                [row, zeros((2 * lenW, )), zeros((3, ))], axis=0)

        def phi_x2(x_t, L):
            # Author's note: phi_x for the case where x is a supporting
            # memory rather than the question.  Same length as phi_x1; only
            # the slot differs (second lenW positions).
            row = L[x_t].reshape((-1, ))
            return T.concatenate(
                [zeros((lenW, )), row, zeros((lenW, )), zeros((3, ))],
                axis=0)

        def phi_y(x_t, L):
            # The row is placed in the third lenW slot.
            row = L[x_t].reshape((-1, ))
            return T.concatenate(
                [zeros((2 * lenW, )), row, zeros((3, ))], axis=0)

        def phi_t(x_t, y_t, yp_t, L):
            # Fills the trailing "+ 3" entries of the 3*lenW + 3 vector.
            # Author's note: lt(a, b) is a < b; the arguments are ids, and a
            # smaller id means the item was written to memory earlier
            # (older), which is encoded as 1.
            def older(a, b):
                return T.switch(T.lt(a, b), one, zero)

            flags = T.stack(older(x_t, y_t),
                            older(x_t, yp_t),
                            older(y_t, yp_t))
            return T.concatenate([zeros(3 * lenW), flags], axis=0)

        def phi_x(x_t, t, L):
            # Author's note: t == 0 is the first id, i.e. the question.
            return T.switch(T.eq(t, 0),
                            phi_x1(x_t, L).reshape((1, -1)),
                            phi_x2(x_t, L).reshape((1, -1)))

        def s_Ot(xs, y_t, yp_t, L):
            # Author's note: y_t is the correct fact.  The per-input scores
            # are summed, which the author attributes to the linearity of
            # the vector-space model.
            def step(x_t, t):
                left = T.dot(phi_x(x_t, t, L), self.U_Ot.T)
                right = T.dot(
                    self.U_Ot,
                    (phi_y(y_t, L) - phi_y(yp_t, L)
                     + phi_t(x_t, y_t, yp_t, L)))
                return T.dot(left, right)

            per_input, _ = theano.scan(
                step, sequences=[xs, T.arange(T.shape(xs)[0])])
            return per_input.sum()

        def sR(xs, y_t, L, V):
            def step(x_t, t):
                left = T.dot(phi_x(x_t, t, L), self.U_R.T)
                right = T.dot(self.U_R, phi_y(y_t, V))
                return T.dot(left, right)

            per_input, _ = theano.scan(
                step, sequences=[xs, T.arange(T.shape(xs)[0])])
            return per_input.sum()

        x_t = T.iscalar('x_t')
        y_t = T.iscalar('y_t')
        yp_t = T.iscalar('yp_t')
        xs = T.ivector('xs')
        # Author's note: x_t is used to feed in the id; m and f both hold the
        # correct facts.
        m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
        f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
        # Author's note: the correct answer, used by the response (R) part.
        r_t = T.iscalar('r_t')
        gamma = T.scalar('gamma')
        # Author's note: matrix of memories, one row per memory, lenW columns.
        L = T.fmatrix('L')  # list of messages
        # Author's note: matrix of vocabulary vectors.
        V = T.fmatrix('V')  # vocab
        # Stacks the scalars of m into one vector (adds a dimension, unlike
        # concatenate).
        r_args = T.stack(*m)

        def memory_hinge(margin_at, fact):
            # One hinge term per row index t of L.  The term is 0 when t is
            # the supporting fact itself or the last row (author's note: the
            # last row is the question).  f_bar, the row itself, is unused.
            def step(f_bar, t):
                skipped = T.or_(T.eq(t, fact), T.eq(t, T.shape(L)[0] - 1))
                return T.switch(skipped, 0, T.largest(margin_at(t), 0))

            terms, _ = theano.scan(
                step, sequences=[L, T.arange(T.shape(L)[0])])
            # Author's note: this division does not appear in the paper.
            terms /= T.shape(L)[0]
            return terms

        # len(m) - 1 is the number of facts.  cost_arr holds two entries per
        # fact: even index for the "gamma - score" term, odd index for the
        # "gamma + score" term.  m[:i + 1] is the id plus the first i facts.
        cost_arr = []
        for i in xrange(len(m) - 1):
            prefix = m[:i + 1]
            fact = f[i]
            cost_arr.append(memory_hinge(
                lambda t: gamma - s_Ot(T.stack(*prefix), fact, t, L), fact))
            cost_arr.append(memory_hinge(
                lambda t: gamma + s_Ot(T.stack(*prefix), t, fact, L), fact))

        # Response part of the objective: hinge over every vocabulary row
        # except the correct answer r_t.
        def response_step(r_bar, t):
            margin = gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V)
            return T.switch(T.eq(r_t, t), 0, T.largest(margin, 0))

        cost1, _ = theano.scan(
            response_step, sequences=[V, T.arange(T.shape(V)[0])])
        cost1 /= T.shape(V)[0]

        cost = cost1.sum()
        for term in cost_arr:
            cost += term.sum()

        updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)

        self.train_model = theano.function(inputs=[r_t, gamma, L, V] + m + f,
                                           outputs=[cost],
                                           updates=updates)

        self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
        self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))
Пример #47
0
	def __init__(self, input_units, hidden_units,gamma,alpha):
		"""Create the two embedding matrices and compile the Theano functions.

		``self.U_O`` and ``self.U_R`` have shape
		``(hidden_units, 3 * input_units)``.  Their columns are used as three
		slices of width ``input_units``: the first embeds ``x``, the second
		embeds an already-selected fact, the third embeds the candidate.

		Compiled functions: ``getFact1``, ``getFact2``, ``getAnswer``,
		``train`` (one gradient step with step size ``alpha``) and
		``computeCost``.

		:param input_units: width of one column slice of the embeddings
		:param hidden_units: number of rows of the embeddings
		:param gamma: margin used in the three hinge costs
		:param alpha: step size of the gradient update
		"""
		width = input_units
		weight_shape = (hidden_units, input_units * 3)

		def random_weights():
			# Uniform in [-1, 1), scaled by 0.2, stored as float32.
			values = np.random.uniform(-1.0, 1.0, weight_shape) * 0.2
			return theano.shared(values.astype(np.float32))

		self.U_O = random_weights()
		self.U_R = random_weights()

		# Symbolic inputs; each underscore-prefixed matrix is scanned row by
		# row below.
		f1 = T.ivector("f1")
		_f1 = T.imatrix("_f1")

		f2 = T.ivector("f2")
		_f2 = T.imatrix("_f2")

		r = T.ivector("r")
		_r = T.imatrix("_r")

		x = T.ivector("x")
		m = T.imatrix("m") #memory
		v = T.imatrix("v") #vocabulary

		first_slice = slice(None, width)
		second_slice = slice(width, 2 * width)

		def match(U, left_cols, left, right):
			# Embed `left` with the chosen column slice and `right` with the
			# third slice, then take their product.
			left_emb = T.dot(U[:, left_cols], left)
			right_emb = T.dot(U[:, 2 * width:], right)
			return T.dot(left_emb.T, right_emb)

		def S_O(x, y):
			return match(self.U_O, first_slice, x, y)

		def S_O_f(x, y):
			return match(self.U_O, second_slice, x, y)

		def S_R(x, y):
			return match(self.U_R, first_slice, x, y)

		def S_R_f(x, y):
			return match(self.U_R, second_slice, x, y)

		def hinge_fact1(f_bar):
			return T.largest(gamma - S_O(x, f1) + S_O(x, f_bar), 0)

		def hinge_fact2(f_bar):
			return T.largest(
				gamma - S_O(x, f2) - S_O_f(f1, f2)
				+ S_O(x, f_bar) + S_O_f(f1, f_bar), 0)

		def hinge_answer(r_bar):
			return T.largest(
				gamma - S_R(x, r) - S_R_f(f1, r) - S_R_f(f2, r)
				+ S_R(x, r_bar) + S_R_f(f1, r_bar) + S_R_f(f2, r_bar), 0)

		cost1, _ = theano.scan(hinge_fact1, sequences=[_f1])
		cost2, _ = theano.scan(hinge_fact2, sequences=[_f2])
		cost3, _ = theano.scan(hinge_answer, sequences=[_r])

		# Inference: argmax of the score over the memory / vocabulary input.
		fact1 = T.argmax(S_O(x, m))
		self.getFact1 = theano.function(inputs=[x, m], outputs=fact1)

		fact2 = T.argmax(S_O(x, m) + S_O_f(f1, m))
		self.getFact2 = theano.function(inputs=[x, f1, m], outputs=fact2)

		predict = T.argmax(S_R(x, v) + S_R_f(f1, v) + S_R_f(f2, v))
		self.getAnswer = theano.function(inputs=[x, f1, f2, v], outputs=predict)

		cost = cost1.sum() + cost2.sum() + cost3.sum()

		grad_o, grad_r = T.grad(cost, [self.U_O, self.U_R])

		train_inputs = [x, f1, _f1, f2, _f2, r, _r]
		descent = [
			(self.U_O, self.U_O - alpha*grad_o),
			(self.U_R, self.U_R - alpha*grad_r),
		]

		self.train = theano.function(
			inputs=train_inputs,
			outputs=[cost],
			updates=descent
		)

		# Same cost, without touching the parameters.
		self.computeCost = theano.function(
			inputs=[x, f1, _f1, f2, _f2, r, _r],
			outputs=[cost]
		)
Пример #48
0
# Scoring function of a triplet: U . tanh(E1 B E2^T + A [E1;E2]).
# All *Temp symbols are defined outside this fragment.
score = UTemp.dot(
    T.tanh(E1Temp.dot(BTemp).dot(T.transpose(E2Temp)) + ATemp.dot(E1E2Temp)))
scoringFunction = theano.function(
    [ATemp, BTemp, UTemp, E1Temp, E2Temp, E1E2Temp], score)

# Loss function

# Score of the corrupted triplet (E2 replaced by EC), needed by the loss.
scoreCorrupted = UTemp.dot(
    T.tanh(E1Temp.dot(BTemp).dot(T.transpose(ECTemp)) + ATemp.dot(E1ECTemp)))

# Hinge on: margin 1, minus the true score, plus the corrupted score, plus
# the averaged squared-norm penalty (note the penalty sits inside the hinge).
loss = T.largest(
    0,
    (1 - score + scoreCorrupted
     + regparam *
     (T.sum(ATemp**2) + T.sum(BTemp**2) + T.sum(UTemp**2)) / 3))
lossFunction = theano.function(
    [ATemp, BTemp, UTemp, E1Temp, E2Temp, E1E2Temp, ECTemp, E1ECTemp], loss)

# Gradients of the summed loss with respect to each parameter and embedding.
dA, dB, dU, dE1, dE2, dEC = T.grad(
    T.sum(loss), [ATemp, BTemp, UTemp, E1Temp, E2Temp, ECTemp])

#Definition of function to return gradients
Пример #49
0
    def __init__(self, rng, input, filter_shape, filter_stride,
                 filter_pad, image_shape, pool_stride, poolsize=(2, 2),
                 normalistation=False, pooling=False):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        The layer computes ``max(0, pool(conv(input)) + b)``; the value before
        the rectification is kept in ``self.preOutput`` and the rectified
        value in ``self.output``.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type filter_stride: int
        :param filter_stride: convolution stride, used for both spatial axes

        :type filter_pad: int
        :param filter_pad: when > 0 it is passed as ``border_mode`` to
                           ``T.nnet.conv2d``; otherwise ``conv.conv2d`` is
                           called with its default border mode

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type pool_stride: int
        :param pool_stride: pooling stride, used for both spatial axes

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)

        :type normalistation: bool
        :param normalistation: must be falsy; normalisation was never
                               implemented in this layer

        :type pooling: bool
        :param pooling: whether to max-pool the convolution output

        :raises NotImplementedError: if ``normalistation`` is requested
        """
        print(image_shape[1])
        print(filter_shape[1])

        assert image_shape[1] == filter_shape[1]

        # The original normalisation branch was a placeholder that left
        # `normalised` unbound and crashed later with an UnboundLocalError.
        # Fail early with an explicit message instead.
        if normalistation:
            raise NotImplementedError(
                "normalisation is not implemented for this layer; "
                "pass normalistation=False")

        # keep track of model input
        self.input = input
        self.filter_shape = filter_shape
        self.image_shape = image_shape

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])

        # Weights: zero-mean Gaussian with standard deviation 0.01, further
        # divided by sqrt(fan_in / 2).
        self.W = theano.shared(
            numpy.asarray(
                rng.normal(scale=0.01, size=filter_shape) / numpy.sqrt(fan_in / 2),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # the bias is a 1D tensor -- one bias per output feature map,
        # initialised to zero
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)
        print(input.shape)

        # convolve input feature maps with filters
        if filter_pad > 0:
            conv_out = T.nnet.conv2d(
                input=input,
                filters=self.W,
                filter_shape=filter_shape,
                image_shape=image_shape,
                subsample=(filter_stride, filter_stride),
                border_mode=filter_pad
            )
        else:
            conv_out = conv.conv2d(
                input=input,
                filters=self.W,
                filter_shape=filter_shape,
                image_shape=image_shape,
                subsample=(filter_stride, filter_stride)
            )

        # downsample each feature map individually, using maxpooling
        if pooling:
            pooled_out = downsample.max_pool_2d(
                input=conv_out,
                ds=poolsize,
                ignore_border=True,
                st=(pool_stride, pool_stride)
            )
        else:
            pooled_out = conv_out

        # No normalisation is applied (see the guard above).
        normalised = pooled_out

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.preOutput = normalised + self.b.dimshuffle('x', 0, 'x', 'x')

        # Rectified linear output, built once from preOutput.
        self.output = T.largest(0, self.preOutput)
        print(self.output)

        # store parameters of this layer
        self.params = [self.W, self.b]

        # L1 and squared-L2 norms of the weights, for regularisation terms.
        self.L1 = abs(self.W).sum()
        self.L2 = (self.W ** 2).sum()
    def __init__(self,
                 X,
                 image_shape,
                 threshold=1e-4,
                 radius=9,
                 use_divisor=True):
        """
        Construct the local contrast normalisation graph for ``X``.

        ``self.filters`` holds the Gaussian kernel as a shared variable and
        ``self.output`` holds the normalised symbolic tensor.

        :type X: theano.tensor.dtensor4
        :param X: symbolic image tensor, of shape image_shape

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type threshold: double
        :param threshold: smallest divisor allowed in the divisive step, to
                          avoid division by zero

        :type radius: int
        :param radius: side length of the Gaussian filter patch
                       (default 9, i.e. a 9x9 patch)

        :type use_divisor: Boolean
        :param use_divisor: whether or not to apply divisive normalization
        """
        # Gaussian kernel: a single output map covering all input maps.
        filter_shape = (1, image_shape[1], radius, radius)
        self.filters = theano.shared(self.gaussian_filter(filter_shape),
                                     borrow=True)

        # Width of the border a 'full' convolution adds on each side.
        mid = int(numpy.floor(filter_shape[2] / 2.))

        def gaussian_smooth(signal):
            # Convolve with the kernel and crop the border away again.
            padded = conv.conv2d(input=signal,
                                 filters=self.filters,
                                 image_shape=image_shape,
                                 filter_shape=filter_shape,
                                 border_mode='full')
            return padded[:, :, mid:-mid, mid:-mid]

        # Subtractive step; the filter axis is made broadcastable so the
        # weighted average can be subtracted from every feature map.
        centered_X = X - T.addbroadcast(gaussian_smooth(X), 1)

        if use_divisor:
            # Smoothing the squared centred signal gives the local variance;
            # its square root is the local standard deviation.
            denom = T.sqrt(gaussian_smooth(T.sqr(centered_X)))

            # Divisor: the larger of the local deviation and its per-image
            # mean, bounded below by `threshold`.
            per_img_mean = denom.mean(axis=[2, 3])
            divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
            bounded = T.maximum(T.addbroadcast(divisor, 1), threshold)
            normalised = centered_X / bounded
        else:
            normalised = centered_X

        self.output = normalised
 def gated_se(self, y):
     """Return the gate-weighted squared error between self.output and y.

     Both tensors are cropped by two entries at each end of the last axis.
     The gate is the element-wise maximum of the two cropped tensors, and the
     weighted squared differences are divided by the sum of the gates.
     """
     predicted = self.output[:, :, :, 2:-2]
     target = y[:, :, :, 2:-2]
     gates = T.largest(predicted, target)
     weighted_error = T.sum(gates * (predicted - target) ** 2)
     return weighted_error / T.sum(gates)
Пример #52
0
    def create_train(self, lenW, n_facts):
        """Assemble the training cost and compile the model's Theano functions.

        After this call the instance has:

        * ``train_model(r_t, gamma, L, V, *m, *f)`` -- returns ``[cost]`` and
          applies the ``sgd`` updates to ``self.U_Ot`` and ``self.U_R`` using
          ``self.lr`` as learning rate;
        * ``sR(xs, y_t, L, V)`` -- the compiled response score;
        * ``s_Ot(xs, y_t, yp_t, L)`` -- the compiled memory score.

        :param lenW: width of one slot of the ``3 * lenW + 3`` feature vector.
        :param n_facts: number of ``m_o<i>`` and ``f<i>_t`` scalar inputs.
        """
        flag_on = theano.shared(np.float32(1))
        flag_off = theano.shared(np.float32(0))

        def phi_x1(x_t, L):
            # First slot holds row x_t of L.
            vec = L[x_t].reshape((-1, ))
            return T.concatenate(
                [vec, zeros((2 * lenW, )), zeros((3, ))], axis=0)

        def phi_x2(x_t, L):
            # Second slot holds row x_t of L.
            vec = L[x_t].reshape((-1, ))
            return T.concatenate(
                [zeros((lenW, )), vec, zeros((lenW, )), zeros((3, ))],
                axis=0)

        def phi_y(x_t, L):
            # Third slot holds row x_t of L.
            vec = L[x_t].reshape((-1, ))
            return T.concatenate(
                [zeros((2 * lenW, )), vec, zeros((3, ))], axis=0)

        def phi_t(x_t, y_t, yp_t, L):
            # Three trailing indicators for x_t < y_t, x_t < yp_t and
            # y_t < yp_t.  L is part of the signature but is not read.
            comparisons = [(x_t, y_t), (x_t, yp_t), (y_t, yp_t)]
            flags = T.stack(*[T.switch(T.lt(a, b), flag_on, flag_off)
                              for a, b in comparisons])
            return T.concatenate([zeros(3 * lenW), flags], axis=0)

        def input_features(x_t, t, L):
            # The first element of the input sequence is encoded with
            # phi_x1, every later one with phi_x2.
            as_first = phi_x1(x_t, L).reshape((1, -1))
            as_later = phi_x2(x_t, L).reshape((1, -1))
            return T.switch(T.eq(t, 0), as_first, as_later)

        def s_Ot(xs, y_t, yp_t, L):
            def score_one(x_t, t):
                candidate = (phi_y(y_t, L) - phi_y(yp_t, L)
                             + phi_t(x_t, y_t, yp_t, L))
                return T.dot(T.dot(input_features(x_t, t, L), self.U_Ot.T),
                             T.dot(self.U_Ot, candidate))

            positions = T.arange(T.shape(xs)[0])
            scores, _ = theano.scan(score_one, sequences=[xs, positions])
            return scores.sum()

        def sR(xs, y_t, L, V):
            def score_one(x_t, t):
                return T.dot(T.dot(input_features(x_t, t, L), self.U_R.T),
                             T.dot(self.U_R, phi_y(y_t, V)))

            positions = T.arange(T.shape(xs)[0])
            scores, _ = theano.scan(score_one, sequences=[xs, positions])
            return scores.sum()

        x_t = T.iscalar('x_t')
        y_t = T.iscalar('y_t')
        yp_t = T.iscalar('yp_t')
        xs = T.ivector('xs')
        m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]
        f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]
        r_t = T.iscalar('r_t')
        gamma = T.scalar('gamma')
        L = T.fmatrix('L')  # list of messages
        V = T.fmatrix('V')  # vocab
        r_args = T.stack(*m)

        def averaged_hinge(margin_at, fact):
            # Hinge over every row index of L; indices equal to `fact` or to
            # the last row contribute 0.  Result is divided by the row count.
            def hinge(f_bar, t):
                excluded = T.or_(T.eq(t, fact), T.eq(t, T.shape(L)[0] - 1))
                return T.switch(excluded, 0, T.largest(margin_at(t), 0))

            values, _ = theano.scan(
                hinge, sequences=[L, T.arange(T.shape(L)[0])])
            values /= T.shape(L)[0]
            return values

        # Per fact: one "gamma - score" term followed by one "gamma + score"
        # term, in that order.
        cost_arr = []
        for i in xrange(len(m) - 1):
            so_far = m[:i + 1]
            fact = f[i]
            cost_arr.append(averaged_hinge(
                lambda t: gamma - s_Ot(T.stack(*so_far), fact, t, L), fact))
            cost_arr.append(averaged_hinge(
                lambda t: gamma + s_Ot(T.stack(*so_far), t, fact, L), fact))

        def answer_hinge(r_bar, t):
            margin = gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V)
            return T.switch(T.eq(r_t, t), 0, T.largest(margin, 0))

        cost1, _ = theano.scan(
            answer_hinge, sequences=[V, T.arange(T.shape(V)[0])])
        cost1 /= T.shape(V)[0]

        cost = cost1.sum()
        for part in cost_arr:
            cost += part.sum()

        updates = sgd(cost, [self.U_Ot, self.U_R], learning_rate=self.lr)

        self.train_model = theano.function(inputs=[r_t, gamma, L, V] + m + f,
                                           outputs=[cost],
                                           updates=updates)

        self.sR = theano.function([xs, y_t, L, V], sR(xs, y_t, L, V))
        self.s_Ot = theano.function([xs, y_t, yp_t, L], s_Ot(xs, y_t, yp_t, L))