示例#1
0
    def _prob_expr(self, factors, as_logp):
        """Build the expression shared by the probability and logp functions.

        The potentials are calibrated with 'sum_product', the one with the
        smallest scope is selected (so the final summation touches as few
        variables as possible) and its whole scope is marginalized out.
        ``as_logp`` selects whether the log of that value or the value itself
        is returned.
        """
        potentials = self.calibrated_potentials(factors, 'sum_product')

        # Linear scan for the smallest scope; stop at the first scope of
        # size one, which ends the search.
        smallest = None
        smallest_size = 9999999999
        for candidate in potentials:
            if len(candidate.scope) >= smallest_size:
                continue
            smallest_size = len(candidate.scope)
            smallest = candidate
            if smallest_size == 1:
                break

        in_logspace = self.logspace
        if in_logspace:
            total = smallest.logsumexp_marginalize(smallest.scope)
        else:
            total = smallest.marginalize(smallest.scope)

        # Everything was summed out: extract the single remaining entry.
        value = T.reshape(total.pt_tensor, [1], ndim=1)[0]

        # Convert between log and linear space only when they disagree.
        if in_logspace and not as_logp:
            value = T.exp(value)
        elif as_logp and not in_logspace:
            value = T.log(value)
        return value
示例#2
0
    def define_train_test_funcs(self):
        """Compile the Theano training and prediction functions.

        Sets ``self.Y`` (target matrix), ``self.train`` (returns the cost and
        applies the optimizer updates) and ``self.predict`` (returns the
        activation of the last layer).
        """
        activation = self.layers[-1].activation
        self.Y = T.matrix("Y")

        # Predictions and targets are both flattened to
        # (maskY.shape[0] * batch_size, out_size) before the loss.
        flat_shape = (self.maskY.shape[0] * self.batch_size, self.out_size)
        pYs = T.reshape(activation, flat_shape)
        tYs = T.reshape(self.Y, flat_shape)
        cost = self.categorical_crossentropy(pYs, tYs)

        # Each gradient is clipped element-wise to [-10, 10].
        gparams = [T.clip(T.grad(cost, param), -10, 10)
                   for param in self.params]

        lr = T.scalar("lr")
        # self.optimizer is a string that is eval()'d into a callable.
        # NOTE(review): eval executes arbitrary code -- confirm the string
        # never comes from untrusted input.
        optimizer = eval(self.optimizer)
        updates = optimizer(self.params, gparams, lr)

        self.train = theano.function(
            inputs=[self.X, self.maskX, self.Y, self.maskY, lr, self.batch_size],
            givens={self.is_train: np.cast['int32'](1)},
            outputs=cost,
            updates=updates)
        self.predict = theano.function(
            inputs=[self.X, self.maskX, self.batch_size],
            givens={self.is_train: np.cast['int32'](0)},
            outputs=activation)
示例#3
0
def test_shape():
    """Demonstrate ``T.flatten`` and ``T.reshape`` and print the results.

    A 3-D tensor is flattened to a matrix and to a vector; the matrix result
    is then reshaped to (2, 2, 2) and to (8,).  The input value is read from
    the module-level name ``tensor3_val``.

    ``print`` is called in function form with a single argument so the code
    runs unchanged on Python 2 and Python 3.
    """
    x = T.tensor3()
    x_flat_2_mat = T.flatten(x, 2)
    x_flat_2_vec = T.flatten(x, 1)
    flat_f = theano.function([x], [x_flat_2_mat, x_flat_2_vec])
    flat_mat_val, flat_vec_val = flat_f(tensor3_val)
    print('flatten to 2-d array:')
    print(flat_mat_val)
    print('flatten to 1-d array:')
    print(flat_vec_val)

    x_mat = T.matrix()
    x_mat_2_t3 = T.reshape(x_mat, (2, 2, 2))
    x_mat_2_vec = T.reshape(x_mat, (8,))
    reshape_f = theano.function([x_mat], [x_mat_2_t3, x_mat_2_vec])
    # Alternative: pass the target shapes as symbolic T.lvector() inputs and
    # give T.reshape the explicit ndim (3 and 1 respectively).
    mat_2_t3_val, mat_2_vec_val = reshape_f(flat_mat_val)
    print('reshape 2-d array to 3-d array:')
    print(mat_2_t3_val)
    print('reshape 2-d array to 1-d array:')
    print(mat_2_vec_val)
示例#4
0
    def get_unfolding_cost(self):
        """Compute the unfolding reconstruction cost (more than 2 inputs).

        ``self.x`` is viewed as rows of length ``self.n_vector``.  The rows
        are folded into one hidden vector, which is stored in ``self.vector``
        and then unfolded again; the cost compares the unfolded result with
        ``self.x`` and adds an L2 penalty on ``self.W``.

        Returns:
            Theano scalar expression for the cost.
        """
        x = T.reshape(self.x, (-1, self.n_vector))

        # Encode: combine the running hidden value with the next row.
        yi = x[0]
        for i in range(1, self.num):
            xi = T.concatenate((yi, x[i]))
            yi = self.get_hidden_values(xi)
        # Save the deepest hidden value as the output vector.
        self.vector = copy.deepcopy(yi)

        # Decode: each reconstruction splits into two rows; the second one is
        # kept, the first one is unfolded further.
        tmp = []
        for _ in range(1, self.num):
            zi = self.get_reconstructed(yi)
            t = T.reshape(zi, (2, self.n_vector))
            tmp.append(t[1])
            yi = t[0]
        tmp.append(yi)
        # Rows were collected last-to-first; restore the input order.
        tmp.reverse()

        x = self.x
        z = T.concatenate(tmp)

        # Cross-entropy after mapping x and z through 0.5 * v + 0.5.
        # NOTE(review): presumably x and z lie in [-1, 1] -- confirm.
        L = -T.sum((0.5*x+0.5)*T.log(0.5*z+0.5) + (-0.5*x+0.5)*T.log(-0.5*z+0.5))

        cost = T.mean(L) + 0.01*(self.W**2).sum()   # cost for a minibatch
        return cost
示例#5
0
def _transform_affine(theta, input, downsample_factor):
    """Resample ``input`` on a grid transformed by per-sample affine matrices.

    ``theta`` is reshaped to (-1, 2, 3) and applied to the sampling grid
    produced by ``_meshgrid``; ``_interpolate`` then samples the input at the
    resulting source coordinates.  The output has the input's batch and
    channel sizes, with height and width divided by ``downsample_factor``.
    """
    num_batch, num_channels, height, width = input.shape
    affine = T.reshape(theta, (-1, 2, 3))

    out_height = T.cast(height / downsample_factor[0], 'int64')
    out_width = T.cast(width / downsample_factor[1], 'int64')

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s); the grid is the
    # (x_t, y_t, 1) grid of eq (1) in ref [1].
    source = T.dot(affine, _meshgrid(out_height, out_width))
    x_s_flat = source[:, 0].flatten()
    y_s_flat = source[:, 1].flatten()

    # Channels go last for interpolation: (bs, height, width, channels).
    channels_last = input.dimshuffle(0, 2, 3, 1)
    sampled = _interpolate(
        channels_last, x_s_flat, y_s_flat, out_height, out_width)

    sampled = T.reshape(
        sampled, (num_batch, out_height, out_width, num_channels))
    # Back to conv format: (bs, channels, height, width).
    return sampled.dimshuffle(0, 3, 1, 2)
def tensor_softmax(inpt, n_classes=2):
    """Apply the looked-up 'softmax' transfer to rows of ``n_classes`` values.

    The axes of ``inpt`` are reordered to (0, 3, 4, 1, 2) and flattened to
    (-1, n_classes) before the softmax; the result is returned with shape
    (1, -1, n_classes).
    """
    rows = T.reshape(inpt.dimshuffle(0, 3, 4, 1, 2), (-1, n_classes))
    softmax = lookup('softmax', _transfer)
    return T.reshape(softmax(rows), (1, -1, n_classes))
def depool(X, factor=2):
    """Perforated upsampling of a 4-D tensor by ``factor`` along the last two axes.

    Reference: http://www.brml.org/uploads/tx_sibibtex/281.pdf

    Every input value at (h, w) is copied to (h * factor, w * factor) of the
    output; all other output entries are zero.  The copy is done by
    multiplying the flattened image with a 0/1 matrix.

    Args:
        X: 4-D tensor; the last two axes are treated as height and width.
        factor: integer upsampling factor.

    Returns:
        Tensor of shape
        (X.shape[0], X.shape[1], X.shape[2] * factor, X.shape[3] * factor).
    """
    batch, channels = X.shape[0], X.shape[1]
    height, width = X.shape[2], X.shape[3]
    in_dim = height * width
    out_dim = in_dim * factor * factor

    # Flat input index r = h * width + w must land on flat output index
    # (h * factor) * (width * factor) + w * factor
    #   = r * factor + h * width * factor * (factor - 1).
    # The row h is r // width; floor division keeps the indices integral on
    # both Python 2 and Python 3.
    rows = T.arange(in_dim)
    cols = rows * factor + (rows // width) * width * factor * (factor - 1)

    upsamp_matrix = T.zeros((in_dim, out_dim))
    upsamp_matrix = T.set_subtensor(upsamp_matrix[rows, cols], 1.)

    flat = T.reshape(X, (batch, channels, in_dim))
    up_flat = T.dot(flat, upsamp_matrix)
    return T.reshape(up_flat, (batch, channels,
                               height * factor, width * factor))
示例#8
0
def _meshgrid(height, width, depth):
    """Build a flattened 3-D sampling grid with a trailing row of ones.

    Three (height, width, depth) volumes are built in Theano so the sizes may
    be symbolic:
      * x_t holds linspace(-1, 1, height) along the first axis,
      * y_t holds linspace(-1, 1, width) along the second axis,
      * z_t holds linspace(-1, 1, depth) along the last axis.
    Each volume is flattened to one row and the rows are stacked with a row
    of ones, giving a 4-row matrix.
    """
    # This is the grid generator from eq. (1) in reference [1], extended with
    # a depth axis.  If the sizes were known at construction time the grid
    # could be precomputed in numpy instead.
    x_plane = T.dot(
        _linspace(-1.0, 1.0, height).dimshuffle(0, 'x'),
        T.ones((1, width)))
    x_t = T.dot(T.reshape(x_plane, (height, width, 1)),
                T.ones((1, 1, depth)))

    y_plane = T.dot(
        T.ones((height, 1)),
        _linspace(-1.0, 1.0, width).dimshuffle('x', 0))
    y_t = T.dot(T.reshape(y_plane, (height, width, 1)),
                T.ones((1, 1, depth)))

    z_line = T.reshape(_linspace(-1.0, 1.0, depth), (1, 1, -1))
    z_t = T.dot(T.ones((height, width, 1)), z_line)

    rows = [volume.reshape((1, -1)) for volume in (x_t, y_t, z_t)]
    rows.append(T.ones_like(rows[0]))
    return T.concatenate(rows, axis=0)
示例#9
0
    def k_max_pool(self, x, k):
        """
        Perform k-max pooling over the merged last two axes of the input.

        The last two axes of ``x`` are merged, the ``k`` largest values along
        the merged axis are selected and returned in their original order.

        x: theano.tensor.tensor4

        k: theano.tensor.iscalar
            the k parameter

        Returns:
        4D tensor whose last axis has length 1 and whose third axis holds
        the selected values
        """
        flat = T.reshape(x, (x.shape[0], x.shape[1], 1, x.shape[2] * x.shape[3]))

        # argsort is ascending, so the last k positions are the k largest;
        # sorting those positions restores the original ordering.
        order = T.argsort(flat, axis=3)
        keep = T.sort(order[:, :, :, -k:], axis=3)

        n0, n1, n2, n3 = keep.shape

        # Flat index vectors for the first three axes, aligned with
        # keep.flatten() so advanced indexing picks one value per entry.
        idx0 = T.arange(n0).repeat(n1 * n2 * n3)

        idx1 = T.arange(n1).repeat(n2 * n3)
        idx1 = idx1.reshape((n1 * n2 * n3, 1)).repeat(n0, axis=1)
        idx1 = idx1.T.flatten()

        idx2 = T.arange(n2).repeat(n3)
        idx2 = idx2.reshape((n2 * n3, 1)).repeat(n0 * n1, axis=1)
        idx2 = idx2.T.flatten()

        picked = flat[idx0, idx1, idx2, keep.flatten()].reshape(keep.shape)

        return T.reshape(
            picked,
            (picked.shape[0], picked.shape[1],
             picked.shape[2] * picked.shape[3], 1))
示例#10
0
文件: cold2.py 项目: zenna/ig
def make_ro(r, raster_space, width, height):
    """Build symbolic ray directions and ray origins for a set of rotations.

    ``raster_space`` is normalised by the resolution, mapped to [-1, 1],
    scaled by the aspect ratio and stacked into points on the image plane.
    The plane and a fixed origin on the z-axis are rotated by ``r``; the
    number of rotations is ``r.shape[0]``.

    Returns:
        (rd, ro_t): unit ray directions from the rotated origin to the
        rotated image plane, and the rotated origin.
    """
    nmatrices = r.shape[0]
    resolution = np.array([width, height], dtype=config.floatX)
    # Multiplying x by width/height makes the pixels square.
    aspect = np.array([resolution[0]/resolution[1], 1.0], dtype=config.floatX)

    # Raster coordinates -> [0, 1] -> NDC space [-1, 1] -> aspect corrected.
    unit_space = raster_space / resolution
    ndc_space = (-1.0 + 2.0 * unit_space) * aspect

    # Position on the z-plane; the 0.5 factor changes the focal length.
    plane_xyz = stack(ndc_space, width, height, 1.0)*0.5

    # The origin sits farther along the z-axis.
    origin = np.array([0,0,1.5], dtype=config.floatX)

    # Rotate both by the same matrices, then shift by 0.5 since voxels are
    # in [0, 1].
    ro_t = T.dot(T.reshape(origin, (1,3)), r) + 0.5
    plane_t = T.dot(T.reshape(plane_xyz, (1, width, height, 3)), r)
    plane_t = T.reshape(plane_t, (width, height, nmatrices, 3))
    plane_t = T.transpose(plane_t, (2,0,1,3)) + 0.5

    # Normalised ray directions from origin to image plane.
    unnorm_rd = plane_t - T.reshape(ro_t, (nmatrices,1,1,3))
    lengths = T.reshape(unnorm_rd.norm(2, axis=3),
                        (nmatrices, width, height, 1))
    return unnorm_rd / lengths, ro_t
示例#11
0
    def error(self, outputs):
        '''Build a theano expression for computing the network error.

        The error is the negative log-probability of the correct label.  In
        weighted mode it is the weighted average plus a penalty: the mean of
        the maximum (over the last axis) of the 'hid2:alpha' output summed
        over its first axis.

        Parameters
        ----------
        outputs : dict mapping str to theano expression
            A dictionary of all outputs generated by the layers in this
            network.  Must contain the network output and 'hid2:alpha'.

        Returns
        -------
        error : theano expression
            A theano expression representing the network error.
        '''
        output = outputs[self.output_name()]
        alpha = outputs['hid2:alpha']
        alpha_l_inf = alpha.sum(axis=0).max(axis=-1)

        # Flatten all but the last component of the output and labels.
        n = output.shape[0] * output.shape[1]
        correct = TT.reshape(self.labels, (n, ))
        weights = TT.reshape(self.weights, (n, ))
        prob = TT.reshape(output, (n, output.shape[2]))

        # Probabilities are clipped to [1e-8, 1] so the log stays finite.
        picked = prob[TT.arange(n), correct]
        nlp = -TT.log(TT.clip(picked, 1e-8, 1))

        if not self.weighted:
            return nlp.mean()
        return (weights * nlp).sum() / weights.sum() + alpha_l_inf.mean()
示例#12
0
文件: Models.py 项目: chuckgu/RNN
    def build(self,output_type):
        """Finish the model: parameters, update buffers, regularizers, output.

        Extends ``self.params`` with the output weights, allocates one
        zero-filled shared variable per parameter in ``self.updates``, adds
        the ``W_hy`` terms to ``self.L1`` / ``self.L2_sqr`` and defines
        ``self.y_pred``, ``self.p_y_given_x`` and ``self.loss``.

        NOTE(review): ``output_type`` is not read in this body.
        """
        self.params += [self.W_hy, self.b_hy, self.W_hi, self.b_hi]

        # One buffer per parameter, same shape as the parameter's value.
        for param in self.params:
            zeros = np.zeros(param.get_value(borrow=True).shape,
                             dtype=theano.config.floatX)
            self.updates[param] = theano.shared(value=zeros, name='updates')

        # Regularizers on the hidden-to-output weights.
        self.L1 += T.sum(abs(self.W_hy))
        self.L2_sqr += T.sum(self.W_hy**2)

        # Final prediction: the softmax is applied on a 2-D view whose rows
        # merge the first two axes, then reshaped back.
        self.y_pred = T.dot(self.get_output(), self.W_hy) + self.b_hy
        scores = self.y_pred
        rows = T.reshape(scores, (scores.shape[0] * scores.shape[1], -1))
        self.p_y_given_x = T.reshape(T.nnet.softmax(rows), scores.shape)

        self.loss = lambda y: Loss.nll_multiclass(self.p_y_given_x,y)
示例#13
0
            def _active(m, pre_h, x):
                """Run one decoder step and return the flattened state and output.

                ``x`` and ``pre_h`` are reshaped to (batch_size, last_shape[0])
                and (batch_size, last_shape[1]) before the decoder is called.
                The softmax output is masked by ``m`` and split at column
                ``self.dim_y``; the second part, projected through
                ``self.word_tag_matrix``, is added to the first part.

                Returns ``(h, y, new_y_dim_y, y_dim_pos)`` with ``h`` and ``y``
                flattened to a single row each.
                """
                x = T.reshape(x, (self.batch_size, last_shape[0]))
                pre_h = T.reshape(pre_h, (self.batch_size, last_shape[1]))

                h = self.decoder._active(x, pre_h)

                y = T.nnet.softmax(T.dot(h, self.W_hy) + self.b_y)
                y = y * m[:, None]
                # Debug output; single-argument call form prints the same on
                # Python 2 and Python 3.
                print(type(y))

                y_dim_y = y[:, 0:self.dim_y]
                y_dim_pos = y[:, self.dim_y:]
                print(type(y_dim_y))
                print(type(y_dim_pos))

                new_y_dim_y = y_dim_y + T.dot(y_dim_pos, self.word_tag_matrix)

                y = T.concatenate([new_y_dim_y, y_dim_pos], axis=1)
                print(type(y))

                h = T.reshape(h, (1, self.batch_size * last_shape[1]))
                y = T.reshape(y, (1, self.batch_size * last_shape[0]))
                return h, y, new_y_dim_y, y_dim_pos
示例#14
0
def T_l2_cost_conv_dA(x,a,A,imshp,kshp,featshp,stride=(1,1),mask=True):
    """Build the kernel-gradient expression of the convolutional L2 cost.

    The image error and features come from ``helper_T_l2_cost_conv``.  For
    unit stride the gradient is computed with ``conv2d`` on tensors whose
    first two axes are swapped; otherwise ``MyConv_view`` is used.  The 4-D
    gradient is returned as the transpose of its
    (kshp[0], kshp[1]*kshp[2]*kshp[3]) reshaping.
    """
    image_error, kernel, features = helper_T_l2_cost_conv(
        x=x, a=a, A=A, imshp=imshp, kshp=kshp, featshp=featshp,
        stride=stride, mask=mask)

    if stride != (1,1):
        strided_conv = MyConv_view(strides=stride, kshp=kshp)
        kernel_grad = strided_conv(image_error, features)
    else:
        # Swap the first two axes everywhere; the error is also flipped
        # along both spatial axes.
        image_error_rot = T.transpose(image_error, [1,0,2,3])[:,:,::-1,::-1]
        imshp_rot = (imshp[1], imshp[0], imshp[2], imshp[3])
        featshp_rot = (featshp[1], featshp[0], featshp[2], featshp[3])
        features_rot = T.transpose(features, [1,0,2,3])

        featshp_rot_logical = (featshp_rot[0],
                               featshp_rot[1],
                               imshp[2] - kshp[2] + 1,
                               imshp[3] - kshp[3] + 1)
        kernel_grad_rot = -1.*conv2d(
            image_error_rot, features_rot,
            image_shape=imshp_rot, filter_shape=featshp_rot,
            imshp_logical=imshp_rot[1:],
            kshp_logical=featshp_rot_logical[2:])
        kernel_grad = T.transpose(kernel_grad_rot, [1,0,2,3])

    flat_grad = T.reshape(
        kernel_grad, (kshp[0], kshp[1]*kshp[2]*kshp[3]), ndim=2)
    return T.transpose(flat_grad)
示例#15
0
def T_subspacel1_slow_shrinkage_conv(a, L, lam_sparse, lam_slow, imshp,kshp,featshp,stride=(1,1),small_value=.001):
    """Apply a slow-amplitude shrinkage followed by a subspace-L1 shrinkage.

    ``a`` is transposed and reshaped to 4-D.  Adjacent channel pairs (even
    and odd index on axis 1) are treated as two-component subspaces and both
    shrinkage factors scale the two components of each pair together.  The
    result is reshaped to (featshp[0], featshp[1]*featshp[2]*featshp[3]) and
    transposed before it is returned.

    NOTE(review): ``stride`` is accepted but never read in this body.
    NOTE(review): ``L`` presumably is a step/Lipschitz constant and
    ``lam_sparse`` / ``lam_slow`` penalty weights -- confirm with the caller.
    """
    # Only the last two entries of the incoming featshp are kept.
    featshp = (imshp[0],kshp[0],featshp[2],featshp[3]) # num images, features, szy, szx
    features = T.reshape(T.transpose(a),featshp,ndim=4)

    # Amplitude of each (even, odd) channel pair; small_value keeps it
    # strictly positive so the division below is defined.
    amp = T.sqrt(features[:,::2,:,:]**2 + features[:,1::2,:,:]**2 + small_value)
    #damp = amp[:,1:] - amp[:,:-1]

    # compose slow shrinkage with subspace l1 shrinkage

    # slow shrinkage
    # div holds the negated second difference of amp along axis 0 in the
    # interior and one-sided first differences at the two ends.
    div = T.zeros_like(amp)
    d1 = amp[1:,:,:,:] - amp[:-1,:,:,:]
    d2 = d1[1:,:,:,:] - d1[:-1,:,:,:]
    div = T.set_subtensor(div[1:-1,:,:,:], -d2)
    div = T.set_subtensor(div[0,:,:,:], -d1[0,:,:,:])
    div = T.set_subtensor(div[-1,:,:,:], d1[-1,:,:,:])
    slow_amp_shrinkage = 1 - (lam_slow / L) * (div / amp)
    # Negative shrinkage factors are clamped to zero.
    slow_amp_value = T.switch(T.gt(slow_amp_shrinkage, 0), slow_amp_shrinkage, 0)
    slow_shrinkage_prox_a = slow_amp_value * features[:, ::2, :,:]
    slow_shrinkage_prox_b = slow_amp_value * features[:,1::2, :,:]

    # subspace l1 shrinkage
    # NOTE(review): no small_value is added under this square root, so the
    # division two lines below divides by zero where both components vanish.
    amp_slow_shrinkage_prox = T.sqrt(slow_shrinkage_prox_a ** 2 + slow_shrinkage_prox_b ** 2)
    #amp_shrinkage = 1. - (lam_slow*lam_sparse/L)*amp_slow_shrinkage_prox
    amp_shrinkage = 1. - (lam_sparse / L) / amp_slow_shrinkage_prox
    amp_value = T.switch(T.gt(amp_shrinkage, 0.), amp_shrinkage, 0.)
    # Write the two shrunk components back into their even / odd channels.
    subspacel1_prox = T.zeros_like(features)
    subspacel1_prox = T.set_subtensor(subspacel1_prox[:, ::2, :,:], amp_value * slow_shrinkage_prox_a)
    subspacel1_prox = T.set_subtensor(subspacel1_prox[:,1::2, :,:], amp_value * slow_shrinkage_prox_b)

    reshape_subspacel1_prox = T.transpose(T.reshape(subspacel1_prox,(featshp[0],featshp[1]*featshp[2]*featshp[3]),ndim=2))
    return reshape_subspacel1_prox
示例#16
0
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
                mode='max'):
    """
    Pool an N-D tensor (N >= 2) over its two last dimensions.

    The input is downscaled by the given factor; with the default mode each
    output value is the maximum of a patch of size (ds[0], ds[1]).

    :type input: N-D theano tensor of input images.
    :param input: input images. Pooling is done over the 2 last dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts over rows/cols to
        get the next pool region. If st is None, it is considered equal to
        ds (no overlap on pooling regions).
    :type padding: tuple of two ints
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
        of the images; pad_h is the size of the top and bottom margins and
        pad_w is the size of the left and right margins.
    :type mode: string
    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'.
        Operation executed on each window. `max` always excludes the padding
        in the computation. `average` gives you the choice to include or
        exclude it.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    pool = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                               mode=mode)

    # 4-D input is already in the layout the op expects.
    if input.ndim == 4:
        return pool(input)

    leading_shape = input.shape[:-2]
    img_shape = input.shape[-2:]

    # Collapse all leading dimensions into one batch axis; the product is
    # padded to a length-1 vector so it can be joined with the other shapes.
    batch_size = tensor.shape_padright(tensor.prod(leading_shape), 1)

    # 4-D view with shape (batch_size, 1, height, width).
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1]), img_shape),
        'int64')
    pooled = pool(tensor.reshape(input, new_shape, ndim=4))

    # Restore the original leading dimensions around the pooled image.
    outshp = tensor.join(0, leading_shape, pooled.shape[-2:])
    return tensor.reshape(pooled, outshp, ndim=input.ndim)
示例#17
0
 def cost(self):
   """
   Build the training cost expression for the loss named by ``self.loss``.

   Return value per branch, as written in the code below:
     * 'sprint': ``(err, known_grads)`` with ``known_grads = {self.z: grad}``
     * 'ctc':    ``(err.sum(), known_grads, priors.sum(axis=0))``
     * 'ce_ctc': ``(ce, None)``
     * 'ctc2':   ``(err, None)``
   For any other value of ``self.loss`` the visible code falls off the end
   and returns None.
   """
   # Flattened int32 labels. NOTE(review): y_f is only referenced in the
   # commented-out line of the 'ce_ctc' branch.
   y_f = T.cast(T.reshape(self.y_data_flat, (self.y_data_flat.shape[0] * self.y_data_flat.shape[1]), ndim = 1), 'int32')
   known_grads = None
   if self.loss == 'sprint':
     # sprint_opts may arrive as a JSON string; decode it once and cache it.
     if not isinstance(self.sprint_opts, dict):
       import json
       self.sprint_opts = json.loads(self.sprint_opts)
     assert isinstance(self.sprint_opts, dict), "you need to specify sprint_opts in the output layer"
     if self.exp_normalize:
       log_probs = T.log(self.p_y_given_x)
     else:
       log_probs = self.z
     # The op returns both the error and its gradient.
     sprint_error_op = SprintErrorSigOp(self.attrs.get("target", "classes"), self.sprint_opts)
     err, grad = sprint_error_op(log_probs, T.sum(self.index, axis=0))
     err = err.sum()
     if self.loss_like_ce:
       # Replace the error by a cross-entropy-like term against the
       # reference y_ref = clip(p - grad, 0, 1), masked by self.index.
       y_ref = T.clip(self.p_y_given_x - grad, numpy.float32(0), numpy.float32(1))
       err = -T.sum(T.log(T.pow(self.p_y_given_x, y_ref)) * T.cast(self.index, "float32").dimshuffle(0, 1, 'x'))
     if self.ce_smoothing:
       # Interpolate with the frame-wise cross-entropy: scale error and
       # gradient by (1 - ce_smoothing) and add ce_smoothing * CE.
       err *= numpy.float32(1.0 - self.ce_smoothing)
       grad *= numpy.float32(1.0 - self.ce_smoothing)
       if not self.prior_scale:  # we kept the softmax bias as it was
         nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y_data_flat[self.i])
       else:  # assume that we have subtracted the bias by the log priors beforehand
         assert self.log_prior is not None
         # In this case, for the CE calculation, we need to add the log priors again.
         y_m_prior = T.reshape(self.z + numpy.float32(self.prior_scale) * self.log_prior,
                               (self.z.shape[0] * self.z.shape[1], self.z.shape[2]), ndim=2)
         nll, pcx = T.nnet.crossentropy_softmax_1hot(x=y_m_prior[self.i], y_idx=self.y_data_flat[self.i])
       ce = numpy.float32(self.ce_smoothing) * T.sum(nll)
       err += ce
       grad += T.grad(ce, self.z)
     # The gradient w.r.t. self.z is handed back to the caller.
     known_grads = {self.z: grad}
     return err, known_grads
   elif self.loss == 'ctc':
     from theano.tensor.extra_ops import cpu_contiguous
     # NOTE(review): this branch returns three values, unlike the others.
     err, grad, priors = CTCOp()(self.p_y_given_x, cpu_contiguous(self.y.dimshuffle(1, 0)), self.index_for_ctc())
     known_grads = {self.z: grad}
     return err.sum(), known_grads, priors.sum(axis=0)
   elif self.loss == 'ce_ctc':
     # Softmax over self.z with its first two axes merged, then the summed
     # negative log-probability of the labels at the positions in self.i.
     y_m = T.reshape(self.z, (self.z.shape[0] * self.z.shape[1], self.z.shape[2]), ndim=2)
     p_y_given_x = T.nnet.softmax(y_m)
     #pcx = p_y_given_x[(self.i > 0).nonzero(), y_f[(self.i > 0).nonzero()]]
     pcx = p_y_given_x[self.i, self.y_data_flat[self.i]]
     ce = -T.sum(T.log(pcx))
     return ce, known_grads
   elif self.loss == 'ctc2':
     from NetworkCtcLayer import ctc_cost, uniq_with_lengths, log_sum
     max_time = self.z.shape[0]
     num_batches = self.z.shape[1]
     # Mask and labels are viewed as (max_time, num_batches).
     time_mask = self.index.reshape((max_time, num_batches))
     y_batches = self.y_data_flat.reshape((max_time, num_batches))
     targets, seq_lens = uniq_with_lengths(y_batches, time_mask)
     # NOTE(review): the normalisation runs over axis 0 of self.z -- confirm
     # this is the intended axis.
     log_pcx = self.z - log_sum(self.z, axis=0, keepdims=True)
     err = ctc_cost(log_pcx, time_mask, targets, seq_lens)
     return err, known_grads
	def forward_filter_step(self, xp):
		"""Build one forward filtering step for the observation ``xp``.

		Samples new ``s`` values from the proposal via ``theano.scan``, draws
		``h`` samples from ``self.calc_h_probs``, reweights ``self.weights_now``
		and records the new and previous state in the updates dictionary.

		Returns ``(h_samps, updates)``.
		"""
		
		#need to sample from the proposal distribution first
		
		#these terms are the same for every particle
		xpred=T.dot(self.W.T,(xp-self.c))/(2.0*self.xvar**2)
		sig=(1.0/(self.b**2+1.0/(2.0*self.xvar**2)))/2.0
		
		# sample_proposal_s is called self.npcl times, iterating over s_now and
		# h_now, with xpred and sig passed unchanged to every call.
		[s_samps, s_pred, prop_terms], updates = theano.scan(fn=self.sample_proposal_s,
										outputs_info=[None, None, None],
										sequences=[self.s_now, self.h_now],
										non_sequences=[xpred, sig],
										n_steps=self.npcl)
		
		#now that we have samples from the proposal distribution, we need to reweight them
		
		#would use this if we have multiple generative models
		#recons, updates = theano.scan(fn=get_recon,
										#outputs_info=[None],
										#sequences=[s_samps, h_samps],
										#n_steps=self.npcl)
		
		#this loops over every row of A and mu to calculate relative h probabilities
		#for each particle
		
		h_probs = self.calc_h_probs(s_samps)
		
		h_samps=self.theano_rng.multinomial(pvals=h_probs.T)
		
		# Reconstructions of the observation from the sampled s values.
		recons=T.dot(self.W, s_samps.T) + T.reshape(self.c,(self.nx,1))
		
		# Energies: reconstruction term plus the s term, minus the proposal term.
		x_terms=-T.sum((recons-T.reshape(xp,(self.nx,1)))**2,axis=0)/(2.0*self.xvar**2)
		s_terms=-T.sum(((s_samps-s_pred)*self.b)**2,axis=1)
		
		energies=x_terms+s_terms-prop_terms
		
		#to avoid exponentiating large or very small numbers, I 
		#"re-center" the reweighting factors by adding a constant, 
		#as this has no impact on the resulting new weights
		
		energies_recentered=energies-T.max(energies)
		
		alpha=T.exp(energies_recentered) #these are the reweighting factors
		
		new_weights_unnorm=self.weights_now*alpha
		normalizer=T.sum(new_weights_unnorm)
		new_weights=new_weights_unnorm/normalizer  #need to normalize new weights
		
		# Shift the current state into the *_past variables, then store the
		# new samples and weights; everything is cast to float32.
		updates[self.h_past]=T.cast(self.h_now,'float32')
		updates[self.s_past]=T.cast(self.s_now,'float32')
		
		updates[self.h_now]=T.cast(h_samps,'float32')
		updates[self.s_now]=T.cast(s_samps,'float32')
		
		updates[self.weights_past]=T.cast(self.weights_now,'float32')
		updates[self.weights_now]=T.cast(new_weights,'float32')
		
		#return normalizer, energies_recentered, s_samps, s_pred, T.dot(self.W.T,(xp-self.c)), updates
		#return normalizer, energies_recentered, updates
		return h_samps, updates
示例#19
0
文件: special.py 项目: rmanor/Lasagne
def _transform(theta, input, downsample_factor):
    """Resample ``input`` on a grid shifted by one offset per ``theta`` entry.

    ``theta`` is reshaped to a column and padded with a zero column, so each
    row is (offset, 0); that pair is added to the sampling grid from
    ``_meshgrid`` and ``_interpolate`` samples the input at the result.  The
    output has the input's batch and channel sizes, with height and width
    divided by ``downsample_factor``.
    """
    num_batch, num_channels, height, width = input.shape
    offsets = T.reshape(theta, (-1, 1))

    out_height = T.cast(height / downsample_factor[0], 'int64')
    out_width = T.cast(width / downsample_factor[1], 'int64')
    grid = _meshgrid(out_height, out_width)

    # The offset goes in the first coordinate, zero in the second; the grid
    # is broadcast over the leading axis.
    padded = T.concatenate([offsets, T.zeros_like(offsets)], axis=1)
    shifted = padded.dimshuffle(0, 1, 'x') + grid.dimshuffle('x', 0, 1)

    x_s_flat = shifted[:, 0].flatten()
    y_s_flat = shifted[:, 1].flatten()

    # Channels go last for interpolation: (bs, height, width, channels).
    channels_last = input.dimshuffle(0, 2, 3, 1)
    sampled = _interpolate(
        channels_last, x_s_flat, y_s_flat, out_height, out_width)

    sampled = T.reshape(
        sampled, (num_batch, out_height, out_width, num_channels))
    # Back to conv format: (bs, channels, height, width).
    return sampled.dimshuffle(0, 3, 1, 2)
示例#20
0
文件: iq.py 项目: zenna/Arrows.jl
def castray(ro, rd, shape_params, nprims, width, height):
    """March rays through the scene and return a normalised depth map.

    Starting at the ray origin, each of the 25 steps evaluates the distance
    returned by ``mapedit`` at the current point on every ray and advances
    the ray by that distance.  The accumulated distance is the depth.

    Returns:
        (width, height) tensor holding depth / 20.0 where the depth is below
        20.0 and zero elsewhere.
    """
    tmax = 20.0
    max_num_steps = 25

    # First step is evaluated at the origin; multiplying rd by 0 keeps the
    # shape of ro + rd.  FIXME: reshape instead of mul by 0.
    dists = mapedit(ro + rd * 0, shape_params, nprims, width, height)
    accum_dists = T.reshape(dists, (width, height, 1))

    for _ in range(max_num_steps - 1):
        dists = mapedit(ro + rd * accum_dists, shape_params, nprims,
                        width, height)
        accum_dists = accum_dists + T.reshape(dists, (width, height, 1))

    last_depth = T.reshape(accum_dists, (width, height))
    # Rays that travelled tmax or further are treated as misses (depth 0).
    return T.switch(last_depth < tmax, last_depth / tmax,
                    T.zeros_like(last_depth))
示例#21
0
文件: iq.py 项目: zenna/Arrows.jl
def mapedit(pos, params, nprims, width, height):
    """Evaluate the blended distance field of ``nprims`` primitives at ``pos``.

    Columns of ``params`` as read below: 0-2 translation, 3 sphere radius,
    4 round-box radius, 5-6 blend logits.  Per primitive, the sphere and
    round-box distances are mixed with softmax weights of the blend logits;
    the minimum over primitives is then combined with the plane distance
    through ``opU``.
    """
    # Each position is repeated once per primitive, then translated.
    tiled = T.reshape(T.tile(pos, nprims), (width, height, nprims, 3))
    local_pos = tiled + params[:, 0:3]

    # Sphere: distance to the centre minus the radius.
    spheredists = local_pos.norm(2, axis = 3) - params[:, 3]

    # Round box with fixed half-extents of .15.  FIXME? Share radii param?
    box_radii = params[:, 4]
    outside = T.clip(T.abs_(local_pos) - np.array([.15, .15, .15]), 0.0, 1000.0)
    rounddists = outside.norm(2, axis = 3) - box_radii

    # Softmax over the two blend logits of each primitive.
    expweights = T.exp(params[:, 5:7])
    softweights = expweights / T.reshape(T.sum(expweights, axis = 1), (nprims, 1))

    # Weighted mix of the two shapes, then union (min) over primitives.
    both = T.stack([spheredists, rounddists], axis=3)
    mixed = T.sum(both * softweights, axis = 3)
    union = mixed.min(axis=2)

    # Add the extra dimension and the plane.
    stacked_union = adddim(union)
    plane = sdPlane(pos)
    return opU(stacked_union, plane, width, height)
def compute_f_mu(x, t, params):
    """Evaluate a normalised RBF layer gated over ``t`` and return float32.

    ``params`` unpacks to ``centers, spreads, biases, M, b``.  Normalised
    exponential activations of ``x`` are projected through ``M`` into
    ``ntgates`` outputs of size ``nx``; these are averaged with weights
    ``exp(-kT * (tpoint - t)**2)`` normalised over the gates.  ``ntgates``,
    ``nx`` and ``kT`` are read from module scope.
    """
    centers, spreads, biases, M, b = params

    diffs = x.dimshuffle(0, 1, 2, 'x') - centers.dimshuffle('x', 'x', 0, 1)
    scaled_diffs = (diffs**2) * T.exp(spreads).dimshuffle('x', 'x', 0, 1)
    # The biases are multiplied by 0.0, so they do not change the value.
    exp_terms = T.sum(scaled_diffs, axis=2) + biases.dimshuffle('x', 'x', 0)*0.0
    h = T.exp(-exp_terms)

    # Normalise the activations over the last axis.
    hnorm = h / T.sum(h, axis=2).dimshuffle(0, 1, 'x')

    # nt by nb by ntgates by nx
    z = T.reshape(T.dot(hnorm, M), (t.shape[0], t.shape[1], ntgates, nx))
    z = z + b.dimshuffle('x', 'x', 0, 1)

    # Gate centres are evenly spaced in [0, 1].
    tpoints = T.cast(T.arange(ntgates), 'float32') / T.cast(ntgates-1, 'float32')
    tpoints = T.reshape(tpoints, (1, 1, ntgates))

    tgating = T.exp(-kT*(tpoints-t)**2)
    gate_sum = T.reshape(T.sum(tgating, axis=2), (t.shape[0], t.shape[1], 1))
    tgating = tgating / gate_sum
    tgating = T.reshape(tgating, (t.shape[0], t.shape[1], ntgates, 1))

    out = T.sum(z*tgating, axis=2)
    return T.cast(out, 'float32')
def maxpool_3D(input, ds, ignore_border=False):
    """Max-pool the last three dimensions of ``input`` with two 2-D poolings.

    The last two dimensions are pooled first with factors (ds[1], ds[2]);
    the third-last dimension is then moved to the back and pooled with
    factor ds[0].  The result has the axis order of the input.

    Raises NotImplementedError when ``input`` has fewer than 3 dimensions.
    """
   
    #input.dimshuffle (0, 2, 1, 3, 4)   # convert to make video in back. 
    # no need to reshuffle. 
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    # extract nr dimensions
    vid_dim = input.ndim
    # max pool in two different steps, so we can use the 2d implementation of 
    # downsamplefactormax. First maxpool frames as usual. 
    # Then maxpool the time dimension. Shift the time dimension to the third 
    # position, so rows and cols are in the back


    # extract dimensions
    frame_shape = input.shape[-2:]
    
    # product of all "leading" dimensions, padded to a length-1 vector so it
    # can be joined with the other shape pieces
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size,1)
    
    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = T.cast(T.join(0, batch_size,
                                        T.as_tensor([1,]), 
                                        frame_shape), 'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols
    op = DownsampleFactorMax((ds[1],ds[2]), ignore_border)          # so second and third dimensions of ds are for height and width
    output = op(input_4D)
    # restore to original shape                                     
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # now maxpool time
    # output (time, rows, cols), reshape so that time is in the back
    shufl = (list(range(vid_dim-3)) + [vid_dim-2]+[vid_dim-1]+[vid_dim-3])
    input_time = out.dimshuffle(shufl)
    # reset dimensions
    vid_shape = input_time.shape[-2:]
    
    # same collapsing of the leading dimensions as in the first pass
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size,1)
    
    # store as 4D tensor with shape: (batch_size,1,width,time)
    new_shape = T.cast(T.join(0, batch_size,
                                        T.as_tensor([1,]), 
                                        vid_shape), 'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)
    # downsample mini-batch of videos in time
    op = DownsampleFactorMax((1,ds[0]), ignore_border)            # Here the time dimension is downsampled. 
    outtime = op(input_4D_time)
    # output 
    # restore to original shape (xxx, rows, cols, time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    # inverse of the first shuffle: moves time back in front of rows and cols
    shufl = (list(range(vid_dim-3)) + [vid_dim-1]+[vid_dim-3]+[vid_dim-2])
    #rval = T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
示例#24
0
文件: utils.py 项目: Nehoroshiy/urnn
def do_fft(input, n_hidden):
    """
    Apply ``cufft`` to a batch of flattened vectors and rescale the result.

    ``input`` is reshaped to (rows, 2, n_hidden), the size-2 axis is moved
    last, ``cufft`` is applied and multiplied by ``sqrt(n_hidden)``, and the
    result is put back into the original (rows, 2 * n_hidden) layout.
    NOTE(review): the size-2 axis presumably holds real/imaginary parts —
    confirm against the ``cufft`` helper.
    """
    n_rows = input.shape[0]
    # (rows, 2, n_hidden) -> (rows, n_hidden, 2)
    stacked = T.reshape(input, (n_rows, 2, n_hidden)).dimshuffle(0, 2, 1)
    scaled = cufft(stacked) * T.sqrt(n_hidden)
    # back to (rows, 2, n_hidden), then flatten the two trailing axes
    return T.reshape(scaled.dimshuffle(0, 2, 1), (n_rows, 2 * n_hidden))
示例#25
0
文件: utils.py 项目: Nehoroshiy/urnn
def do_ifft(input, n_hidden):
    """
    Apply ``cuifft`` to a batch of flattened vectors and rescale the result.

    ``input`` is reshaped to (rows, 2, n_hidden), the size-2 axis is moved
    last, ``cuifft`` is applied and divided by ``sqrt(n_hidden)``, and the
    result is put back into the original (rows, 2 * n_hidden) layout.
    NOTE(review): the size-2 axis presumably holds real/imaginary parts —
    confirm against the ``cuifft`` helper.
    """
    n_rows = input.shape[0]
    # (rows, 2, n_hidden) -> (rows, n_hidden, 2)
    stacked = T.reshape(input, (n_rows, 2, n_hidden)).dimshuffle(0, 2, 1)
    scaled = cuifft(stacked) / T.sqrt(n_hidden)
    # back to (rows, 2, n_hidden), then flatten the two trailing axes
    return T.reshape(scaled.dimshuffle(0, 2, 1), (n_rows, 2 * n_hidden))
示例#26
0
def T_l2_cost_conv(x,a,A,imshp,kshp,mask=True):
    """
    Half the summed squared error of a convolutional image reconstruction.

    Layouts noted by the original author:
        xsz*ysz*nchannels, nimages = x.shape
        xsz*ysz*nfeat, nimages = a.shape
        xsz*ysz*nchannels, nfeat = A.shape

    imshp: (num images, channels, szy, szx)
    kshp:  (features, channels, szy, szx)
    mask:  when true, only the error inside the window
           [k-1 : im-k+1) along each spatial axis contributes; everything
           outside that window is zeroed before summing.
    """
    im_h, im_w = imshp[2], imshp[3]
    k_h, k_w = kshp[2], kshp[3]
    # feature maps: (num images, features, szy, szx)
    featshp = (imshp[0], kshp[0], im_h - k_h + 1, im_w - k_w + 1)

    image = T.reshape(T.transpose(x), imshp)
    kernel = T.reshape(T.transpose(A), kshp)
    features = T.reshape(T.transpose(a), featshp)

    # Swap the first two kernel axes and flip both spatial axes so the
    # 'full' convolution below acts as a correlation.
    kernel_rotated = T.transpose(kernel[:, :, ::-1, ::-1], axes=[1, 0, 2, 3])
    residual = image - conv2d(features, kernel_rotated, border_mode='full')

    if not mask:
        return .5*T.sum(residual **2)

    rows = slice(k_h - 1, im_h - k_h + 1)
    cols = slice(k_w - 1, im_w - k_w + 1)
    windowed = T.set_subtensor(T.zeros_like(residual)[:, :, rows, cols],
                               residual[:, :, rows, cols])
    return .5*T.sum(windowed **2)
示例#27
0
文件: utils.py 项目: Nehoroshiy/urnn
def unitary_transform(input, n_hidden, U):
    """
    Multiply a batch of two-part vectors by a two-part matrix.

    ``input`` is viewed as (rows, 2, n_hidden) and ``U`` is indexed as
    ``U[0]`` / ``U[1]``. With parts (IR, II) and (UR, UI) the output parts are
    ``IR.UR - II.UI`` and ``IR.UI + II.UR``, flattened back to
    (rows, 2 * n_hidden). This is the arithmetic of a complex product if
    index 0 is the real part and index 1 the imaginary part — presumably
    the intent; confirm with callers.
    """
    n_rows = input.shape[0]
    u_first = U[0, :, :]
    u_second = U[1, :, :]

    parts = T.reshape(input, (n_rows, 2, n_hidden))
    in_first = parts[:, 0, :]
    in_second = parts[:, 1, :]

    out_first = in_first.dot(u_first) - in_second.dot(u_second)
    out_second = in_first.dot(u_second) + in_second.dot(u_first)
    combined = T.stack([out_first, out_second], axis=1)
    return T.reshape(combined, (n_rows, 2*n_hidden))
def Transform(X, w1, g1, b1, w2, g2, b2, downsample_factor=2):
    """
    Resample ``X`` on an affine-transformed, downsampled grid.

    X: 4-D tensor unpacked as (num_batch, num_channels, height, width).
    w1, g1, b1, w2, g2, b2: forwarded unchanged to ``GetTheta`` together
        with ``X``; its result is reshaped to (-1, 2, 3).
    downsample_factor: divisor applied to height and width to obtain the
        output grid size (floor division).

    Returns a tensor laid out as
    (num_batch, num_channels, out_height, out_width).
    """
    theta = GetTheta(X, w1, g1, b1, w2, g2, b2)
    num_batch, num_channels, height, width = X.shape
    theta = T.reshape(theta, (-1, 2, 3))

    height_f = T.cast(height, 'float32')
    width_f = T.cast(width, 'float32')
    out_height = T.cast(height_f // downsample_factor, 'int64')
    out_width = T.cast(width_f // downsample_factor, 'int64')
    grid = Meshgrid(out_height, out_width)

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
    T_g = T.dot(theta, grid)
    x_s, y_s = T_g[:, 0], T_g[:, 1]
    x_s_flat = x_s.flatten()
    y_s_flat = y_s.flatten()

    # dimshuffle X to (bs, height, width, channels).
    # Fix: this used to read ``input.dimshuffle``; no ``input`` is bound in
    # this function, so it resolved to the Python builtin and failed.
    input_dim = X.dimshuffle(0, 2, 3, 1)
    input_transformed = Interpolate(input_dim, x_s_flat, y_s_flat, downsample_factor)

    output = T.reshape(input_transformed,
                       (num_batch, out_height, out_width, num_channels))

    # back to (bs, channels, height, width)
    output = output.dimshuffle(0, 3, 1, 2)
    return output
示例#29
0
    def get_output(self, train=False):
        """
        Run the layer's 1-D convolution as a 2-D convolution with a dummy axis.

        The 3-D input gets a trailing singleton axis and has axes 1 and 2
        swapped before convolving with ``self.W``. cuDNN is used when
        ``on_gpu()`` and ``dnn.dnn_available()`` are both true, otherwise
        ``T.nnet.conv.conv2d``. For ``border_mode == 'same'`` (only with
        ``subsample_length == 1``) cuDNN pads explicitly, while the fallback
        convolves in 'full' mode and crops. The bias and activation are
        applied, the dummy axis is dropped and axes 1 and 2 are swapped back.
        """
        seq = self.get_input(train)
        seq = T.reshape(seq, (seq.shape[0], seq.shape[1], seq.shape[2], 1))
        X = seq.dimshuffle(0, 2, 1, 3)

        mode = self.border_mode
        want_same = mode == 'same'
        use_cudnn = on_gpu() and dnn.dnn_available()

        if use_cudnn and want_same:
            assert(self.subsample_length == 1)
            pad_x = (self.filter_length - self.subsample_length) // 2
            conv_out = dnn.dnn_conv(img=X,
                                    kerns=self.W,
                                    border_mode=(pad_x, 0))
        elif use_cudnn:
            conv_out = dnn.dnn_conv(img=X,
                                    kerns=self.W,
                                    border_mode=mode,
                                    subsample=self.subsample)
        else:
            if want_same:
                assert(self.subsample_length == 1)
                # emulate 'same' by convolving in 'full' mode and cropping
                mode = 'full'
            conv_out = T.nnet.conv.conv2d(X, self.W,
                                          border_mode=mode,
                                          subsample=self.subsample)
            if self.border_mode == 'same':
                shift_x = (self.filter_length - 1) // 2
                conv_out = conv_out[:, :, shift_x:X.shape[2] + shift_x, :]

        activated = self.activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        squeezed = T.reshape(
            activated,
            (activated.shape[0], activated.shape[1], activated.shape[2]))
        return squeezed.dimshuffle(0, 2, 1)
示例#30
0
def max_pool_2d(input, ds, ignore_border=False):
    """
    Max-pool the two trailing dimensions of an N-D tensor (N >= 2).

    All leading dimensions are collapsed into one batch axis so the 4-D
    ``DownsampleFactorMax`` op can be used, then the original leading
    dimensions are restored around the pooled result.

    :type input: N-D theano tensor of input images.
    :param input: input images; pooling runs over the last 2 dimensions.
    :type ds: tuple of length 2
    :param ds: downscale factor; (2,2) halves the image in each dimension.
    :param ignore_border: boolean. When True, a (5,5) input with ds=(2,2)
      gives a (2,2) output; otherwise (3,3).
    """
    if input.ndim < 2:
        raise NotImplementedError("max_pool_2d requires a dimension >= 2")

    leading_shape = input.shape[:-2]
    image_shape = input.shape[-2:]

    # product of the leading dimensions, as a length-1 vector for join()
    n_images = tensor.shape_padright(tensor.prod(leading_shape), 1)

    # target shape (n_images, 1, height, width)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), image_shape), "int64")
    pooled = DownsampleFactorMax(ds, ignore_border)(
        tensor.reshape(input, shape_4d, ndim=4))

    # put the original leading dimensions back in front of the pooled image
    restored_shape = tensor.join(0, leading_shape, pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
示例#31
0
 def categorical_crossentropy(self, y_pred, y_true):
     """
     Mask-weighted mean categorical cross-entropy.

     Predictions are clipped to [epsilon, 1 - epsilon]. The per-row losses
     and ``self.mask`` are both reshaped to a column of
     ``mask.shape[0] * batch_size`` rows, and the masked loss sum is
     divided by the mask sum.
     """
     n_rows = self.mask.shape[0] * self.batch_size
     clipped = T.clip(y_pred, self.epsilon, 1.0 - self.epsilon)
     weights = T.reshape(self.mask, (n_rows, 1))
     losses = T.reshape(T.nnet.categorical_crossentropy(clipped, y_true),
                        (n_rows, 1))
     return T.sum(losses * weights) / T.sum(weights)
示例#32
0
 def lookup_all(sentences):
     """
     Scan ``lookup_sentence`` over ``sentences`` and add a singleton axis.

     The 3-D scan result of shape (d0, d1, d2) is returned as a 4-D tensor
     of shape (d0, 1, d1, d2). The scan's update dictionary is discarded.
     """
     scanned, _ = theano.scan(lookup_sentence, sequences=[sentences])
     dims = scanned.shape
     return T.reshape(scanned, (dims[0], 1, dims[1], dims[2]), ndim=4)
示例#33
0
def warp_bilinear_interpolation(orig_img, x, y, out_height, out_width):
    """
    Sample ``orig_img`` at the given coordinates using four-neighbour weights.

    orig_img: 4-D tensor in bc01 layout (batch, channels, dim0, dim1).
    x, y: sampling coordinates, flattened here; the rescaling below maps
        [-1, 1] onto [0, width] / [0, height]. They presumably hold
        out_height * out_width entries per batch item — confirm with callers.
    out_height, out_width: spatial size of the returned tensor.

    Returns a tensor in bc01 layout (batch, channels, out_height, out_width).
    """
    # channels last, so a single row lookup fetches every channel of a pixel
    img = orig_img.dimshuffle(0, 2, 3, 1)
    num_batch, height, width, num_channels = img.shape
    height_f = T.cast(height, theano.config.floatX)
    width_f = T.cast(width, theano.config.floatX)

    # rescale coordinates from [-1, 1] to [0, width/height]
    px = (x.flatten() + 1) / 2 * width_f
    py = (y.flatten() + 1) / 2 * height_f

    # clip the two neighbours along each axis to the valid index range
    # TODO add monitoring to out of bounds points
    x_limit = width_f - 1
    y_limit = height_f - 1
    x_lo = T.clip(px, 0, x_limit)
    x_hi = T.clip(px + 1, 0, x_limit)
    y_lo = T.clip(py, 0, y_limit)
    y_hi = T.clip(py + 1, 0, y_limit)

    # floored floats feed the weights; int64 casts feed the indexing
    x_lo_f, x_hi_f = T.floor(x_lo), T.floor(x_hi)
    y_lo_f, y_hi_f = T.floor(y_lo), T.floor(y_hi)
    x_lo_i, x_hi_i, y_lo_i, y_hi_i = (
        T.cast(coord, 'int64') for coord in (x_lo, x_hi, y_lo, y_hi))

    # Lookups run on the image flattened to [num_batch*height*width, channels],
    # so each batch item needs its own row offset.
    batch_offset = T.repeat(
        T.arange(num_batch, dtype='int64') * (width * height),
        out_height * out_width)
    row_lo = batch_offset + y_lo_i * width
    row_hi = batch_offset + y_hi_i * width

    pixels = img.reshape((-1, num_channels))
    corner_pixels = [
        pixels[row_lo + x_lo_i],
        pixels[row_hi + x_lo_i],
        pixels[row_lo + x_hi_i],
        pixels[row_hi + x_hi_i],
    ]
    corner_weights = [
        (x_hi_f - px) * (y_hi_f - py),
        (x_hi_f - px) * (py - y_lo_f),
        (px - x_lo_f) * (y_hi_f - py),
        (px - x_lo_f) * (py - y_lo_f),
    ]

    # weighted sum of the four neighbours, broadcast over channels
    output_2d = T.sum(
        [weight.dimshuffle(0, 'x') * pixel
         for weight, pixel in zip(corner_weights, corner_pixels)],
        axis=0)
    output_4d = T.reshape(output_2d,
                          (num_batch, out_height, out_width, num_channels))
    # b01c (batch, dim0, dim1, channels) -> bc01 (batch, channels, dim0, dim1)
    return output_4d.dimshuffle(0, 3, 1, 2)
示例#34
0
 def get_real_coefficients(self):
     """Return ``self.a`` and ``self.c``, each reshaped to 1-D, as a 2-tuple."""
     flat_a = tt.reshape(self.a, (self.a.size, ))
     flat_c = tt.reshape(self.c, (self.c.size, ))
     return (flat_a, flat_c)
示例#35
0
    def build(self):
        """
        Build the model variables.

        Variables are created in dependency order through the ``self.Det``
        and ``self.LogNorm`` helpers (presumably deterministic and
        log-normal model variables registered under the given name —
        confirm against their definitions). Nothing is returned; the
        helpers' side effects are the result.

        Reads ``self.d`` (``Ds``, ``Confirmed``, ``get_ActiveCMs``),
        ``self.CMDelayCut``, ``self.CMDelayProb``, ``self.nCMs``,
        ``self.nRs`` and ``self.nDs``.
        """
        CMReduction = self.build_reduction_var()

        # Window of active countermeasures extended into the past
        # (starts CMDelayCut before the first day in self.d.Ds)
        Earlier_ActiveCMs = self.d.get_ActiveCMs(
            self.d.Ds[0] - pd.DateOffset(self.CMDelayCut), self.d.Ds[-1])

        # [region, CM, day] Reduction factor for each CM,C,D
        # (CMReduction is broadcast as (1, nCMs, 1) and raised to the activation)
        ActiveCMReduction = (T.reshape(CMReduction,
                                       (1, self.nCMs, 1))**Earlier_ActiveCMs)

        # [region, day] Reduction factor from CMs for each C,D (noise added below)
        GrowthReduction = self.Det("GrowthReduction",
                                   T.prod(ActiveCMReduction, axis=1),
                                   plot_trace=False)

        # [region, day] Convolution of GrowthReduction by DelayProb along days;
        # the first CMDelayCut columns (the extended past window) are dropped
        DelayedGrowthReduction = self.Det(
            "DelayedGrowthReduction",
            geom_convolution(GrowthReduction, self.CMDelayProb,
                             axis=1)[:, self.CMDelayCut:],
            plot_trace=False,
        )

        # [] Baseline growth rate (wide prior OK, mean estimates ~10% daily growth)
        BaseGrowthRate = self.LogNorm("BaseGrowthRate", 1.2, 2.3)

        # [region] Region growth rate
        # TODO: Estimate growth rate variance
        RegionGrowthRate = self.LogNorm("RegionGrowthRate",
                                        BaseGrowthRate,
                                        0.3,
                                        shape=(self.nRs, ))

        # [region] Region unreliability as common scale multiplier of its:
        # * measurements (measurement unreliability)
        # * expected growth noise
        # TODO: Estimate good prior (but can be weak?)
        RegionScaleMult = self.LogNorm("RegionScaleMult",
                                       1.0,
                                       1.0,
                                       shape=(self.nRs, ))

        # [region, day] The ideal predicted daily growth
        PredictedGrowth = self.Det(
            "PredictedGrowth",
            T.reshape(RegionGrowthRate,
                      (self.nRs, 1)) * DelayedGrowthReduction,
            plot_trace=False,
        )

        # [region, day] The actual (still hidden) growth rate each day
        # TODO: Estimate noise variance (should be small, measurement variance below)
        #       Miscalibration: too low: time effects pushed into CMs, too high: explains away CMs
        RealGrowth = self.LogNorm(
            "RealGrowth",
            PredictedGrowth,
            RegionScaleMult.reshape((self.nRs, 1)) * 0.1,
            shape=(self.nRs, self.nDs),
            plot_trace=False,
        )

        # [region, day] Multiplicative noise applied to predicted growth rate
        RealGrowthNoise = self.Det("RealGrowthNoise",
                                   RealGrowth / PredictedGrowth,
                                   plot_trace=False)

        # [region] Initial size of epidemic (the day before the start, only those detected; wide prior OK)
        InitialSize = self.LogNorm("InitialSize", 1.0, 10, shape=(self.nRs, ))

        # [region, day] The number of cases that would be detected with noiseless testing
        # (Noise source includes both false-P/N rates and local variance in test volume and targeting)
        # (Since we only care about growth rates and assume consistent testing, it is fine to ignore real size)
        # NOTE(review): this reads self.RealGrowth while every other reference
        # uses the local RealGrowth — it only works if self.LogNorm also
        # exposes the variable as an attribute; confirm.
        Size = self.Det(
            "Size",
            T.reshape(InitialSize,
                      (self.nRs, 1)) * self.RealGrowth.cumprod(axis=1),
            plot_trace=False,
        )

        # [region, day] Cumulative tested positives
        Observed = self.LogNorm(
            "Observed",
            Size,
            0.4,  # self.RegionScaleMult.reshape((self.nRs, 1)) * 0.4,
            shape=(self.nRs, self.nDs),
            observed=self.d.Confirmed,
            plot_trace=False,
        )

        # [region, day] Multiplicative noise applied to predicted growth rate
        # Note: computed backwards, since self.Observed needs to be a distribution
        ObservedNoise = self.Det("ObservedNoise",
                                 Observed / Size,
                                 plot_trace=False)
示例#36
0
def flatten(array):
    """
    Reshape ``array`` to a 1-D vector and return the evaluated result.

    Fix: the body used to ignore its ``array`` parameter and read a name
    ``m`` that this function never binds.

    NOTE(review): ``size`` is resolved from the enclosing module as before;
    it presumably returns the total element count — confirm.
    """
    return T.reshape(array, (size(array), )).eval()
示例#37
0
def max_pool_3d(input, ds, ignore_border=False):
    """
    Max-pool the three trailing dimensions (time, height, width) of a tensor.

    The work is done as two 2-D pooling passes so ``pool_2d`` can be reused:
    first over (height, width) with factors (ds[1], ds[2]), then, with the
    time axis moved last, over time with factors (1, ds[0]). The axes are
    finally shuffled back to their original order.

    :type input: N-D theano tensor of input images (N >= 3).
    :param input: input videos; pooling runs over the last 3 dimensions.
    :type ds: tuple of length 3
    :param ds: downscale factor (time, height, width); (2,2,2) halves the
      video in each dimension.
    :param ignore_border: boolean. When True, a (5,5,5) input with
      ds=(2,2,2) gives a (2,2,2) output; otherwise (3,3,3).
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    vid_dim = input.ndim

    def pool_trailing_pair(data, factors):
        # Collapse every leading dimension into one batch axis, pool the
        # last two dimensions as a (batch, 1, a, b) tensor, then restore
        # the leading dimensions around the pooled result.
        leading_shape = data.shape[:-2]
        n_items = T.shape_padright(T.prod(leading_shape), 1)
        shape_4d = T.cast(
            T.join(0, n_items, T.as_tensor([1, ]), data.shape[-2:]), 'int32')
        pooled = T.signal.pool.pool_2d(
            T.reshape(data, shape_4d, ndim=4), factors, ignore_border)
        restored_shape = T.join(0, leading_shape, pooled.shape[-2:])
        return T.reshape(pooled, restored_shape, ndim=vid_dim)

    untouched_axes = list(range(vid_dim - 3))

    # pass 1: rows and cols of every frame
    frames_pooled = pool_trailing_pair(input, (ds[1], ds[2]))

    # pass 2: (..., time, rows, cols) -> (..., rows, cols, time), pool time
    time_last = frames_pooled.dimshuffle(
        untouched_axes + [vid_dim - 2, vid_dim - 1, vid_dim - 3])
    time_pooled = pool_trailing_pair(time_last, (1, ds[0]))

    # (..., rows, cols, time) -> (..., time, rows, cols)
    return time_pooled.dimshuffle(
        untouched_axes + [vid_dim - 1, vid_dim - 3, vid_dim - 2])
def build_model(shared_params, options):
    """
    Build the symbolic graph of a two-layer attention model over image
    regions and a question/sentence embedding.

    shared_params: mapping holding at least 'w_emb'; it is also passed to
        every ``fflayer`` call together with a parameter prefix.
    options: mapping of hyper-parameters and feature switches read below
        ('drop_ratio', 'batch_size', 'n_dim', 'n_emb', 'sent_drop',
        'use_unigram_conv', 'use_bigram_conv', 'use_trigram_conv',
        'use_attention_drop', 'combined_num_mlp', plus optional keys
        fetched with ``options.get``).

    Returns the tuple (image_feat, input_idx, input_mask, label, dropout,
    cost, accu, pred_label, prob_attention_1, prob_attention_2): the four
    symbolic inputs, the shared dropout switch, the mean negative
    log-likelihood, the accuracy, the arg-max predictions and the two
    attention distributions.
    """
    trng = RandomStreams(1234)
    drop_ratio = options['drop_ratio']
    # NOTE(review): batch_size, n_dim and w_emb are read here but never used
    # below; the reads still require the keys to exist.
    batch_size = options['batch_size']
    n_dim = options['n_dim']

    w_emb = shared_params['w_emb']

    # NOTE(review): this function uses both the `numpy` and `np` names.
    dropout = theano.shared(numpy.float32(0.))
    image_feat = T.ftensor3('image_feat')
    # batch_size x T
    input_idx = T.imatrix('input_idx')
    input_mask = T.matrix('input_mask')
    # label is the TRUE label
    label = T.ivector('label')

    # Prepend an all-zero row so that index 0 looks up an empty embedding.
    empty_word = theano.shared(value=np.zeros((1, options['n_emb']),
                                              dtype='float32'),
                               name='empty_word')
    w_emb_extend = T.concatenate([empty_word, shared_params['w_emb']],
                                 axis=0)
    input_emb = w_emb_extend[input_idx]

    # a trick here, set the maxpool_h/w to be large
    # maxpool_shape = (options['maxpool_h'], options['maxpool_w'])

    # turn those appending words into zeros
    # batch_size x T x n_emb
    input_emb = input_emb * input_mask[:, :, None]
    if options['sent_drop']:
        input_emb = dropout_layer(input_emb, dropout, trng, drop_ratio)

    # NOTE(review): each *_pool_feat below is bound only when its
    # use_*_conv option is truthy, but the concatenation after these
    # branches needs all three.
    if options['use_unigram_conv']:
        unigram_conv_feat = fflayer(shared_params, input_emb, options,
                                    prefix='conv_unigram',
                                    act_func=options.get('sent_conv_act', 'tanh'))
        # max over the time axis
        unigram_pool_feat = unigram_conv_feat.max(axis=1)
    if options['use_bigram_conv']:
        # Interleaved indices [0, 1, 1, 2, 2, 3, ...] so that each pair of
        # consecutive embeddings can be concatenated by the reshape below.
        idx = T.concatenate([T.arange(input_emb.shape[1])[:-1],
                             T.arange(input_emb.shape[1])[1:]]).reshape((2, input_emb.shape[1] - 1)).transpose().flatten()
        bigram_emb = T.reshape(input_emb[:, idx, :], (input_emb.shape[0],
                                                      input_emb.shape[1] - 1,
                                                      2 * input_emb.shape[2]))
        bigram_conv_feat = fflayer(shared_params, bigram_emb,
                                   options, prefix='conv_bigram',
                                   act_func=options.get('sent_conv_act', 'tanh'))
        bigram_pool_feat = bigram_conv_feat.max(axis=1)
    if options['use_trigram_conv']:
        # Same construction with windows of three consecutive embeddings.
        idx = T.concatenate([T.arange(input_emb.shape[1])[:-2],
                             T.arange(input_emb.shape[1])[1:-1],
                             T.arange(input_emb.shape[1])[2:]]).reshape((3, input_emb.shape[1] - 2)).transpose().flatten()
        trigram_emb = T.reshape(input_emb[:, idx, :], (input_emb.shape[0],
                                                      input_emb.shape[1] - 2,
                                                      3 * input_emb.shape[2]))
        trigram_conv_feat = fflayer(shared_params, trigram_emb,
                                    options, prefix='conv_trigram',
                                    act_func=options.get('sent_conv_act', 'tanh'))
        trigram_pool_feat = trigram_conv_feat.max(axis=1)  #

    pool_feat = T.concatenate([unigram_pool_feat,
                               bigram_pool_feat,
                               trigram_pool_feat], axis=1)

    image_feat_down = fflayer(shared_params, image_feat, options,
                              prefix='image_mlp',
                              act_func=options.get('image_mlp_act',
                                                   'tanh'))
    if options.get('use_before_attention_drop', False):
        image_feat_down = dropout_layer(image_feat_down, dropout, trng, drop_ratio)
        pool_feat = dropout_layer(pool_feat, dropout, trng, drop_ratio)

    # attention model begins here
    # first layer attention model
    image_feat_attention_1 = fflayer(shared_params, image_feat_down, options,
                                     prefix='image_att_mlp_1',
                                     act_func=options.get('image_att_mlp_act',
                                                          'tanh'))
    pool_feat_attention_1 = fflayer(shared_params, pool_feat, options,
                                    prefix='sent_att_mlp_1',
                                    act_func=options.get('sent_att_mlp_act',
                                                         'tanh'))
    # broadcast the sentence feature over axis 1 of the image feature
    combined_feat_attention_1 = image_feat_attention_1 + \
                                pool_feat_attention_1[:, None, :]
    if options['use_attention_drop']:
        combined_feat_attention_1 = dropout_layer(combined_feat_attention_1,
                                                  dropout, trng, drop_ratio)

    combined_feat_attention_1 = fflayer(shared_params,
                                        combined_feat_attention_1, options,
                                        prefix='combined_att_mlp_1',
                                        act_func=options.get(
                                            'combined_att_mlp_act',
                                            'tanh'))
    # softmax over axis 1, using only channel 0 of the combined feature
    prob_attention_1 = T.nnet.softmax(combined_feat_attention_1[:, :, 0])

    # attention-weighted sum of the image features over axis 1
    image_feat_ave_1 = (prob_attention_1[:, :, None] * image_feat_down).sum(axis=1)

    combined_hidden_1 = image_feat_ave_1 + pool_feat
    # second layer attention model

    image_feat_attention_2 = fflayer(shared_params, image_feat_down, options,
                                     prefix='image_att_mlp_2',
                                     act_func=options.get('image_att_mlp_act',
                                                          'tanh'))
    pool_feat_attention_2 = fflayer(shared_params, combined_hidden_1, options,
                                    prefix='sent_att_mlp_2',
                                    act_func=options.get('sent_att_mlp_act',
                                                         'tanh'))
    combined_feat_attention_2 = image_feat_attention_2 + \
                                pool_feat_attention_2[:, None, :]
    if options['use_attention_drop']:
        combined_feat_attention_2 = dropout_layer(combined_feat_attention_2,
                                                  dropout, trng, drop_ratio)

    combined_feat_attention_2 = fflayer(shared_params,
                                        combined_feat_attention_2, options,
                                        prefix='combined_att_mlp_2',
                                        act_func=options.get(
                                            'combined_att_mlp_act', 'tanh'))
    prob_attention_2 = T.nnet.softmax(combined_feat_attention_2[:, :, 0])

    image_feat_ave_2 = (prob_attention_2[:, :, None] * image_feat_down).sum(axis=1)

    # Either restart from the sentence feature or accumulate on top of the
    # first attention layer's hidden state.
    if options.get('use_final_image_feat_only', False):
        combined_hidden = image_feat_ave_2 + pool_feat
    else:
        combined_hidden = image_feat_ave_2 + combined_hidden_1


    # Stack of MLP layers; the last one is always linear so the softmax
    # below receives unsquashed scores.
    for i in range(options['combined_num_mlp']):
        if options.get('combined_mlp_drop_%d'%(i), False):
            combined_hidden = dropout_layer(combined_hidden, dropout, trng,
                                            drop_ratio)
        if i == options['combined_num_mlp'] - 1:
            combined_hidden = fflayer(shared_params, combined_hidden, options,
                                      prefix='combined_mlp_%d'%(i),
                                      act_func='linear')
        else:
            combined_hidden = fflayer(shared_params, combined_hidden, options,
                                      prefix='combined_mlp_%d'%(i),
                                      act_func=options.get('combined_mlp_act_%d'%(i),
                                                           'tanh'))

    # drop the image output
    prob = T.nnet.softmax(combined_hidden)
    # probability assigned to the true label of every row
    prob_y = prob[T.arange(prob.shape[0]), label]
    pred_label = T.argmax(prob, axis=1)
    # sum or mean?
    cost = -T.mean(T.log(prob_y))
    accu = T.mean(T.eq(pred_label, label))

    # return image_feat, input_idx, input_mask, \
        # label, dropout, cost, accu
    return image_feat, input_idx, input_mask, \
        label, dropout, cost, accu, pred_label, \
        prob_attention_1, prob_attention_2
示例#39
0
def run_model(index, in_dir, out_dir, data_filename, func_filename,
              struct_filename, dist_filename, kernel, n, sample_size,
              tune_size):
    """
    Build the model for one data set, sample it with NUTS and save the
    trace as a CSV file.

    index: data set index; ``in_dir + str(index)`` becomes the working
        directory and the index is embedded in the output file name
    in_dir: prefix of the working directory
    out_dir: prefix of the output CSV path
    data_filename: filename for time series data
    func_filename: filename for functional connectivity
    struct_filename: filename for structural connectivity
    dist_filename: filename for the distance matrices of the n ROIs
    kernel: "exponential" or "gaussian" or "matern52" or "matern32"
    n: number of ROIs
    sample_size: number of draws passed to ``pm.sample``
    tune_size: number of tuning steps passed to ``pm.sample``

    Raises ValueError for an unknown ``kernel``. Previously an unknown name
    skipped every kernel branch and silently left the mean at zero.
    """
    # Spatial covariance as a function of the squared precision scale and
    # the scaled distance r, one entry per supported kernel name. The
    # formulas are the ones the four former copy-pasted loops used.
    spatial_kernels = {
        "exponential": lambda scale, r: scale * tt.exp(-r),
        "gaussian": lambda scale, r: scale * tt.exp(-tt.sqr(r) * 0.5),
        "matern52": lambda scale, r: scale * (
            (1.0 + tt.sqrt(5.0) * r + 5.0 / 3.0 * tt.sqr(r)) *
            tt.exp(-1.0 * tt.sqrt(5.0) * r)),
        "matern32": lambda scale, r: scale * (
            1.0 + tt.sqrt(3.0) * r) * tt.exp(-tt.sqrt(3.0) * r),
    }
    # Validate before touching the working directory or loading any data.
    if kernel not in spatial_kernels:
        raise ValueError(
            "kernel must be one of %s, got %r" %
            (sorted(spatial_kernels), kernel))
    spatial_cov = spatial_kernels[kernel]

    os.chdir(in_dir + str(index))
    Y = get_data(data_filename)
    mFunc = get_func(func_filename, n)
    Struct = get_struct(struct_filename, n)
    Dist = get_dist(dist_filename, n)
    m = Dist[0].shape[0]
    k = Y.shape[1]
    n_vec = n * (n + 1) // 2
    # Mean of the first column of Y over each ROI's block of m rows.
    Y_mean = np.array(
        [np.mean(Y[i * m:(i + 1) * m, 0]) for i in range(n)])

    with pm.Model() as model_generator:

        # covariance matrix
        log_Sig = pm.Uniform("log_Sig", -8, 8, shape=(n, ))
        SQ = tt.diag(tt.sqrt(tt.exp(log_Sig)))
        Func_Covm = tt.dot(tt.dot(SQ, mFunc), SQ)
        Struct_Convm = tt.dot(tt.dot(SQ, Struct), SQ)

        # double fusion of structural and FC
        # (upper-triangular entries of the transposed Cholesky factors)
        L_fc_vec = tt.reshape(
            tt.slinalg.cholesky(tt.squeeze(Func_Covm)).T[np.triu_indices(n)],
            (n_vec, ))
        L_st_vec = tt.reshape(
            tt.slinalg.cholesky(
                tt.squeeze(Struct_Convm)).T[np.triu_indices(n)], (n_vec, ))
        Struct_vec = tt.reshape(Struct[np.triu_indices(n)], (n_vec, ))
        lambdaw = pm.Beta("lambdaw", alpha=1, beta=1, shape=(n_vec, ))
        Kf = pm.Beta("Kf", alpha=1, beta=1, shape=(n_vec, ))
        rhonn = Kf*( (1-lambdaw)*L_fc_vec + lambdaw*L_st_vec ) + \
            (1-Kf)*( (1-Struct_vec*lambdaw)*L_fc_vec + Struct_vec*lambdaw*L_st_vec )

        # correlation
        Cov_temp = tt.triu(tt.ones((n, n)))
        Cov_temp = tt.set_subtensor(Cov_temp[np.triu_indices(n)], rhonn)
        Cov_mat_v = tt.dot(Cov_temp.T, Cov_temp)
        d = tt.sqrt(tt.diagonal(Cov_mat_v))
        rho = (Cov_mat_v.T / d).T / d
        rhoNew = pm.Deterministic("rhoNew", rho[np.triu_indices(n, 1)])

        # temporal correlation AR(1)
        phi_T = pm.Uniform("phi_T", 0, 1, shape=(n, ))
        sigW_T = pm.Uniform("sigW_T", 0, 100, shape=(n, ))
        B = pm.Normal("B", 0, 100, shape=(n, ))
        muW1 = Y_mean - B  # get the shifted mean
        mean_overall = muW1 / (1.0 - phi_T)  # AR(1) mean
        tau_overall = (1.0 - tt.sqr(phi_T)) / tt.sqr(sigW_T)  # AR(1) precision
        W_T = pm.MvNormal("W_T",
                          mu=mean_overall,
                          tau=tt.diag(tau_overall),
                          shape=(k, n))

        # add all parts together
        one_m_vec = tt.ones((m, 1))
        one_k_vec = tt.ones((1, k))
        D = pm.MvNormal("D", mu=tt.zeros(n), cov=Cov_mat_v, shape=(n, ))
        phi_s = pm.Uniform("phi_s", 0, 20, shape=(n, ))
        spat_prec = pm.Uniform("spat_prec", 0, 100, shape=(n, ))
        H_base = pm.Normal("H_base", 0, 1, shape=(m, n))

        # Fill the (m*n, k) mean one ROI block of m rows at a time.
        Mu_all = tt.zeros((m * n, k))
        for i in range(n):
            r = Dist[i] * phi_s[i]
            H_temp = spatial_cov(tt.sqr(spat_prec[i]), r)
            L_H_temp = tt.slinalg.cholesky(H_temp)
            Mu_all = tt.set_subtensor(
                Mu_all[m * i:m * (i + 1), :],
                B[i] + D[i] + one_m_vec * W_T[:, i] +
                tt.dot(L_H_temp, tt.reshape(H_base[:, i], (m, 1))) * one_k_vec)

        # NOTE(review): the variable is named "..._prec" but is passed as
        # the standard deviation (sd=) of the likelihood — confirm intent.
        sigma_error_prec = pm.Uniform("sigma_error_prec", 0, 100)
        Y1 = pm.Normal("Y1", mu=Mu_all, sd=sigma_error_prec, observed=Y)

    with model_generator:
        step = pm.NUTS()
        trace = pm.sample(sample_size, step=step, tune=tune_size, chains=1)

    # save as pandas format and output the csv file
    save_trace = pm.trace_to_dataframe(trace)
    save_trace.to_csv(out_dir + date.today().strftime("%m_%d_%y") + \
        "_sample_size_" + str(sample_size) + "_index_" + str(index) + ".csv")
示例#40
0
 def set_output(self):
     """Store the previous layer's output, reshaped to ``self._output_shape``, in ``self._output``."""
     upstream = self._prev_layer.output
     self._output = tensor.reshape(upstream, self._output_shape)
示例#41
0
def interpolate_bilinear(im, x, y, out_shape=None, border_mode='nearest'):
    """Bilinearly sample a batch of images at the given coordinates.

    Parameters
    ----------
    im : 4D tensor
        Input images; the four axes are unpacked as ``(n, c, h, w)``.
    x, y : tensors
        Sampling coordinates (flattened internally). They are rescaled from
        ``[-1, 1]`` to ``[0, w - 1]`` and ``[0, h - 1]`` respectively.
        Presumably one coordinate per output pixel per image, i.e.
        ``n * h_out * w_out`` values each -- verify against the caller.
    out_shape : pair, optional
        ``(h_out, w_out)`` of the sampled images. Defaults to the spatial
        shape of ``im``.
    border_mode : {'nearest', 'mirror', 'wrap'}
        How neighbour indices falling outside the image are mapped back
        inside: clipped to the border, reflected, or wrapped around.

    Returns
    -------
    tensor
        Sampled images reshaped to ``(n, h_out, w_out, c)`` and returned with
        axes ordered ``(n, c, h_out, w_out)``.

    Raises
    ------
    TypeError
        If ``im`` is not 4-dimensional.
    ValueError
        If ``border_mode`` is not one of the supported modes.
    """
    if im.ndim != 4:
        raise TypeError('im should be a 4D Tensor image, got %dD.' % im.ndim)

    out_shape = out_shape if out_shape else T.shape(im)[2:]
    x, y = x.flatten(), y.flatten()
    n, c, h, w = im.shape
    h_out, w_out = out_shape
    height_f = T.cast(h, theano.config.floatX)
    width_f = T.cast(w, theano.config.floatX)

    # scale coordinates from [-1, 1] to [0, width/height - 1]
    x = (x + 1) / 2 * (width_f - 1)
    y = (y + 1) / 2 * (height_f - 1)

    # Corners of the 2x2 neighbourhood around each coordinate. The float
    # versions are kept for the interpolation weights; the border-adjusted
    # integer versions below are used for indexing.
    x0_f = T.floor(x)
    y0_f = T.floor(y)
    x1_f = x0_f + 1
    y1_f = y0_f + 1

    if border_mode == 'nearest':
        x0 = T.clip(x0_f, 0, width_f - 1)
        x1 = T.clip(x1_f, 0, width_f - 1)
        y0 = T.clip(y0_f, 0, height_f - 1)
        y1 = T.clip(y1_f, 0, height_f - 1)
    elif border_mode == 'mirror':
        # Reflection periods get their own names: `w` and `h` must keep the
        # integer image size because they are the strides of the flattened
        # image used for the index arithmetic below.
        period_x = 2 * (width_f - 1)
        x0 = T.minimum(x0_f % period_x, -x0_f % period_x)
        x1 = T.minimum(x1_f % period_x, -x1_f % period_x)
        period_y = 2 * (height_f - 1)
        y0 = T.minimum(y0_f % period_y, -y0_f % period_y)
        y1 = T.minimum(y1_f % period_y, -y1_f % period_y)
    elif border_mode == 'wrap':
        x0 = T.mod(x0_f, width_f)
        x1 = T.mod(x1_f, width_f)
        y0 = T.mod(y0_f, height_f)
        y1 = T.mod(y1_f, height_f)
    else:
        raise ValueError("border_mode must be one of "
                         "'nearest', 'mirror', 'wrap'")
    x0, x1, y0, y1 = (T.cast(v, 'int64') for v in (x0, x1, y0, y1))

    # Offset of each image's first pixel in the flattened (n*h*w, c) view,
    # repeated once per output pixel.
    base = T.arange(n) * w * h
    base = T.reshape(base, (-1, 1))
    base = T.tile(base, (1, h_out * w_out))
    base = base.flatten()

    # Flat row indices of the four neighbours (row stride is `w`).
    base_y0 = base + y0 * w
    base_y1 = base + y1 * w
    idx_a = base_y0 + x0
    idx_b = base_y1 + x0
    idx_c = base_y0 + x1
    idx_d = base_y1 + x1

    # Move channels last so each flat row holds one pixel's channel vector.
    im_flat = T.reshape(im.dimshuffle((0, 2, 3, 1)), (-1, c))
    pixel_a = im_flat[idx_a]
    pixel_b = im_flat[idx_b]
    pixel_c = im_flat[idx_c]
    pixel_d = im_flat[idx_d]

    # Bilinear weights: each neighbour is weighted by the area of the
    # rectangle opposite to it; a trailing broadcast axis covers channels.
    wa = ((x1_f - x) * (y1_f - y)).dimshuffle((0, 'x'))
    wb = ((x1_f - x) * (1. - (y1_f - y))).dimshuffle((0, 'x'))
    wc = ((1. - (x1_f - x)) * (y1_f - y)).dimshuffle((0, 'x'))
    wd = ((1. - (x1_f - x)) * (1. - (y1_f - y))).dimshuffle((0, 'x'))

    output = T.sum((wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d),
                   axis=0)
    output = T.reshape(output, (n, h_out, w_out, c))
    return output.dimshuffle((0, 3, 1, 2))
示例#42
0
    def __init__(self, data_dir, word2vec, word_vector_size, dim,
                mode, answer_module, memory_hops, batch_size, l2,
                normalize_attention, batch_norm, dropout, **kwargs):
        """Build the symbolic graph and compile the Theano functions of the model.

        The constructor stores the hyper-parameters on ``self``, loads the
        vocabulary and the 'train' / 'val' story data from ``data_dir``,
        builds a question embedding followed by a recurrent answer decoder
        (stepped by ``self.answer_gru_step``), defines a masked cross-entropy
        loss with an optional L2 penalty, and compiles ``train_fn`` (only when
        ``mode == 'train'``), ``test_fn`` and ``pred_fn``.

        Keyword arguments not named in the signature are collected in
        ``kwargs`` and only printed.

        NOTE(review): ``self.cnn_dim`` is read below but never assigned in this
        method -- presumably provided elsewhere on the class; confirm.
        """
        
        print "==> not used params in DMN class:", kwargs.keys()

        self.data_dir = data_dir
        
        self.word2vec = word2vec
        self.word_vector_size = word_vector_size
        self.dim = dim
        self.mode = mode
        self.answer_module = answer_module
        self.memory_hops = memory_hops
        self.batch_size = batch_size
        self.l2 = l2
        self.normalize_attention = normalize_attention
        self.batch_norm = batch_norm
        self.dropout = dropout

        self.vocab, self.ivocab = self._load_vocab(self.data_dir)

        # Placeholders; both are overwritten with the story keys a few lines below.
        self.train_story = None
        self.test_story = None
        self.train_dict_story, self.train_features, self.train_fns_dict, self.train_num_imgs = self._process_input_sind(self.data_dir, 'train')
        self.test_dict_story, self.test_features, self.test_fns_dict, self.test_num_imgs = self._process_input_sind(self.data_dir, 'val')

        self.train_story = self.train_dict_story.keys()
        self.test_story = self.test_dict_story.keys()
        self.vocab_size = len(self.vocab)
        
        # Symbolic inputs.
        self.q_var = T.matrix('q_var') # Now, it's a batch * image_size.
        self.answer_var = T.imatrix('answer_var') # answer of example in minibatch
        self.answer_mask = T.matrix('answer_mask')
        self.answer_inp_var = T.tensor3('answer_inp_var') # answer of example in minibatch
        
        print "==> building question module"
        # Now, share the parameter with the input module.
        q_var_shuffled = self.q_var.dimshuffle(1,0)
        self.W_inp_emb_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.cnn_dim))
        self.b_inp_emb_in = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        # Affine projection of the question features to `dim`; the bias is
        # broadcast along the second axis.
        q_hist = T.dot(self.W_inp_emb_in, q_var_shuffled) + self.b_inp_emb_in.dimshuffle(0,'x')

        q_hist_shuffled = q_hist.dimshuffle(1,0)

        if self.batch_norm:
            logging.info("Using batch normalization.")
        q_net = layers.InputLayer(shape=(self.batch_size, self.dim), input_var=q_hist_shuffled)
        if self.batch_norm:
            q_net = layers.BatchNormLayer(incoming=q_net)
        # Dropout is only inserted in training mode.
        if self.dropout > 0 and self.mode == 'train':
            q_net = layers.DropoutLayer(q_net, p=self.dropout)
        #last_mem = layers.get_output(q_net).dimshuffle((1, 0))
        self.q_q = layers.get_output(q_net).dimshuffle(1,0)

        print "==> building answer module"

        answer_inp_var_shuffled = self.answer_inp_var.dimshuffle(1,2,0)
        #self.W_mem_emb = nn_utils.normal_param(std = 0.1, shape = (self.dim, self.dim))
        self.W_inp_emb = nn_utils.normal_param(std = 0.1, shape = (self.dim, self.vocab_size + 1))

        def _dot2(x, W):
            # Scan step: left-multiply one slice of the sequence by W.
            return  T.dot(W, x)

        answer_inp_var_shuffled_emb,_ = theano.scan(fn = _dot2, sequences = answer_inp_var_shuffled,
                non_sequences = self.W_inp_emb ) # seq x dim x batch
        

        # The question representation is prepended as the first element of
        # the decoder's input sequence.
        mem_ans = self.q_q
        mem_ans_dim = mem_ans.dimshuffle('x',0,1)

        answer_inp = T.concatenate([mem_ans_dim, answer_inp_var_shuffled_emb], axis = 0)
        
        # All-zero initial state handed to the answer recurrence.
        dummy = theano.shared(np.zeros((self.dim, self.batch_size), dtype=floatX))

        # Output projection from the hidden state to vocabulary scores.
        self.W_a = nn_utils.normal_param(std=0.1, shape=(self.vocab_size + 1, self.dim))
        
        # Parameters read by self.answer_gru_step (presumably reset / update /
        # candidate-hidden gates, judging by the names -- confirm there).
        self.W_ans_res_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.W_ans_res_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_ans_res = nn_utils.constant_param(value=0.0, shape=(self.dim,))
        
        self.W_ans_upd_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.W_ans_upd_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_ans_upd = nn_utils.constant_param(value=0.0, shape=(self.dim,))
        
        self.W_ans_hid_in = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.W_ans_hid_hid = nn_utils.normal_param(std=0.1, shape=(self.dim, self.dim))
        self.b_ans_hid = nn_utils.constant_param(value=0.0, shape=(self.dim,))

        logging.info('answer_inp size')

        #last_mem = printing.Print('prob_sm')(last_mem)
        results, _ = theano.scan(fn = self.answer_gru_step,
                sequences = answer_inp,
                outputs_info = [ dummy ])

        # Project every hidden state onto the vocabulary.
        prob,_ = theano.scan(fn = lambda x, w: T.dot(w, x), sequences = results, non_sequences = self.W_a )
        # Drop the first step (it corresponds to the prepended question
        # representation). The training scores additionally drop the last step.
        preds = prob[1:,:,:]
        prob = prob[1:-1,:,:]

        prob_shuffled = prob.dimshuffle(2,0,1) # b * len * vocab
        preds_shuffled = preds.dimshuffle(2,0,1)


        logging.info("prob shape.")
        #print prob.shape.eval({self.input_var: np.random.rand(10,4,4096).astype('float32'),
        #    self.q_var: np.random.rand(10, 4096).astype('float32'), 
        #    self.answer_inp_var: np.random.rand(10, 18, 8001).astype('float32')})

        # Collapse the first two axes so the softmax is applied row-wise on a matrix.
        n = prob_shuffled.shape[0] * prob_shuffled.shape[1]
        n_preds = preds_shuffled.shape[0] * preds_shuffled.shape[1]

        prob_rhp = T.reshape(prob_shuffled, (n, prob_shuffled.shape[2]))
        preds_rhp = T.reshape(preds_shuffled, (n_preds, preds_shuffled.shape[2]))

        prob_sm = nn_utils.softmax_(prob_rhp)
        preds_sm = nn_utils.softmax_(preds_rhp)
        self.prediction = prob_sm # this one is for the training.

        # This one is for the beamsearch.
        self.pred = T.reshape(preds_sm, (preds_shuffled.shape[0], preds_shuffled.shape[1], preds_shuffled.shape[2]))

        # Flatten mask and labels to line up with the rows of prob_sm.
        mask =  T.reshape(self.answer_mask, (n,))
        lbl = T.reshape(self.answer_var, (n,))

        self.params = [self.W_a,self.W_ans_res_in, self.W_ans_res_hid, self.b_ans_res, 
                              self.W_ans_upd_in, self.W_ans_upd_hid, self.b_ans_upd,
                              self.W_ans_hid_in, self.W_ans_hid_hid, self.b_ans_hid,
                              self.W_inp_emb_in, self.b_inp_emb_in,
                              self.W_inp_emb]
                              
        print "==> building loss layer and computing updates"
        loss_vec = T.nnet.categorical_crossentropy(prob_sm, lbl)
        # Mean cross-entropy over the positions selected by the mask.
        self.loss_ce = (mask * loss_vec ).sum() / mask.sum() 

        #self.loss_ce = T.nnet.categorical_crossentropy(results_rhp, lbl)
            
        if self.l2 > 0:
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.params)
        else:
            self.loss_l2 = 0
        
        self.loss = self.loss_ce + self.loss_l2

        # NOTE(review): `grad` is not referenced again in this method; the
        # adadelta call below is handed the loss directly.
        grad = T.grad(self.loss, self.params)
        #scaled_grad = lasagne.updates.norm_constraint(grad, max_norm = 1e4)
        updates = lasagne.updates.adadelta(self.loss, self.params, learning_rate = 0.01)
        #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.001)
        
        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.q_var, self.answer_var, self.answer_mask, self.answer_inp_var], 
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)
        
        # Same inputs and outputs as train_fn, but without parameter updates.
        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.q_var, self.answer_var, self.answer_mask, self.answer_inp_var],
                                       outputs=[self.prediction, self.loss])
        
    
        # Prediction only needs the question and the answer inputs.
        print "==> compiling pred_fn"
        self.pred_fn= theano.function(inputs=[self.q_var, self.answer_inp_var],
                                       outputs=[self.pred])
示例#43
0
def conv2d(
        input,
        filters,
        image_shape=None,
        filter_shape=None,
        border_mode="valid",
        subsample=(1, 1),
        **kargs,
):
    """
    Convolve 2D image(s) with 2D filter(s) (signal-processing style).

    ``input`` may be one 2D image or a 3D stack of images, and ``filters``
    may be one 2D filter or a 3D stack of filters. Both are promoted to 4D,
    pushed through ``conv.ConvOp`` and the singleton axes introduced by the
    promotion are flattened away again.

    Supplying both shape arguments is optional and lets the op use static
    shape information.

    Parameters
    ----------
    input : symbolic tensor
        Images to filter, ``([num_images], image height, image width)``.
    filters : symbolic tensor
        Convolution filter(s), ``([num_filters], filter height, filter width)``.
    image_shape : tuple of length 2 or 3, optional
        ``([num_images,] image height, image width)``.
    filter_shape : tuple of length 2 or 3, optional
        ``([num_filters,] filter height, filter width)``.
    border_mode : {'valid', 'full'}
        See scipy.signal.convolve2d.
    subsample : pair
        Factor by which to subsample the output along each axis.
    kargs
        Forwarded unchanged to ``conv.ConvOp``.

    Returns
    -------
    symbolic 2D, 3D or 4D tensor
        Filtered images, with shape
        ``([number images,] [number filters,] image height, image width)``.

    """
    assert input.ndim in (2, 3)
    assert filters.ndim in (2, 3)

    batched_images = input.ndim == 3
    stacked_filters = filters.ndim == 3

    # Static shape hints are forwarded only when both were supplied.
    bsize = imshp = nkern = kshp = None
    if filter_shape and image_shape:
        bsize = image_shape[0] if batched_images else 1
        imshp = (1, ) + tuple(image_shape[-2:])
        nkern = filter_shape[0] if stacked_filters else 1
        kshp = filter_shape[-2:]

    # Symbolic leading sizes for the 4D views expected by ConvOp.
    sym_bsize = input.shape[0] if batched_images else 1
    sym_nkern = filters.shape[0] if stacked_filters else 1

    def _promote_to_4d(var, leading):
        # (leading, 1, rows, cols): a singleton axis is inserted at position 1.
        target_shape = tensor.join(0, tensor.stack([leading, 1]),
                                   var.shape[-2:])
        return tensor.reshape(var, target_shape, ndim=4)

    input4D = _promote_to_4d(input, sym_bsize)
    filters4D = _promote_to_4d(filters, sym_nkern)

    convolve = conv.ConvOp(
        output_mode=border_mode,
        dx=subsample[0],
        dy=subsample[1],
        imshp=imshp,
        kshp=kshp,
        nkern=nkern,
        bsize=bsize,
        **kargs,
    )
    output = convolve(input4D, filters4D)

    # Squeeze out the axes that only exist because a 2D argument was promoted.
    single_image = input.ndim == 2
    single_filter = filters.ndim == 2
    if single_image and single_filter:
        if theano.config.warn__signal_conv2d_interface:
            warnings.warn(
                "theano.tensor.signal.conv2d() now outputs a 2d tensor when both"
                " inputs are 2d. To disable this warning, set the Theano flag"
                " warn__signal_conv2d_interface to False",
                stacklevel=3,
            )
        return tensor.flatten(output.T, ndim=2).T
    if single_image or single_filter:
        return tensor.flatten(output.T, ndim=3).T
    return output
import numpy as np
import matplotlib.pyplot as plt
import Updates
import qqplot
import time

# Set Theano's default float type to float32 for the code below.
theano.config.floatX = 'float32'


# The model has 3 elements:
#   - the discriminator applied to a true point,
#   - the discriminator applied to a generated point
#     (these two share the same parameters),
#   - the generator.

# Number of adjacent units pooled together by `maxout`.
poolSize = 5


def maxout(vector):
    """Maxout activation: maximum over groups of `poolSize` adjacent columns.

    `vector` is reshaped from (rows, cols) to (rows, cols // poolSize,
    poolSize) and reduced over the last axis. Assumes `vector` is 2D and its
    second dimension is a multiple of `poolSize` -- TODO confirm with callers.

    Floor division is used so the reshape target stays an integer; plain `/`
    is true division under Python 3 and would yield a float dimension.
    """
    pooled_shape = (vector.shape[0], vector.shape[1] // poolSize, poolSize)
    return T.max(T.reshape(vector, pooled_shape), axis=2)


def relu(vector):
    """Rectified linear unit: element-wise ``max(0, vector)``."""
    return T.maximum(0.0, vector)


# Activation used by the networks defined below.
activation = maxout


# Target ("true") data: an equal mixture of a gamma and a normal
# distribution, shuffled so the two components are interleaved.
n = 20000
# `size` must be an integer; `n / 2` is a float under Python 3 and is rejected
# by numpy, so floor division is used.
td1 = np.random.gamma(1.0, 2.0, n // 2)
td2 = np.random.normal(-3.0, 2.0, n // 2)
true_dist = td1.tolist() + td2.tolist()
random.shuffle(true_dist)
true_dist = np.asarray(true_dist)
#true_dist = np.random.binomial(1, 0.5, n)

# Summary statistics of the target distribution.
mean = true_dist.mean()
stdv = np.sqrt(true_dist.var())
def inner_fn(t, stm1, postm1, vtm1,\
r_Wq_hst_ot, r_Wq_hst_oht, r_Wq_hst_oat, r_Wq_hst_stm1, r_bq_hst,\
r_Wq_hst2_hst, r_bq_hst2,\
r_Wq_stmu_hst2, r_bq_stmu,\
r_Wq_stsig_hst2, r_bq_stsig,\
r_Wl_stmu_stm1, r_bl_stmu,\
r_Wl_stsig_stm1, r_bl_stsig,\
r_Wl_ost_st, r_bl_ost,\
r_Wl_ost2_ost, r_bl_ost2,\
r_Wl_ost3_ost2, r_bl_ost3,\
r_Wl_otmu_st, r_bl_otmu,\
r_Wl_otsig_st, r_bl_otsig,\
r_Wl_ohtmu_st, r_bl_ohtmu,\
r_Wl_ohtsig_st, r_bl_ohtsig,\
r_Wl_oatmu_st, r_bl_oatmu,\
r_Wl_oatsig_st, r_bl_oatsig,\
r_Wa_aht_st, r_ba_aht,\
r_Wa_atmu_aht, r_ba_atmu,\
r_Wa_atsig_aht, r_ba_atsig\
):
    """Advance the agent and its environment by one time step.

    From the previous hidden state ``stm1``, position ``postm1`` and velocity
    ``vtm1`` the step samples an action, updates position and velocity,
    generates three observation channels, infers the new hidden state with the
    variational (``r_Wq_*`` / ``r_bq_*``) networks, evaluates the likelihood
    (``r_Wl_*`` / ``r_bl_*``) networks and combines everything into a
    free-energy term. ``r_Wa_*`` / ``r_ba_*`` parameterise the action network.

    ``t`` is the step index; from ``t >= 20`` on the prior on the first state
    component is clamped. Presumably this function is the body of a
    ``theano.scan`` -- confirm at the call site.

    Returns the tuple ``(st, post, vt, oat, ot, oht, FEt, KL_st, hst, hst2,
    stmu, stsig, force, p_ot, p_oht, p_oat)``.
    """

    def _project(weights, values, width):
        # Per-perturbation matrix product of `weights` with `values` viewed
        # as (n_perturbations, width, n_proc).
        return T.batched_tensordot(
            weights,
            T.reshape(values, (n_perturbations, width, n_proc)),
            axes=[[2], [1]])

    # Use hidden state to generate action state
    aht = _project(r_Wa_aht_st, stm1, n_s) + r_ba_aht
    at_mu = _project(r_Wa_atmu_aht, aht, n_s) + r_ba_atmu
    at_sig = T.nnet.softplus(
        _project(r_Wa_atsig_aht, aht, n_s) + r_ba_atsig) + sig_min_action

    # Sample Action
    action_noise = theano_rng.normal((n_perturbations, n_oa, n_proc))
    at = at_mu + action_noise * at_sig

    # Update Environment: position-dependent force (two branches, split at
    # position 0) minus a velocity-proportional term, plus the squashed action.
    action_force = T.tanh(at)
    force_left = -2 * postm1 - 1
    force_right = (-T.pow(1 + 5 * T.sqr(postm1), -0.5)
                   - T.sqr(postm1) * T.pow(1 + 5 * T.sqr(postm1), -1.5)
                   - T.pow(postm1, 4) / 16.0)
    force = T.switch(T.lt(postm1, 0.0), force_left, force_right) - 0.25 * vtm1
    vt = vtm1 + 0.05 * force + 0.03 * action_force
    post = postm1 + vt

    # Generate Sensory Inputs:

    # 1.) Observation of Last Action
    oat = at

    # 2.) Noisy Observation of Current Position
    position_noise = theano_rng.normal((n_perturbations, n_o, n_proc))
    ot = post + position_noise * 0.01

    # 3.) Nonlinear Transformed Sensory Channel
    oht = T.exp(-T.sqr(post - 1.0) / 2.0 / 0.3 / 0.3)

    # Infer hidden state from last hidden state and current observations,
    # using variational density
    hst_pre = (_project(r_Wq_hst_stm1, stm1, n_s)
               + _project(r_Wq_hst_ot, ot, n_o)
               + _project(r_Wq_hst_oht, oht, n_oh)
               + _project(r_Wq_hst_oat, oat, n_oa)
               + r_bq_hst)
    hst = T.nnet.relu(hst_pre)
    hst2 = T.nnet.relu(_project(r_Wq_hst2_hst, hst, n_s) + r_bq_hst2)

    stmu = T.tanh(_project(r_Wq_stmu_hst2, hst2, n_s) + r_bq_stmu)
    stsig = T.nnet.softplus(
        _project(r_Wq_stsig_hst2, hst2, n_s) + r_bq_stsig) + sig_min_states

    # Explicitly encode position as homeostatic state variable
    # Rescale representation to fit within linear response of the
    # tanh-nonlinearity
    state_shape = (n_perturbations, n_s, n_proc)
    stmu = T.set_subtensor(stmu[:, 0, :], 0.1 * ot[:, 0, :]).reshape(state_shape)
    stsig = T.set_subtensor(stsig[:, 0, :], 0.005).reshape(state_shape)

    # Sample from variational density
    state_noise = theano_rng.normal((n_perturbations, n_s, n_proc))
    st = stmu + state_noise * stsig

    # Calculate parameters of likelihood distributions from sampled state
    ost = T.nnet.relu(_project(r_Wl_ost_st, st, n_s) + r_bl_ost)
    ost2 = T.nnet.relu(_project(r_Wl_ost2_ost, ost, n_s) + r_bl_ost2)
    ost3 = T.nnet.relu(_project(r_Wl_ost3_ost2, ost2, n_s) + r_bl_ost3)

    otmu = _project(r_Wl_otmu_st, ost3, n_s) + r_bl_otmu
    otsig = T.nnet.softplus(
        _project(r_Wl_otsig_st, ost3, n_s) + r_bl_otsig) + sig_min_obs

    ohtmu = _project(r_Wl_ohtmu_st, ost3, n_s) + r_bl_ohtmu
    ohtsig = T.nnet.softplus(
        _project(r_Wl_ohtsig_st, ost3, n_s) + r_bl_ohtsig) + sig_min_obs

    oatmu = _project(r_Wl_oatmu_st, ost3, n_s) + r_bl_oatmu
    oatsig = T.nnet.softplus(
        _project(r_Wl_oatsig_st, ost3, n_s) + r_bl_oatsig) + sig_min_obs

    # Calculate negative log-likelihood of observations
    p_ot = GaussianNLL(ot, otmu, otsig)
    p_oht = GaussianNLL(oht, ohtmu, ohtsig)
    p_oat = GaussianNLL(oat, oatmu, oatsig)

    # Calculate prior expectation on hidden state from previous state
    prior_stmu = T.tanh(_project(r_Wl_stmu_stm1, stm1, n_s) + r_bl_stmu)
    prior_stsig = T.nnet.softplus(
        _project(r_Wl_stsig_stm1, stm1, n_s) + r_bl_stsig) + sig_min_states

    # Explicitly encode expectations on homeostatic state variable
    # (only applied once t has reached 20)
    clamped_mu = T.set_subtensor(prior_stmu[:, 0, :], 0.1)
    prior_stmu = ifelse(T.lt(t, 20), prior_stmu, clamped_mu)
    clamped_sig = T.set_subtensor(prior_stsig[:, 0, :], 0.005)
    prior_stsig = ifelse(T.lt(t, 20), prior_stsig, clamped_sig)

    # Calculate KL divergence between variational density and prior density
    # using explicit formula for diagonal gaussians
    KL_st = KLGaussianGaussian(stmu, stsig, prior_stmu, prior_stsig)

    # Put free energy functional together
    FEt = KL_st + p_ot + p_oht + p_oat

    return (st, post, vt, oat, ot, oht, FEt, KL_st, hst, hst2, stmu, stsig,
            force, p_ot, p_oht, p_oat)
    def create_snn(self, layers='None'):
        """Build the spiking network graph and compile ``self.train`` / ``self.test``.

        The layer stack is created with ``self.create_net`` and unrolled with
        ``theano.scan`` over ``self.DoG_maps``. Each scan step feeds the
        layers' carried state back in, evaluates the last layer and lets every
        ``snn_enabled`` layer run ``do_stdp()``; the resulting weight changes
        are returned to scan as updates.

        Parameters
        ----------
        layers
            Layer specification passed to ``self.create_net``. The string
            ``'None'`` (the default) selects ``self.layers``.

        Side effects: sets ``self.all_layers``, ``self.train`` and ``self.test``.
        """
        print 'Building snn...'
        if (layers == 'None'):
            layers = self.layers
        input_layer = InputLayer(
            shape=self.input_shape,
            input_var=T.reshape(self.DoG_maps[0],
                                self.input_shape))  #the input layer of
        #the graph which takes a slice of DoG map.
        all_layers, _ = self.create_net(layers, input_layer)
        self.all_layers = all_layers
        # Symbolic learning rate scaling the STDP weight change.
        LR = T.scalar()

        def fn(*args):
            # Scan step function.
            #args[0] - input slice of DoG map
            #args[1] - output_spike train
            #args[2] - v_in for snn DenseLayer
            #args[3] - h_in for snn DenseLayer ** not present if snn enabled is false
            #.
            #.
            #.
            args = list(args)
            print(args)
            # for i in range(1,len(args)-1):
            #     args[i]=args[i][0]

            print(len(args))
            print('args')
            print(args)
            # Hand the carried state to the layers: two entries (v, H) for
            # snn-enabled layers, one entry (v) otherwise. Index 2 is the
            # first state entry (0 is the input slice, 1 the spike train).
            i = 2
            for layer in (all_layers[1:]):

                if (layer.snn_enabled):
                    layer.v_in = args[i]
                    layer.H_in = args[i + 1]
                    i += 2
                else:
                    layer.v_in = args[i]
                    i += 1

            all_layers[0].input_var = args[0]
            # #all_layers[0].input_var=T.reshape(args[0],(1,2,28,28))
            output_spike_train = lasagne.layers.get_output(
                all_layers[-1])  #the graph is created
            # print(T.shape(output_spike_train))
            # New state to carry to the next step, in the same order as it
            # was consumed above.
            vH_out_list = []
            #H_out_list=[]
            # List of (shared weight, new value) pairs returned to scan as updates.
            W_dict = []
            #
            for layer in all_layers[1:]:
                vH_out_list.append(layer.v_out)
                if (layer.snn_enabled):
                    layer.do_stdp()
                    vH_out_list.append(layer.H_out)
                    W_dict.append((layer.W, layer.W + LR * layer.update))
            print('fn returning : ')
            # print([output_spike_train]+vH_out_list)

            return [output_spike_train] + vH_out_list, W_dict
            #return vH_out_list

        def set_outputs_info():
            # Build the all-zero initial values for scan's recurrent outputs:
            # the spike train first, then the per-layer state in the order
            # `fn` consumes it.
            output = []

            #initial_spike_train=T.zeros(all_layers[-1].get_output_shape()[2])
            initial_spike_train = T.zeros(
                (self.batch_size, self.all_layers[-1].num_units))
            print(T.shape(initial_spike_train))

            #output.append(initial_spike_train)

            vH_list = []

            # for layer in all_layers[1:]:
            #     layer.set_inputs(T.vector(),T.tensor4())

            for layer in all_layers[1:]:
                # print(T.zeros(layer.get_output_shape()[0])
                vH_list.append(T.zeros(layer.get_output_shape()[0]))
                if (layer.snn_enabled):
                    # print()
                    vH_list.append(T.zeros(layer.get_output_shape()[1]))

            output = [initial_spike_train] + vH_list
            #output=vH_list

            print(output)
            #output = [T.shape_padleft(a) for a in output]
            # for i,a in enumerate(output):
            #     output[i]=T.shape_padleft(a)

            print('set output info :')
            print(output)
            #print(T.shape(output))
            return output

        # theano.printing.pydotprint(self.DoG_maps, outfile="./debug.png", var_with_name_simple=True)
        components, updates = theano.scan(fn,
                                          sequences=[self.DoG_maps],
                                          non_sequences=LR,
                                          outputs_info=set_outputs_info())

        #print(T.shape(components))
        # NOTE(review): `shape` is not used anywhere below.
        shape = T.shape(components[0])
        # Sum the spike train over scan steps and cap every entry at 1.
        output = T.sum(components[0], axis=0)
        output = T.switch(T.ge(output, 1.0), 1.0, output)
        output = T.cast(output, dtype=theano.config.floatX)  #128x1024
        time_peaked = T.sum(components[0], axis=2)  #32x128
        # Scale by the step index with the largest summed activity.
        # NOTE(review): 32 is presumably the number of scan steps -- confirm.
        real_valued = T.argmax(time_peaked, axis=0)
        real_valued = (32 - real_valued) / 32.0
        factor = T.sum(time_peaked, axis=0)  #to take care of no spike
        factor = factor * real_valued  #128,
        factor = T.reshape(factor, [T.shape(factor)[0], 1])
        factor = T.addbroadcast(factor, 1)
        output = output * factor

        # Accumulate the mean absolute difference between each updated value
        # and its shared variable, then average over the number of updates.
        delta_weight = T.zeros((1))
        print('*********')
        print(delta_weight)

        for key, value in updates.iteritems():
            delta_weight += T.mean(abs(value - key))

        delta_weight /= len(updates.keys())

        # Training applies the scan updates; testing only evaluates `output`.
        self.train = theano.function(inputs=[self.input, LR],
                                     outputs=[components[0], delta_weight],
                                     updates=updates,
                                     on_unused_input='ignore')

        self.test = theano.function(inputs=[self.input, LR], outputs=output)

        print('compiled')
示例#47
0
    def generate(self, state):
        """Run the generator on ``state`` and reshape its output to
        ``[n_sam, batch_size, 28, 28]``."""
        raw_output = self.generator(state)
        return T.reshape(raw_output, [self.n_sam, self.batch_size, 28, 28])
示例#48
0
# Restore a previously saved AudioFileSampler from "<path>/sampler.p".
sampler = sampling.AudioFileSampler.load(path + "/sampler.p")


def make_random(batch_size):
    """Draw a ``(batch_size, generator.gen_dim)`` array of standard-normal noise."""
    noise_shape = (batch_size, generator.gen_dim)
    return numpy.random.randn(*noise_shape)


################## network ###################

# Symbolic noise input; the batch size is taken from its first dimension.
z = T.dmatrix('z')
batch_size = z.shape[0]

# The noise is reshaped to [-1, 1, gen_dim] before being fed to the generator.
x_gen = generator(T.reshape(z,
                            [-1, 1, generator.gen_dim]))  # NOTE(review): flagged "!!!" by the original author -- check this reshape

# Symbolic batch of real samples.
x_in = T.dtensor3('x_in')

# Generator cost: negated energy distance between generated and real samples,
# both reshaped to [batch_size, 1, -1].
cost_gen = -sampling.energyDstTheano2(T.reshape(x_gen, [batch_size, 1, -1]),
                                      T.reshape(x_in, [batch_size, 1, -1]))

################# descent ##################
param_gen = generator.getParameters()
grad_gen = generator.getGradients(cost_gen, 1.0)

descent = SimpleDescent.Grad(param_gen, grad_gen)

# Training step taking [x_in, z] and producing [x_gen, cost_gen]; 0.01 is
# presumably the learning rate -- confirm in SimpleDescent.
train = descent.step([x_in, z], [x_gen, cost_gen], 0.01)

###################### training #############
示例#49
0
    def __init__(self,
                 input,
                 n_in,
                 n_hidden,
                 n_out,
                 activation=T.tanh,
                 output_type='real'):
        """Build the symbolic graph of a simple recurrent network.

        All trainable parameters live in one flat shared vector ``self.theta``;
        ``W``, ``W_in``, ``W_out``, ``h0``, ``bh`` and ``by`` are slices
        (reshaped where needed) of that vector.

        Parameters
        ----------
        input
            Symbolic input sequence. Its first axis is scanned over and
            ``input.shape[1]`` is used as the batch size of the initial state.
        n_in, n_hidden, n_out : int
            Sizes of the input, hidden and output layers.
        activation
            Hidden-layer non-linearity (default ``T.tanh``).
        output_type : {'real', 'binary', 'softmax'}
            Selects the output non-linearity and the loss function.

        Raises
        ------
        NotImplementedError
            If ``output_type`` is not one of the supported values.
        """

        self.input = input
        self.activation = activation
        self.output_type = output_type

        self.batch_size = T.iscalar()

        # theta is a vector of all trainable parameters
        # it represents the value of W, W_in, W_out, h0, bh, by
        theta_shape = n_hidden ** 2 + n_in * n_hidden + n_hidden * n_out + \
                      n_hidden + n_hidden + n_out
        self.theta = theano.shared(
            value=np.zeros(theta_shape, dtype=theano.config.floatX))

        # Parameters are reshaped views of theta
        param_idx = 0  # pointer to somewhere along parameter vector

        # recurrent weights (slice of theta, reshaped to a square matrix)
        self.W = self.theta[param_idx:(param_idx + n_hidden**2)].reshape(
            (n_hidden, n_hidden))
        self.W.name = 'W'
        W_init = np.asarray(np.random.uniform(size=(n_hidden, n_hidden),
                                              low=-0.01,
                                              high=0.01),
                            dtype=theano.config.floatX)
        param_idx += n_hidden**2

        # input to hidden layer weights
        self.W_in = self.theta[param_idx:(param_idx + n_in * \
                                          n_hidden)].reshape((n_in, n_hidden))
        self.W_in.name = 'W_in'
        W_in_init = np.asarray(np.random.uniform(size=(n_in, n_hidden),
                                                 low=-0.01,
                                                 high=0.01),
                               dtype=theano.config.floatX)
        param_idx += n_in * n_hidden

        # hidden to output layer weights
        self.W_out = self.theta[param_idx:(param_idx + n_hidden * \
                                           n_out)].reshape((n_hidden, n_out))
        self.W_out.name = 'W_out'

        W_out_init = np.asarray(np.random.uniform(size=(n_hidden, n_out),
                                                  low=-0.01,
                                                  high=0.01),
                                dtype=theano.config.floatX)
        param_idx += n_hidden * n_out

        # initial hidden state
        self.h0 = self.theta[param_idx:(param_idx + n_hidden)]
        self.h0.name = 'h0'
        h0_init = np.zeros((n_hidden, ), dtype=theano.config.floatX)
        param_idx += n_hidden

        # hidden bias
        self.bh = self.theta[param_idx:(param_idx + n_hidden)]
        self.bh.name = 'bh'
        bh_init = np.zeros((n_hidden, ), dtype=theano.config.floatX)
        param_idx += n_hidden

        # output bias
        self.by = self.theta[param_idx:(param_idx + n_out)]
        self.by.name = 'by'
        by_init = np.zeros((n_out, ), dtype=theano.config.floatX)
        param_idx += n_out

        # every element of theta must have been assigned to a parameter
        assert (param_idx == theta_shape)

        # for convenience
        self.params = [
            self.W, self.W_in, self.W_out, self.h0, self.bh, self.by
        ]

        # shortcut to norms (for monitoring)
        self.l2_norms = {}
        for param in self.params:
            self.l2_norms[param] = T.sqrt(T.sum(param**2))

        # initialize parameters
        # DEBUG_MODE gives division by zero error when we leave parameters
        # as zeros
        self.theta.set_value(
            np.concatenate([
                x.ravel() for x in (W_init, W_in_init, W_out_init, h0_init,
                                    bh_init, by_init)
            ]))

        # buffer with the same size as theta (not read in this method)
        self.theta_update = theano.shared(
            value=np.zeros(theta_shape, dtype=theano.config.floatX))

        # recurrent function (using tanh activation function) and linear output
        # activation function
        def step(x_t, h_tm1):
            h_t = self.activation(T.dot(x_t, self.W_in) + \
                                  T.dot(h_tm1, self.W) + self.bh)
            y_t = T.dot(h_t, self.W_out) + self.by
            return h_t, y_t

        # the hidden state `h` for the entire sequence, and the output for the
        # entire sequence `y` (first dimension is always time)
        # h0 is shared across variable-size batches by broadcasting it with
        # T.alloc to (batch, n_hidden)
        [self.h,
         self.y_pred], _ = theano.scan(step,
                                       sequences=self.input,
                                       outputs_info=[
                                           T.alloc(self.h0,
                                                   self.input.shape[1],
                                                   n_hidden), None
                                       ])

        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small. The absolute value must be taken element-wise before
        # summing; abs(W.sum()) lets positive and negative weights cancel.
        self.L1 = 0
        self.L1 += abs(self.W).sum()
        self.L1 += abs(self.W_in).sum()
        self.L1 += abs(self.W_out).sum()

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = 0
        self.L2_sqr += (self.W**2).sum()
        self.L2_sqr += (self.W_in**2).sum()
        self.L2_sqr += (self.W_out**2).sum()

        if self.output_type == 'real':
            self.loss = lambda y: self.mse(y)
        elif self.output_type == 'binary':
            # push through sigmoid
            self.p_y_given_x = T.nnet.sigmoid(self.y_pred)  # apply sigmoid
            self.y_out = T.round(self.p_y_given_x)  # round to {0,1}
            self.loss = lambda y: self.nll_binary(y)
        elif self.output_type == 'softmax':
            # push through softmax, computing vector of class-membership
            # probabilities in symbolic form
            #
            # T.nnet.softmax will not operate on T.tensor3 types, only matrices
            # We take our n_steps x n_seq x n_classes output from the net
            # and reshape it into a (n_steps * n_seq) x n_classes matrix
            # apply softmax, then reshape back
            y_p = self.y_pred
            y_p_m = T.reshape(y_p, (y_p.shape[0] * y_p.shape[1], -1))
            y_p_s = T.nnet.softmax(y_p_m)
            self.p_y_given_x = T.reshape(y_p_s, y_p.shape)

            # compute prediction as class whose probability is maximal
            self.y_out = T.argmax(self.p_y_given_x, axis=-1)
            self.loss = lambda y: self.nll_multiclass(y)

        else:
            raise NotImplementedError
示例#50
0
    def build(self,
              initial_stepsize,
              n_steps,
              target_acceptance_rate=.65,
              stepsize_dec=0.98,
              stepsize_min=0.0001,
              stepsize_max=0.5,
              stepsize_inc=1.02,
              avg_acceptance_slowness=0.9,
              seed=12345,
              init_state=None):
        """Create the sampler state and compile the HMC Theano functions.

        After this call the instance exposes:

        * ``self.h``         -- shared variable holding the current states
        * ``self.generated`` -- ``self.generate(self.h)``
        * ``self.eval_lld``  -- function of ``t`` returning
          ``-self.energy_fn(self.h)`` reshaped to ``[n_sam, batch_size]``
        * ``self.step``      -- function of ``t`` that applies one set of
          ``hmc_updates`` and returns ``[accept]``

        Both compiled functions substitute ``self.obs`` with ``self.obs_val``
        and ``self.t`` with their scalar argument.

        :param initial_stepsize: initial value of the shared step size
        :param n_steps: passed to ``hmc_move`` as the number of steps
        :param target_acceptance_rate: initial value of the shared average
            acceptance rate; also forwarded to ``hmc_updates``
        :param stepsize_dec: forwarded to ``hmc_updates``
        :param stepsize_min: forwarded to ``hmc_updates``
        :param stepsize_max: forwarded to ``hmc_updates``
        :param stepsize_inc: forwarded to ``hmc_updates``
        :param avg_acceptance_slowness: forwarded to ``hmc_updates``; used in
            the geometric average, 1.0 would be not moving at all
        :param seed: seed of the Theano random stream given to ``hmc_move``
        :param init_state: initial value for ``self.h``; when ``None`` the
            states are drawn from a standard normal
        """
        chain_shape = [self.n_sam * self.batch_size, self.hdim]

        if init_state is not None:
            start_h = init_state
            print('load init_state')
        else:
            start_h = np.random.normal(
                0, 1, size=chain_shape).astype(np.float32)

        # Momentum draw. It is not referenced again in this method, but the
        # call is kept so NumPy's global RNG is advanced as before.
        init_m = np.random.randn(*chain_shape).astype(np.float32)

        # h holds the current HMC states
        self.h = sharedX(start_h)
        t = T.scalar()
        self.generated = self.generate(self.h)

        neg_energy = -self.energy_fn(self.h)
        lld = T.reshape(neg_energy, [self.n_sam, self.batch_size])
        self.eval_lld = theano.function(
            [t], lld, givens={self.obs: self.obs_val, self.t: t})

        # shared variables adapted while sampling
        stepsize = sharedX(initial_stepsize)
        avg_acceptance_rate = sharedX(target_acceptance_rate)
        s_rng = TT.shared_randomstreams.RandomStreams(seed)

        # symbolic graph of one `n_steps` HMC simulation
        accept, final_pos = hmc_move(
            s_rng, self.h, self.energy_fn, stepsize, n_steps)

        # updates applied on every call of `self.step`
        adaptation = dict(
            final_pos=final_pos,
            accept=accept,
            stepsize_min=stepsize_min,
            stepsize_max=stepsize_max,
            stepsize_inc=stepsize_inc,
            stepsize_dec=stepsize_dec,
            target_acceptance_rate=target_acceptance_rate,
            avg_acceptance_slowness=avg_acceptance_slowness)
        simulate_updates = hmc_updates(
            self.h, stepsize, avg_acceptance_rate, **adaptation)

        self.step = theano.function(
            [t], [accept],
            updates=simulate_updates,
            givens={self.obs: self.obs_val, self.t: t})
        print("Error: unrecognized content layer: {}".format(
            args.content_layer))
        sys.exit(1)
    content_loss = T.sum(T.sqr(cl_X - cl_Xtr)) / T.cast(cl_X.size, floatX)

    # Build the style loss.
    style_loss = 0.
    X.set_value(style_image)
    for layer_name in args.style_layers:
        try:
            sl_X = perceptual_net_X.get_layer(layer_name).output
            sl_Xtr = perceptual_net_Xtr.get_layer(layer_name).output
        except AttributeError:
            print("Error: unrecognized style layer: {}".format(layer_name))
            sys.exit(1)
        slf_X = T.reshape(sl_X, (sl_X.shape[0], sl_X.shape[1], -1))
        gram_X = (
            T.batched_tensordot(slf_X, slf_X.dimshuffle(0, 2, 1), axes=1) /
            T.cast(slf_X.size, floatX)) * T.cast(slf_X.shape[0], floatX)
        slf_Xtr = T.reshape(sl_Xtr, (sl_Xtr.shape[0], sl_Xtr.shape[1], -1))
        gram_Xtr = (
            T.batched_tensordot(slf_Xtr, slf_Xtr.dimshuffle(0, 2, 1), axes=1) /
            T.cast(slf_Xtr.size, floatX)) * T.cast(slf_Xtr.shape[0], floatX)

        get_gram_X = theano.function([], gram_X)
        style_gram = theano.shared(get_gram_X()[0, :, :])
        style_loss = style_loss + T.sum(
            T.sqr(style_gram.dimshuffle("x", 0, 1) - gram_Xtr)) / T.cast(
                Xtr.shape[0], floatX)

    # Build the TV loss.
示例#52
0
    def get_output_for(self, input, **kwargs):
        """Insert a new axis at position 1 and tile the input along it.

        The input is reshaped from ``(d0, d1, ...)`` to ``(d0, 1, d1, ...)``
        and then repeated ``self._n`` times along the new axis.
        """
        trailing_dims = [input.shape[axis] for axis in xrange(1, input.ndim)]
        expanded_shape = [input.shape[0], 1] + trailing_dims

        # ndim is given explicitly because the target shape is symbolic
        expanded = T.reshape(input, expanded_shape, ndim=input.ndim + 1)
        return T.repeat(expanded, self._n, axis=1)
    def fit(self,
            sentences,
            cc_matrix=None,
            learning_rate=10e-5,
            reg=0.1,
            xmax=100,
            alpha=0.75,
            epochs=10,
            gd=False,
            use_theano=True):
        """Train word and context embeddings on a co-occurrence matrix.

        The co-occurrence matrix ``X`` (called X as in the paper, not the
        training data) is either loaded from ``cc_matrix`` or built from
        ``sentences``. The model
        ``W.dot(U.T) + b + c + mu`` is then fitted to ``log(X + 1)`` under
        the weighting ``f(X)``, using gradient descent or alternating least
        squares. On return ``self.W`` and ``self.U`` hold the learned
        matrices and the per-epoch cost curve is shown with matplotlib.

        :param sentences: sequence of sentences, each a sequence of integer
            word indices in ``[0, V)``. Only read when the co-occurrence
            matrix has to be built.
        :param cc_matrix: optional path of a cached co-occurrence matrix.
            If the file exists it is loaded with ``np.load``; otherwise the
            matrix is built and stored there with ``np.save``. With ``None``
            (the default) the matrix is built and nothing is read or written.
        :param learning_rate: step size of the gradient-descent updates
            (the default ``10e-5`` equals ``1e-4``).
        :param reg: regularisation strength. Used by the NumPy gradient
            descent and by ALS; the Theano cost has no regularisation term.
        :param xmax: counts at or above this value get weight 1.
        :param alpha: exponent of the weighting ``(X / xmax) ** alpha``.
        :param epochs: number of full passes.
        :param gd: use gradient descent if true, ALS otherwise.
        :param use_theano: with ``gd``, let Theano compute the gradients.
        """
        t0 = datetime.now()
        V = self.V
        D = self.D

        # The cache is optional: os.path.exists(None) and np.save(None, ...)
        # both raise, so only touch the file system when a path was given.
        if cc_matrix is not None and os.path.exists(cc_matrix):
            X = np.load(cc_matrix)
        else:
            # TODO: would it be better to use a sparse matrix?
            X = np.zeros((V, V))
            N = len(sentences)
            print("Number of sentences to process:", N)
            for it, sentence in enumerate(sentences, 1):
                if it % 10000 == 0:
                    print("processed", it, "/", N)
                n = len(sentence)
                for i in xrange(n):
                    wi = sentence[i]
                    start = max(0, i - self.context_sz)

                    # make sure "start" and "end" tokens are part of some
                    # context, otherwise their f(X) will be 0 (denominator in
                    # the bias update)
                    # NOTE(review): indices 0 and 1 are presumably the
                    # start/end tokens -- confirm against the vocabulary.
                    if i - self.context_sz < 0:
                        points = 1.0 / (i + 1)
                        X[wi, 0] += points
                        X[0, wi] += points
                    if i + self.context_sz > n:
                        points = 1.0 / (n - i)
                        X[wi, 1] += points
                        X[1, wi] += points

                    # Only the left context is visited; because every update
                    # is written symmetrically this also covers the right
                    # context of the earlier word.
                    for j in xrange(start, i):
                        wj = sentence[j]
                        points = 1.0 / (i - j)  # j < i, so this is positive
                        X[wi, wj] += points
                        X[wj, wi] += points
            # save the cc matrix because it takes forever to create
            if cc_matrix is not None:
                np.save(cc_matrix, X)

        print("max in X:", X.max())

        # weighting
        fX = np.zeros((V, V))
        fX[X < xmax] = (X[X < xmax] / float(xmax))**alpha
        fX[X >= xmax] = 1

        print("max in f(X):", fX.max())

        # target
        logX = np.log(X + 1)

        print("max in log(X):", logX.max())

        print("time to build co-occurrence matrix:", (datetime.now() - t0))

        # initialize weights
        W = np.random.randn(V, D) / np.sqrt(V + D)
        b = np.zeros(V)
        U = np.random.randn(V, D) / np.sqrt(V + D)
        c = np.zeros(V)
        mu = logX.mean()

        if gd and use_theano:
            thW = theano.shared(W)
            thb = theano.shared(b)
            thU = theano.shared(U)
            thc = theano.shared(c)
            thLogX = T.matrix('logX')
            thfX = T.matrix('fX')

            params = [thW, thb, thU, thc]

            thDelta = thW.dot(thU.T) + T.reshape(thb, (V, 1)) + T.reshape(
                thc, (1, V)) + mu - thLogX
            thCost = (thfX * thDelta * thDelta).sum()

            grads = T.grad(thCost, params)

            updates = [(p, p - learning_rate * g)
                       for p, g in zip(params, grads)]

            train_op = theano.function(
                inputs=[thfX, thLogX],
                updates=updates,
            )

        costs = []
        for epoch in xrange(epochs):
            delta = W.dot(U.T) + b.reshape(V, 1) + c.reshape(1, V) + mu - logX
            cost = (fX * delta * delta).sum()
            costs.append(cost)
            print("epoch:", epoch, "cost:", cost)

            if gd:
                # gradient descent method

                if use_theano:
                    # one full-batch step, then pull the values back so the
                    # cost above is computed from the updated parameters
                    train_op(fX, logX)
                    W = thW.get_value()
                    b = thb.get_value()
                    U = thU.get_value()
                    c = thc.get_value()

                else:
                    # update W (keep a copy so U is updated from the old W)
                    oldW = W.copy()
                    for i in xrange(V):
                        W[i] -= learning_rate * (fX[i, :] * delta[i, :]).dot(U)
                    W -= learning_rate * reg * W

                    # update b
                    for i in xrange(V):
                        b[i] -= learning_rate * fX[i, :].dot(delta[i, :])
                    b -= learning_rate * reg * b

                    # update U
                    for j in xrange(V):
                        U[j] -= learning_rate * (fX[:, j] *
                                                 delta[:, j]).dot(oldW)
                    U -= learning_rate * reg * U

                    # update c
                    for j in xrange(V):
                        c[j] -= learning_rate * fX[:, j].dot(delta[:, j])
                    c -= learning_rate * reg * c

            else:
                # ALS method

                # update W: solve a regularised DxD system per row.
                # (fX[i, :] * U.T).dot(U) is the vectorised form of
                # sum_j fX[i, j] * outer(U[j], U[j]).
                for i in xrange(V):
                    matrix = reg * np.eye(D) + (fX[i, :] * U.T).dot(U)
                    vector = (fX[i, :] * (logX[i, :] - b[i] - c - mu)).dot(U)
                    W[i] = np.linalg.solve(matrix, vector)

                # update b
                for i in xrange(V):
                    denominator = fX[i, :].sum()
                    numerator = fX[i, :].dot(logX[i, :] - W[i].dot(U.T) - c -
                                             mu)
                    b[i] = numerator / denominator / (1 + reg)

                # update U
                for j in xrange(V):
                    matrix = reg * np.eye(D) + (fX[:, j] * W.T).dot(W)
                    vector = (fX[:, j] * (logX[:, j] - b - c[j] - mu)).dot(W)
                    U[j] = np.linalg.solve(matrix, vector)

                # update c
                for j in xrange(V):
                    denominator = fX[:, j].sum()
                    numerator = fX[:, j].dot(logX[:, j] - W.dot(U[j]) - b - mu)
                    c[j] = numerator / denominator / (1 + reg)

        self.W = W
        self.U = U

        plt.plot(costs)
        plt.show()
示例#54
0
def build_model(tparams, options):
    """
    Assemble the symbolic training graph.

    Symbolic inputs created here:

    * ``x``    -- float32 4-tensor of video blocks,
      (n_timesteps, n_samples, n_annotations, ctxdim)
    * ``mask`` -- float32 matrix, multiplied with the (TS, BS) cost
    * ``y``    -- int64 3-tensor of action labels

    The context is averaged over time and annotations, passed through the
    optional ``ff_init_*`` layers and used to initialise the state and memory
    of the ``lstm_cond`` decoder. The decoder output goes through the output
    layers ending in a sigmoid. The cost is the masked sum of the element-wise
    binary cross-entropy, and the predictions are the sigmoid outputs
    averaged over the last ``options['last_n']`` time steps.

    :param tparams: model parameters, handed to every layer
    :param options: dict of model options (``last_n``, ``n_layers_init``,
        ``use_dropout``, ``selector``, ``ctx2out``, ``n_layers_out`` are read
        here; the whole dict is also handed to every layer)
    :return: ``trng, use_noise, [x, mask, y], alphas, cost, opt_outs, preds``
    """

    def _ff(state_below, prefix, activ):
        # The layer is looked up on every call, as the inline code did.
        return get_layer('ff')[1](tparams,
                                  state_below,
                                  options,
                                  prefix=prefix,
                                  activ=activ)

    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))
    last_n = options['last_n']

    # video blocks. (n_timesteps, n_samples, n_annotations, ctxdim)
    x = tensor.tensor4('x', dtype='float32')
    mask = tensor.matrix('mask', dtype='float32')
    # action labels
    y = tensor.tensor3('y', dtype='int64')

    ctx = x

    # initial state/cell: average over time -> (n_samples, n_annotations,
    # ctxdim), then over annotations -> (n_samples, ctxdim)
    ctx_mean = ctx.mean(0).mean(1)

    for lidx in xrange(1, options['n_layers_init']):
        ctx_mean = _ff(ctx_mean, 'ff_init_%d' % lidx, 'rectifier')
        if options['use_dropout']:
            ctx_mean = dropout_layer(ctx_mean, use_noise, trng)

    init_state = _ff(ctx_mean, 'ff_state', 'tanh')
    init_memory = _ff(ctx_mean, 'ff_memory', 'tanh')

    # decoder
    decoder_outs = get_layer('lstm_cond')[1](tparams,
                                             ctx,
                                             options,
                                             prefix='decoder',
                                             mask=mask,
                                             init_state=init_state,
                                             init_memory=init_memory,
                                             trng=trng,
                                             use_noise=use_noise)
    # pick the decoder outputs that are used below
    proj_h = decoder_outs[0]
    alphas = decoder_outs[2]
    ctxs = decoder_outs[3]
    if options['selector']:
        sels = decoder_outs[4]
    if options['use_dropout']:
        proj_h = dropout_layer(proj_h, use_noise, trng)

    # output layers
    logit = _ff(proj_h, 'ff_logit_lstm', 'linear')
    if options['ctx2out']:
        logit += _ff(ctxs, 'ff_logit_ctx', 'linear')
    logit = tanh(logit)
    if options['use_dropout']:
        logit = dropout_layer(logit, use_noise, trng)
    if options['n_layers_out'] > 1:
        for lidx in xrange(1, options['n_layers_out']):
            logit = _ff(logit, 'ff_logit_h%d' % lidx, 'rectifier')
            if options['use_dropout']:
                logit = dropout_layer(logit, use_noise, trng)

    # final sigmoid layer, shape (TS, BS, o/p)
    logit = _ff(logit, 'ff_logit', 'sigmoid')

    # flatten time and batch: (TSxBS, o/p)
    probs = logit.reshape([logit.shape[0] * logit.shape[1], logit.shape[2]])

    # Cost function: element-wise binary cross-entropy, (TSxBS, 12)
    targets = tensor.reshape(y, [y.shape[0] * y.shape[1], y.shape[2]])
    positive_term = -targets * tensor.log(probs + 1e-8)
    negative_term = (1 - targets) * tensor.log(1 - probs + 1e-8)
    cost = positive_term - negative_term
    cost = cost.sum(1)  # (TSxBS,)
    cost = cost.reshape([x.shape[0], x.shape[1]])  # (TS, BS)
    cost = (cost * mask).sum(0).sum(0)  # scalar

    # Predictions: average the last `last_n` time steps
    probs = probs.reshape([x.shape[0], x.shape[1],
                           probs.shape[1]])  # (TS, BS, 12)
    preds = tensor.mean(probs[-last_n:, :, :], axis=0)  # (BS, 12)

    opt_outs = dict()
    if options['selector']:
        opt_outs['selector'] = sels

    return trng, use_noise, [x, mask, y], alphas, cost, opt_outs, preds
示例#55
0
文件: sp.py 项目: jsalvatier/Theano-1
def convolve(kerns, kshp, nkern, images, imgshp, step=(1, 1), bias=None,
             mode='valid', flatten=True):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: note that this means that each feature map (image
        generated by each kernel) is contiguous in memory. The memory
        layout will therefore be: [ <feature_map_0> <feature_map_1>
        ... <feature_map_n>], where <feature_map> represents a
        "feature map" in raster order

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: image dimensions, either (height, width) or
                   (nfeatures, height, width); a 2-element tuple or list
                   gets nfeatures=1 prepended
    :param step: determines number of pixels between adjacent receptive fields
                 (tuple containing dx,dy values)
    :param bias: optional bias added to the kernel responses before they are
                 rearranged into feature maps; skipped when None
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :param flatten: flatten the last 2 dimensions of the output. By default,
                    instead of generating a batchsize x nkern x outsize
                    tensor, will flatten to batchsize x nkern*outsize

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern,height,width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    N = numpy
    # start by computing output dimensions, size, etc
    kern_size = N.int64(N.prod(kshp))

    # inshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if N.size(imgshp) == 2:
        # tuple() so that a 2-element list is accepted as well
        imgshp = (1,) + tuple(imgshp)

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = \
            convolution_indices.conv_eval(imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(N.ones(indices.size), indices,
                                    indptr, spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = tensor.stack(images.shape[0] * tensor.as_tensor(N.prod(outshp)),\
                          tensor.as_tensor(imgshp[0] * kern_size))
    patch_stack = tensor.reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = tensor.dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = tensor.stack(images.shape[0],\
                          tensor.as_tensor(N.prod(outshp)),\
                          tensor.as_tensor(nkern))
    tensout = tensor.reshape(output, newshp, ndim=3)
    # swap the last two axes: bsize x outshp x nkern -> bsize x nkern x outshp
    output = tensor.DimShuffle((False,) * tensout.ndim, (0, 2, 1))(tensout)
    if flatten:
        output = tensor.flatten(output, 2)

    return output, N.hstack((nkern, outshp))
示例#56
0
    def define_layers(self):
        """Build the stacked recurrent layers and the step-wise decoder.

        Creates ``self.num_hds`` hidden layers (``GRULayer`` or ``LSTMLayer``,
        chosen by ``self.cell``), then a decoder cell of the same kind whose
        hidden state is projected through ``W_hy``/``b_y`` and a softmax at
        every step of a ``theano.scan`` over ``self.mask``.

        Sets ``self.layers``, ``self.params``, ``self.W_hy``, ``self.b_y``,
        ``self.decoder``, ``self.activation`` (the decoded outputs reshaped
        to ``(self.words, batch_size * last_shape[0])``) and ``self.hhhh``
        (the hidden states returned by the scan).

        NOTE(review): only the values "gru" and "lstm" of ``self.cell`` are
        handled. For any other value ``hidden_layer``, ``self.decoder``,
        ``h`` and ``y`` are never assigned in this method -- confirm that
        callers validate ``self.cell`` beforehand.
        """
        self.layers = []
        self.params = []

        # Hidden stack: layer 0 reads self.X, layer i reads the activation of
        # layer i - 1. h_shape is (input size, hidden size) of the layer.
        for i in xrange(self.num_hds):
            if i == 0:
                layer_input = self.X
                h_shape = (self.out_size, self.hidden_size_list[0])
            else:
                layer_input = self.layers[i - 1].activation
                h_shape = (self.hidden_size_list[i - 1],
                           self.hidden_size_list[i])

            if self.cell == "gru":
                hidden_layer = GRULayer(self.rng,
                                        self.prefix + self.layer_id + str(i),
                                        h_shape, layer_input, self.mask,
                                        self.is_train, self.batch_size,
                                        self.drop_rate)
            elif self.cell == "lstm":
                hidden_layer = LSTMLayer(self.rng,
                                         self.prefix + self.layer_id + str(i),
                                         h_shape, layer_input, self.mask,
                                         self.is_train, self.batch_size,
                                         self.drop_rate)
            self.layers.append(hidden_layer)
            self.params += hidden_layer.params

        # The last layer is the decoder. Its input is the top hidden
        # activation, or self.X when there are no hidden layers.
        # last_shape is (decoder input size, decoder hidden size).
        if self.num_hds == 0:
            output_layer_input = self.X
            last_shape = (self.in_size, self.out_size)
        else:
            output_layer_input = self.layers[-1].activation
            last_shape = (self.in_size, self.layers[-1].out_size)

        # Projection from the decoder hidden state to the softmax output.
        self.W_hy = init_weights((last_shape[1], last_shape[0]),
                                 self.prefix + "W_hy" + self.layer_id)
        self.b_y = init_bias(last_shape[0],
                             self.prefix + "b_y" + self.layer_id)
        if self.cell == "gru":
            self.decoder = GRULayer(self.rng, self.prefix + self.layer_id,
                                    last_shape, output_layer_input, self.mask,
                                    self.is_train, self.batch_size,
                                    self.drop_rate)

            # One decoding step. scan passes the mask slice first, then the
            # previous value of each recurrent output in outputs_info order,
            # so `x` is the previous step's y (zeros at the first step).
            def _active(m, pre_h, x):
                # States travel between steps as flat rows; unflatten them.
                x = T.reshape(x, (self.batch_size, last_shape[0]))
                pre_h = T.reshape(pre_h, (self.batch_size, last_shape[1]))

                h = self.decoder._active(x, pre_h)
                y = T.nnet.softmax(T.dot(h, self.W_hy) + self.b_y)
                # multiply each row of the output by its mask entry
                y = y * m[:, None]

                # flatten again to match the shape of the initial states
                h = T.reshape(h, (1, self.batch_size * last_shape[1]))
                y = T.reshape(y, (1, self.batch_size * last_shape[0]))
                return h, y

            [h, y], updates = theano.scan(
                _active,  #n_steps = self.words,
                sequences=[self.mask],
                outputs_info=[{
                    'initial': output_layer_input,
                    'taps': [-1]
                },
                              T.alloc(floatX(0.), 1,
                                      self.batch_size * last_shape[0])])
        elif self.cell == "lstm":
            self.decoder = LSTMLayer(self.rng, self.prefix + self.layer_id,
                                     last_shape, output_layer_input, self.mask,
                                     self.is_train, self.batch_size,
                                     self.drop_rate)

            # Same as the GRU step, with the additional cell state c.
            def _active(m, pre_h, pre_c, x):
                x = T.reshape(x, (self.batch_size, last_shape[0]))
                pre_h = T.reshape(pre_h, (self.batch_size, last_shape[1]))
                pre_c = T.reshape(pre_c, (self.batch_size, last_shape[1]))

                h, c = self.decoder._active(x, pre_h, pre_c)

                y = T.nnet.softmax(T.dot(h, self.W_hy) + self.b_y)
                y = y * m[:, None]

                h = T.reshape(h, (1, self.batch_size * last_shape[1]))
                c = T.reshape(c, (1, self.batch_size * last_shape[1]))
                y = T.reshape(y, (1, self.batch_size * last_shape[0]))
                return h, c, y

            # Both h and c start from output_layer_input; y starts at zeros.
            [h, c, y], updates = theano.scan(
                _active,
                sequences=[self.mask],
                outputs_info=[{
                    'initial': output_layer_input,
                    'taps': [-1]
                }, {
                    'initial': output_layer_input,
                    'taps': [-1]
                },
                              T.alloc(floatX(0.), 1,
                                      self.batch_size * last_shape[0])])

        # Drop the singleton axis added per step: one flat row per time step.
        y = T.reshape(y, (self.words, self.batch_size * last_shape[0]))
        self.activation = y
        self.params += self.decoder.params
        self.params += [self.W_hy, self.b_y]
        # self.layers.append(self.decoder)
        self.hhhh = h
示例#57
0
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2), k=4):
        """
        Build a convolution layer followed by k-max pooling along the last
        axis, with shared variable internal parameters.

        For every row of every convolved feature map the ``k`` largest values
        are kept in their original left-to-right order, so the output has
        shape (batch size, number of filters, convolved height, k).

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height,filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: pooling factor (#rows,#cols); here it only enters
                         the fan-out used to scale the initial weights

        :type k: int
        :param k: number of values kept per row by the k-max pooling
        """
        assert image_shape[1] == filter_shape[1]
        self.input = input

        # fan-in: "num input feature maps * filter height * filter width"
        # inputs reach each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # fan-out: "num output feature maps * filter height * filter width"
        # divided by the pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))

        # uniform initialisation of the filters within +/- W_bound
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        initial_W = numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX)
        self.W = theano.shared(initial_W, borrow=True)

        # one bias per output feature map, initialised to zero
        initial_b = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=initial_b, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(input=input, filters=self.W,
                               filter_shape=filter_shape,
                               image_shape=image_shape)

        # images2neibs produces a 2D matrix; with a (1, width) neighbourhood
        # each of its rows is one row of one feature map
        rows = TSN.images2neibs(ten4=conv_out,
                                neib_shape=(1,conv_out.shape[3]),
                                mode='ignore_borders')

        # column indices of the k largest entries of each row, put back into
        # ascending column order so the original ordering is preserved
        ascending = T.argsort(rows, axis=1)
        top_k_cols = T.sort(ascending[:,-k:], axis=1)

        # gather the selected entries with paired (row, column) indices
        row_idx = T.repeat(T.arange(rows.shape[0]), k)
        col_idx = top_k_cols.flatten()
        kmax_flat = rows[row_idx, col_idx]

        # restore the 4D layout: leading dims of conv_out, its height, then k
        pooled_shape = T.cast(
            T.join(0,
                   conv_out.shape[:-2],
                   T.as_tensor([conv_out.shape[2]]),
                   T.as_tensor([k])),
            'int64')
        pooled_out = T.reshape(kmax_flat, pooled_shape, ndim=4)

        # (the classic alternative would be downsample.max_pool_2d with
        # ds=poolsize and ignore_border=True; it is not used here)

        # add the bias term. Since the bias is a vector (1D array), it is
        # first reshaped to (1,n_filters,1,1) so that it broadcasts across
        # mini-batches and feature map width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]
    def __init__(self,
                 env_spec,
                 hidden_dim=32,
                 feature_network=None,
                 state_include_action=True,
                 hidden_nonlinearity=NL.tanh):
        """
        Build a recurrent (GRU) policy over a discrete action space.

        :param env_spec: A spec for the env; its action space must be
            ``Discrete``.
        :param hidden_dim: dimension of hidden layer
        :param feature_network: optional network applied to the flattened
            input; its output is reshaped back to
            (input.shape[0], input.shape[1], feature_dim) and fed to the GRU.
            With ``None`` the raw input is used.
        :param state_include_action: when true the input dimension is
            observation dim + action dim, otherwise observation dim only.
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        """
        assert isinstance(env_spec.action_space, Discrete)
        # Must see the constructor arguments, so it stays ahead of any
        # other local assignment.
        Serializable.quick_init(self, locals())
        super(CategoricalGRUPolicy, self).__init__(env_spec)

        obs_dim = env_spec.observation_space.flat_dim
        action_dim = env_spec.action_space.flat_dim
        input_dim = obs_dim + action_dim if state_include_action else obs_dim

        l_input = L.InputLayer(shape=(None, None, input_dim), name="input")

        if feature_network is not None:
            feature_dim = feature_network.output_layer.output_shape[-1]
            l_flat_feature = feature_network.output_layer

            def _unflatten(flat_feature, input):
                # give the flat features the two leading dims of the input
                return TT.reshape(
                    flat_feature,
                    [input.shape[0], input.shape[1], feature_dim])

            def _unflattened_shape(_, input_shape):
                return (input_shape[0], input_shape[1], feature_dim)

            l_feature = OpLayer(l_flat_feature,
                                extras=[l_input],
                                name="reshape_feature",
                                op=_unflatten,
                                shape_op=_unflattened_shape)
        else:
            feature_dim = input_dim
            l_flat_feature = None
            l_feature = l_input

        prob_network = GRUNetwork(input_shape=(feature_dim, ),
                                  input_layer=l_feature,
                                  output_dim=env_spec.action_space.n,
                                  hidden_dim=hidden_dim,
                                  hidden_nonlinearity=hidden_nonlinearity,
                                  output_nonlinearity=TT.nnet.softmax,
                                  name="prob_network")

        self.prob_network = prob_network
        self.feature_network = feature_network
        self.l_input = l_input
        self.state_include_action = state_include_action

        # Single-step function: maps a flat input and the previous hidden
        # state to the step output and the new hidden state.
        flat_input_var = TT.matrix("flat_input")
        if feature_network is not None:
            feature_var = L.get_output(
                l_flat_feature, {feature_network.input_layer: flat_input_var})
        else:
            feature_var = flat_input_var

        step_inputs = [flat_input_var,
                       prob_network.step_prev_hidden_layer.input_var]
        step_outputs = L.get_output(
            [prob_network.step_output_layer, prob_network.step_hidden_layer],
            {prob_network.step_input_layer: feature_var})
        self.f_step_prob = ext.compile_function(step_inputs, step_outputs)

        self.input_dim = input_dim
        self.action_dim = action_dim
        self.hidden_dim = hidden_dim

        self.prev_action = None
        self.prev_hidden = None
        self.dist = RecurrentCategorical(env_spec.action_space.n)

        out_layers = [prob_network.output_layer]
        if feature_network is not None:
            out_layers.append(feature_network.output_layer)

        LasagnePowered.__init__(self, out_layers)
示例#59
0
    def __init__(self,
                 C,
                 D,
                 K,
                 S,
                 rng_state=None,
                 epsilon=1e-2,
                 use_precision=True,
                 tradeoff_hybrid=0.5,
                 tradeoff_ssl=0.9,
                 gamma=1,
                 eta=10,
                 init_params=None):
        '''
        Constructs the Theano computation graph for the given parameters.

        The constructor creates the symbolic inputs (self.x, self.t), the
        shared model parameters (self.params), the per-component and per-class
        log-probabilities (self.logpK, self.logpC), and the symbolic cost
        expressions (self.cost_*), predictions (self.y) and classification
        error (self.ce). Nothing is compiled or evaluated here.

        Samples whose label in self.t is negative are treated as unlabeled;
        samples with a label >= 0 are treated as labeled.

        C: Number of classes
        D: Number of input features
        K: List of length C containing the number of components per class
        S: Number of dimensions of the low-rank approximation of the DPLR matrix
          structure. Note that this parameter is actually called 'R' in the
          paper.
        rng_state: Random number generator seed to use if the parameters should
          be initialized randomly. This parameter is ignored if 'init_params' is
          given.
        epsilon: Regularizer for the diagonal of the covariance matrices. It is
          added to the softplus of the diagonal parameters.
        use_precision: Determines if precisions or covariances should be used.
          The precision is the inverse of the covariance matrix. Note that the
          check is 'use_precision == True', so only values comparing equal to
          True select the precision branch.
        tradeoff_hybrid: The lambda parameter of the hybrid objective. Close to
          1 means very generative, close to 0 means very discriminative.
        tradeoff_ssl: The kappa parameter of the hybrid objective for semi-
          supervised learning. Close to 1 puts more weight onto the labeled
          samples, close to 0 puts more weights onto the unlabeled samples.
        gamma: The gamma parameter of the MM/LM objective (the margin that is
          added to the log-probability differences).
        eta: The parameter for the softmax approximation (the margins are
          multiplied by eta before the log-sum-exp and divided by it after).
        init_params: Use this to provide initial parameters. Usually parameters
          obtained with the EM algorithm are provided here. init_params must be
          a five-tuple containing
            - mu_vals (K_all x D): Mean values for each component
            - s_vals (K_all x D x S): Low-rank matrices for each component
            - d_rho_vals (D x K_all): Diagonal variances for each component (inverse softplus values)
            - prior_k_rho_vals (K_all): Logits of the component priors
            - prior_c_rho_vals (C): Logits of the class priors
          with K_all = sum(K). The component parameters are stored linearly for
          all classes. E.g. the first K[0] entries correspond to components of
          class 0. If precisions are used instead of covariances, the use of
          s_vals and d_rho_vals changes accordingly.
          The shapes are checked with assert statements.
        '''
        # Symbolic inputs: feature matrix and int32 label vector.
        self.x = T.matrix('x')
        self.t = T.ivector('t')
        self.tradeoff_hybrid = tradeoff_hybrid
        self.tradeoff_ssl = tradeoff_ssl
        self.gamma = gamma
        self.eta = eta
        self.epsilon = epsilon

        # Total number of mixture components over all classes
        K_all = np.sum(K)

        if init_params is None:
            # Random initialization: Gaussian means / low-rank factors / diagonal
            # parameters, and all-zero prior logits (i.e. uniform priors).
            rng = np.random.RandomState(rng_state)
            mu_vals = rng.normal(0., 1., size=(K_all, D))
            s_vals = rng.normal(0., 1., size=(K_all, D, S))
            d_rho_vals = rng.normal(0, 0.1, size=(D, K_all))
            prior_k_rho_vals = np.zeros((np.sum(K), ))
            prior_c_rho_vals = np.zeros((C, ))
        else:
            # User-supplied initialization: verify that all shapes match
            assert len(init_params) == 5
            mu_vals, s_vals, d_rho_vals, prior_k_rho_vals, prior_c_rho_vals = init_params
            assert mu_vals.shape == (K_all, D)
            assert s_vals.shape == (K_all, D, S)
            assert d_rho_vals.shape == (D, K_all)
            assert prior_k_rho_vals.shape == (K_all, )
            assert prior_c_rho_vals.shape == (C, )

        # Convert everything to Theano's configured float type
        mu_vals = np.asarray(mu_vals, dtype=theano.config.floatX)
        s_vals = np.asarray(s_vals, dtype=theano.config.floatX)
        d_rho_vals = np.asarray(d_rho_vals, dtype=theano.config.floatX)
        prior_k_rho_vals = np.asarray(prior_k_rho_vals,
                                      dtype=theano.config.floatX)
        prior_c_rho_vals = np.asarray(prior_c_rho_vals,
                                      dtype=theano.config.floatX)

        # Shared variables
        self.means = theano.shared(mu_vals, name='means', borrow=True)
        self.s = theano.shared(s_vals, name='s', borrow=True)
        self.d_rho = theano.shared(d_rho_vals, name='d_rho', borrow=True)
        self.prior_k_rho = theano.shared(prior_k_rho_vals,
                                         name='prior_k_rho',
                                         borrow=True)
        self.prior_c_rho = theano.shared(prior_c_rho_vals,
                                         name='prior_c_rho',
                                         borrow=True)
        self.params = [
            self.means, self.s, self.d_rho, self.prior_k_rho, self.prior_c_rho
        ]

        # Diagonal entries (D x K_all): softplus of the unconstrained parameter
        # plus epsilon, so every entry is strictly larger than epsilon.
        self.d = T.nnet.softplus(self.d_rho) + self.epsilon
        if use_precision == True:
            # s and d are used to represent precision matrices
            # The quadratic form (x-m)^T (diag(d) + s s^T) (x-m) is expanded into
            # six terms and accumulated one by one; the trailing comments name
            # the term that each line contributes.
            self.exponent = T.dot(self.x**2, self.d)  #xDx
            self.exponent -= 2 * T.dot(self.x, self.d * self.means.T)  #-2xDm
            self.exponent += T.sum(self.means**2 * self.d.T, axis=1)  # mDm
            self.exponent += T.sum(T.dot(self.x, self.s)**2, axis=2)  # xSSx
            self.exponent -= 2 * T.sum(T.dot(self.x, self.s) * T.sum(
                self.s * self.means[:, :, None], axis=1)[None, :, :],
                                       axis=2)  # -2xSSm
            self.exponent += T.sum(T.sum(self.s * self.means[:, :, None],
                                         axis=1)**2,
                                   axis=1)  # mSSm
            self.exponent *= -0.5

            # Per-component (S x S) auxiliary matrix s^T diag(1/d) s + I.
            # NOTE(review): this looks like the matrix determinant lemma, which
            # reduces the log-determinant of the (D x D) DPLR matrix to the
            # log-determinant of an (S x S) matrix plus sum(log(d)) - confirm.
            eye_S = T.eye(S, dtype=theano.config.floatX)
            self.aux_matrix = T.batched_tensordot(
                self.s / self.d.T[:, :, None], self.s, axes=(1, 1)) + eye_S
            # logdet_psd is defined outside this block; presumably it returns
            # the log-determinant of a positive semi-definite matrix. It is
            # applied to every component's auxiliary matrix via scan.
            self.aux_logdet, _ = theano.scan(fn=lambda aux: logdet_psd(aux),
                                             outputs_info=None,
                                             sequences=self.aux_matrix,
                                             non_sequences=None)
            self.logdet = T.sum(T.log(self.d), axis=0) + self.aux_logdet

            # logpK contains all log probabilities of all components in an (N x sum(K)) array
            # Note that the log component priors are not added yet
            # The log-determinant enters with a positive sign here because it
            # belongs to the precision matrix.
            self.logpK = -0.5 * D * T.log(
                2. * np.pi) + 0.5 * self.logdet + self.exponent
        else:
            # s and d are used to represent covariance matrices
            if S == 1:
                # Special case: compute the (1 x 1) auxiliary matrix by hand
                # instead of using batched_tensordot (see the note below).
                self.aux_matrix = T.sum(
                    self.s[:, :, 0] / self.d.T * self.s[:, :, 0],
                    axis=1).reshape((K_all, 1, 1)) + 1.
            else:
                # Since the latest Cuda/Theano update, the following two lines
                # cause an error in the case of S=1.
                eye_S = T.eye(S, dtype=theano.config.floatX)
                self.aux_matrix = T.batched_tensordot(
                    self.s / self.d.T[:, :, None], self.s, axes=(1, 1)) + eye_S
            # For every component compute both the inverse and the
            # log-determinant of the (S x S) auxiliary matrix.
            (self.aux_inv, self.aux_logdet), _ = theano.scan(
                fn=lambda aux:
                [T.nlinalg.matrix_inverse(aux),
                 logdet_psd(aux)],
                outputs_info=None,
                sequences=[self.aux_matrix],
                non_sequences=None)
            self.logdet = T.sum(T.log(self.d), axis=0) + self.aux_logdet

            # Product inv(d) * s for all K --> K x D x S
            self.rs = self.s / self.d.T[:, :, None]
            # Product inv(d) * s * aux_inv for all K --> K x D x S
            self.ls = T.batched_dot(self.rs, self.aux_inv)

            # s and d are used to represent covariance matrices
            # NOTE(review): the terms below appear to follow the Woodbury
            # identity, i.e. the inverse covariance is inv(d) minus the
            # low-rank correction ls * rs^T - confirm against the paper.
            self.exponent = T.dot(self.x**2, 1. / self.d)  #xDx
            self.exponent -= 2 * T.dot(self.x,
                                       (1. / self.d) * self.means.T)  #-2xDm
            self.exponent += T.sum(self.means**2 * (1. / self.d.T),
                                   axis=1)  # mDm
            self.exponent -= T.sum(T.dot(self.x, self.ls) *
                                   T.dot(self.x, self.rs),
                                   axis=2)  # -x ls rs x
            self.exponent += 2 * T.sum(T.dot(self.x, self.ls) * T.sum(
                self.rs * self.means[:, :, None], axis=1)[None, :, :],
                                       axis=2)  # 2x ls rs m
            self.exponent -= T.sum(
                T.sum(self.ls * self.means[:, :, None], axis=1) *
                T.sum(self.rs * self.means[:, :, None], axis=1),
                axis=1)  # -m ls rs m
            self.exponent *= -0.5

            # logpK contains all log probabilities of all components in an (N x sum(K)) array
            # Note that the log component priors are not added yet
            # The log-determinant enters with a negative sign here because it
            # belongs to the covariance matrix.
            self.logpK = -0.5 * D * T.log(
                2. * np.pi) - 0.5 * self.logdet + self.exponent

        # logpC contains the log joint probabilities p(x,c) in an (N x C) array
        self.logpC = self.logpK
        self.logpC_list = []
        for c in range(C):
            # Components k1..k2-1 belong to class c (components are stored
            # class after class).
            k1 = int(np.sum(K[:c]))
            k2 = int(k1 + K[c])
            self.logpC_list.append(self.logpC[:, k1:k2])
            # Log-softmax of the component prior logits of class c, stabilized
            # by subtracting the maximum logit.
            aux_max = T.max(self.prior_k_rho[k1:k2]
                            )  # Compute log-probabilities without division
            log_prior_k = self.prior_k_rho[k1:k2] - T.log(
                T.sum(T.exp(self.prior_k_rho[k1:k2] - aux_max))) - aux_max
            self.logpC_list[c] += log_prior_k
            # Marginalize over the components of class c with a max-shifted
            # log-sum-exp, which yields one value per sample.
            aux_max = T.max(self.logpC_list[c], axis=1, keepdims=True)
            self.logpC_list[c] = T.log(
                T.sum(T.exp(self.logpC_list[c] - aux_max),
                      axis=1)) + aux_max.flatten()
        self.logpC = T.stack(self.logpC_list, axis=1)
        # Log-softmax of the class prior logits, added to every row of logpC
        aux_max = T.max(
            self.prior_c_rho)  # Compute log-probabilities without division
        log_prior_c = self.prior_c_rho - T.log(
            T.sum(T.exp(self.prior_c_rho - aux_max))) - aux_max
        self.logpC += log_prior_c

        # mm and cll objective are only for labeled data
        # logl objective is slightly different for labeled and unlabeled data
        # Labeled samples have t >= 0, unlabeled samples have t < 0.
        idx_sv = T.ge(self.t, 0).nonzero()
        idx_usv = T.lt(self.t, 0).nonzero()
        self.logpC_sv = self.logpC[idx_sv]
        self.logpC_usv = self.logpC[idx_usv]
        # The emptiness flags must be computed here, before the subsets are
        # replaced by dummy entries below.
        is_sv_empty = T.eq(self.logpC_sv.shape[0], 0)
        is_usv_empty = T.eq(self.logpC_usv.shape[0], 0)

        # If there are no supervised/unsupervised samples create a dummy entry
        # to avoid problems. The corresponding costs are set to 0 later. We set
        # the number of rows to 2 because 1 results in an error.
        # The problems appear to be CUDNN related if for instance a sum over an
        # empty tensor is computed.
        self.logpC_sv = theano.ifelse.ifelse(
            is_sv_empty, T.zeros((2, C), theano.config.floatX), self.logpC_sv)
        self.t_sv = theano.ifelse.ifelse(is_sv_empty, T.zeros((2, ), 'int32'),
                                         self.t[idx_sv])
        self.logpC_usv = theano.ifelse.ifelse(
            is_usv_empty, T.zeros((2, C), theano.config.floatX),
            self.logpC_usv)

        # Compute mean divisor since T.mean causes divisions by zero if there
        # are no labeled or unlabeled data in the minibatch. Therefore, we
        # compute T.mean with T.sum()/N
        # The divisor is 1 if the respective subset is empty.
        self.aux_mean_divisor_sv = T.switch(is_sv_empty, 1.,
                                            self.logpC_sv.shape[0])
        self.aux_mean_divisor_usv = T.switch(is_usv_empty, 1.,
                                             self.logpC_usv.shape[0])

        # Create cost functions

        # Compute the log of the softmax of logpc which gives the log of the conditional likelihood
        self.cll_max_tmp = T.max(self.logpC_sv, axis=1, keepdims=True)
        self.cll_logsumexp = T.log(
            T.sum(T.exp(self.logpC_sv - self.cll_max_tmp),
                  axis=1)) + T.reshape(self.cll_max_tmp,
                                       (self.cll_max_tmp.shape[0], ))
        # Negative conditional log-likelihood, summed over the labeled samples
        # (0 if there are none).
        self.cost_cll = theano.ifelse.ifelse(
            is_sv_empty, 0.,
            -T.sum(self.logpC_sv[T.arange(self.logpC_sv.shape[0]), self.t_sv] -
                   self.cll_logsumexp))
        self.cost_cll_normalized = self.cost_cll / self.aux_mean_divisor_sv

        # Negative log-likelihood of labeled data
        # Uses the joint log-probability of each sample with its true class.
        self.cost_nll_sv = theano.ifelse.ifelse(
            is_sv_empty, 0.,
            -T.sum(self.logpC_sv[T.arange(self.t_sv.shape[0]), self.t_sv]))
        self.cost_nll_sv_normalized = self.cost_nll_sv / self.aux_mean_divisor_sv

        # Negative log-likelihood of unlabeled data
        # Uses the log-sum-exp over all classes, i.e. the class is marginalized.
        self.logpC_usv_max = T.max(self.logpC_usv, axis=1, keepdims=True)
        self.logpC_usv_logsumexp = T.log(
            T.sum(T.exp(self.logpC_usv - self.logpC_usv_max),
                  axis=1)) + T.reshape(self.logpC_usv_max,
                                       (self.logpC_usv.shape[0], ))
        self.cost_nll_usv = theano.ifelse.ifelse(
            is_usv_empty, 0., -T.sum(self.logpC_usv_logsumexp))
        self.cost_nll_usv_normalized = self.cost_nll_usv / self.aux_mean_divisor_usv

        # Total negative log-likelihood
        self.cost_nll = self.cost_nll_sv + self.cost_nll_usv
        self.cost_nll_normalized = self.cost_nll / self.x.shape[0]

        # Margin of every class with respect to the true class:
        # gamma + log p(x,c) - log p(x,t). margin_start keeps this raw
        # expression, whereas margin is scaled and masked below.
        self.margin_start = self.gamma + self.logpC_sv - T.reshape(
            self.logpC_sv[T.arange(self.t_sv.shape[0]), self.t_sv],
            (self.t_sv.shape[0], 1))
        self.margin = self.gamma + self.logpC_sv - T.reshape(
            self.logpC_sv[T.arange(self.t_sv.shape[0]), self.t_sv],
            (self.t_sv.shape[0], 1))
        self.margin *= self.eta
        # Exclude the true class from the log-sum-exp by setting its entry to
        # -inf (exp(-inf) = 0).
        self.margin = T.set_subtensor(
            self.margin[T.arange(self.t_sv.shape[0]), self.t_sv], -np.inf)

        # Log-sum-exp trick
        # Scaling by eta before and dividing by eta after the log-sum-exp
        # gives the softmax approximation of the largest margin per sample.
        self.margin_max_tmp = T.max(self.margin, axis=1, keepdims=True)
        self.max_margin = T.log(
            T.sum(T.exp(self.margin - self.margin_max_tmp),
                  axis=1)) + T.reshape(self.margin_max_tmp,
                                       (self.margin.shape[0], ))
        self.max_margin /= self.eta

        # The cast in the following statement resolves an error that says that
        # both paths of ifelse must be of equal type. Setting the dtype argument
        # of T.sum did not solve the problem.
        # The relu keeps only the positive (approximated) largest margins.
        self.cost_mm = theano.ifelse.ifelse(
            is_sv_empty, 0.,
            T.cast(T.sum(T.nnet.relu(self.max_margin)), theano.config.floatX))
        self.cost_mm_normalized = self.cost_mm / self.aux_mean_divisor_sv

        # Note: The division by self.x.shape[0] in the following two expressions
        # ensures that gradients of minibatches are unbiased.

        # Cost with CLL criterion
        # tradeoff_hybrid weights the generative (NLL) part against the
        # discriminative part; tradeoff_ssl weights labeled against unlabeled
        # NLL.
        self.cost_hybrid_cll = (
            self.tradeoff_hybrid *
            (self.tradeoff_ssl * self.cost_nll_sv +
             (1. - self.tradeoff_ssl) * self.cost_nll_usv) +
            (1. - self.tradeoff_hybrid) * self.cost_cll) / (self.x.shape[0])

        # Cost with MM criterion
        self.cost_hybrid_mm = (self.tradeoff_hybrid *
                               (self.tradeoff_ssl * self.cost_nll_sv +
                                (1. - self.tradeoff_ssl) * self.cost_nll_usv) +
                               (1. - self.tradeoff_hybrid) * self.cost_mm) / (
                                   self.x.shape[0])

        # Predictions and classification errors
        # The prediction is the class with the largest joint log-probability.
        self.y = T.argmax(self.logpC, axis=1)
        self.y_sv = self.y[idx_sv]
        self.y_usv = self.y[idx_usv]
        # Fraction of misclassified labeled samples (0 if there are none)
        self.ce = theano.ifelse.ifelse(
            is_sv_empty, 0.,
            T.mean(T.neq(self.y_sv, self.t_sv), dtype=theano.config.floatX))
示例#60
0
def pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
            mode='max'):
    """Pool the two trailing dimensions of a symbolic tensor.

    A `Pool` op configured with the given arguments is applied to the last
    two dimensions of `input`. A 4D input is passed to the op directly. Any
    other input is first reshaped to a 4D tensor of shape
    (product of leading dims, 1, height, width), pooled, and then reshaped
    back so that the leading dimensions of the result match those of `input`.

    Parameters
    ----------
    input : N-D theano tensor of input images
        Input images, N >= 2. Pooling is done over the 2 last dimensions.
    ds : tuple of length 2
        Factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    ignore_border : bool (default None, will print a warning and set to False)
        When True, (5,5) input with ds=(2,2) will generate a (2,2) output.
        (3,3) otherwise.
    st : tuple of two ints
        Stride size, which is the number of shifts over rows/cols to get the
        next pool region. If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    padding : tuple of two ints
        (pad_h, pad_w), pad zeros to extend beyond four borders of the
        images, pad_h is the size of the top and bottom margins, and
        pad_w is the size of the left and right margins.
    mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
        Operation executed on each window. `max` and `sum` always exclude
        the padding in the computation. `average` gives you the choice to
        include or exclude it.

    Raises
    ------
    NotImplementedError
        If `input` has fewer than 2 dimensions.

    """
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if ignore_border is None:
        # No explicit choice was made: warn about the upcoming change of the
        # default and fall back to the current default (False).
        warnings.warn(
            "pool_2d() will have the parameter ignore_border"
            " default value changed to True (currently"
            " False). To have consistent behavior with all Theano"
            " version, explicitly add the parameter ignore_border=True."
            " On the GPU, using ignore_border=True is needed to use cuDNN."
            " When using ignore_border=False and not using cuDNN, the only"
            " GPU combination supported is when"
            " `ds == st and padding == (0, 0) and mode == 'max'`."
            " Otherwise, the convolution will be executed on CPU.",
            stacklevel=2)
        ignore_border = False

    # The op configuration is the same for both code paths, so build it once.
    pool_op = Pool(ds, ignore_border, st=st, padding=padding, mode=mode)

    # A 4D input already has the layout the op works on.
    if input.ndim == 4:
        return pool_op(input)

    # Split the symbolic shape into leading dims and the (height, width) pair.
    leading_dims = input.shape[:-2]
    image_dims = input.shape[-2:]

    # Collapse all leading dims into one batch axis, kept as a length-1
    # vector so that it can be joined with the other shape pieces.
    flat_batch = tensor.shape_padright(tensor.prod(leading_dims), 1)

    # Target shape: (batch_size, 1, height, width)
    shape_4d = tensor.cast(
        tensor.join(0, flat_batch, tensor.as_tensor([1]), image_dims),
        'int64')
    pooled = pool_op(tensor.reshape(input, shape_4d, ndim=4))

    # Put the original leading dims back in front of the pooled image dims.
    restored_shape = tensor.join(0, leading_dims, pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)