Example No. 1
 def step(hx_ir, hx_hat, h_prev):
     hhs = self.conv_wh(h_prev)
     hh_ir, hh_hat = tt.split(hhs,
                              splits_size=ss,
                              n_splits=len(ss),
                              axis=1)
     ir = tt.nnet.sigmoid(hx_ir + hh_ir)
     i, r = tt.split(ir, [self.n_output_ch] * 2, 2, axis=1)
     hat = tt.tanh(hx_hat + r * hh_hat)
     return (1.0 - i) * hat + i * h_prev
Example No. 2
 def fused_forward_one(hx_ir_t, hx_hat_t, h_prev):
     # second fused gemm
     hhs = tt.dot(h_prev, Rs)
     hh_ir_t, hh_hat_t = tt.split(hhs,
                                  output_sizes,
                                  len(output_sizes),
                                  axis=1)
     ir_t = tt.nnet.sigmoid(hx_ir_t + hh_ir_t)
     i_t, r_t = tt.split(ir_t, [self.output_dim] * 2, 2, axis=1)
     h_hat_t = tt.tanh(hx_hat_t + (r_t * (hh_hat_t + self.b_rh)))
     h_curr = ((1.0 - i_t) * h_hat_t) + (i_t * h_prev)
     return h_curr
Example No. 3
 def step(hx, h_prev, c_prev):
     hss = hx + self.conv_wh(h_prev)  # (n_batch, n_output_ch, n_freq)
     h_ifo, h_hat = tt.split(hss,
                             [self.n_output_ch * 3, self.n_output_ch],
                             2,
                             axis=1)
     i, f, o = tt.split(tt.nnet.sigmoid(h_ifo), [self.n_output_ch] * 3,
                        3,
                        axis=1)
     c_hat = tt.tanh(h_hat)
     c = f * c_prev + i * c_hat
     h = o * tt.tanh(c)
     return h, c
Example No. 4
 def _phase_shift(input, r):
     bsize, c, a, b = input.shape[
         0], 1, self.output_shape[2] // r, self.output_shape[3] // r
     X = T.reshape(input, (bsize, r, r, a, b))
     X = T.transpose(X, (0, 3, 4, 1, 2))  # bsize, a, b, r2,r1
     X = T.split(x=X, splits_size=[1] * a, n_splits=a,
                 axis=1)  # a, [bsize, b, r, r]
     X = [T.reshape(x, (bsize, b, r, r)) for x in X]
     X = T.concatenate(X, axis=2)  # bsize, b, a*r, r
     X = T.split(x=X, splits_size=[1] * b, n_splits=b,
                 axis=1)  # b, [bsize, a*r, r]
     X = [T.reshape(x, (bsize, a * r, r)) for x in X]
     X = T.concatenate(X, axis=2)  # bsize, a*r, b*r
     return X.dimshuffle(0, 'x', 1, 2)
Example No. 5
def test_local_gpu_split():
    """ Test that the GpuSplit op is being applied and works """
    # Construct symbolic split
    x = tensor.fvector()
    splits = tensor.lvector()
    ra, rb, rc = tensor.split(x, splits, n_splits=3, axis=0)
    # Compile function to use CPU
    f = theano.function([x, splits], [ra, rb, rc], mode=mode_without_gpu)
    # Get values for CPU version
    cpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    # Ensure that one op is theano.tensor.Split
    assert any([isinstance(o.op, theano.tensor.Split) for o in l])
    # GPU version
    f = theano.function([x, splits], [ra, rb, rc], mode=mode_with_gpu)
    gpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    assert any([isinstance(o.op, cuda.GpuSplit) for o in l])
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])

    # Test the other path of the optimizer, when it is the output that
    # is moved to the GPU.
    ra = cuda.gpu_from_host(ra)
    f = theano.function([x, splits], [ra, rb, rc],
                        mode=mode_with_gpu.excluding("InputToGpuOptimizer"))
    gpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    assert any([isinstance(o.op, cuda.GpuSplit) for o in l])
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])

    # Test that split with only 1 output work
    ra = tensor.split(x, splits, n_splits=1, axis=0)
    f = theano.function([x, splits], [ra], mode=mode_without_gpu)
    cpu_res = f([0, 1, 2, 3, 4, 5], [6])
    l = f.maker.fgraph.toposort()
    # Ensure that no op is theano.tensor.Split or GpuSplit, they get
    # optimized away.
    assert not any(
        [isinstance(o.op, (theano.tensor.Split, cuda.GpuSplit)) for o in l])
    # GPU version
    f = theano.function([x, splits], [ra], mode=mode_with_gpu)
    gpu_res = f([0, 1, 2, 3, 4, 5], [6])
    l = f.maker.fgraph.toposort()
    assert not any(
        [isinstance(o.op, (theano.tensor.Split, cuda.GpuSplit)) for o in l])
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
Example No. 6
def test_local_gpu_split():
    """ Test that the GpuSplit op is being applied and works """
    # Construct symbolic split
    x = tensor.fvector()
    splits = tensor.lvector()
    ra, rb, rc = tensor.split(x, splits, n_splits=3, axis=0)
    # Compile function to use CPU
    f = theano.function([x, splits], [ra, rb, rc], mode=mode_without_gpu)
    # Get values for CPU version
    cpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    # Ensure that one op is theano.tensor.Split
    assert any([isinstance(o.op, theano.tensor.Split) for o in l])
    # GPU version
    f = theano.function([x, splits], [ra, rb, rc], mode=mode_with_gpu)
    gpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    assert any([isinstance(o.op, cuda.GpuSplit) for o in l])
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])

    # Test the other path of the optimizer, when it is the output that
    # is moved to the GPU.
    ra = cuda.gpu_from_host(ra)
    f = theano.function([x, splits], [ra, rb, rc],
                        mode=mode_with_gpu.excluding("InputToGpuOptimizer"))
    gpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    assert any([isinstance(o.op, cuda.GpuSplit) for o in l])
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])

    # Test that split with only 1 output work
    ra = tensor.split(x, splits, n_splits=1, axis=0)
    f = theano.function([x, splits], [ra], mode=mode_without_gpu)
    cpu_res = f([0, 1, 2, 3, 4, 5], [6])
    l = f.maker.fgraph.toposort()
    # Ensure that no op is theano.tensor.Split or GpuSplit, they get
    # optimized away.
    assert not any([isinstance(o.op, (theano.tensor.Split,
                                      cuda.GpuSplit)) for o in l])
    # GPU version
    f = theano.function([x, splits], [ra], mode=mode_with_gpu)
    gpu_res = f([0, 1, 2, 3, 4, 5], [6])
    l = f.maker.fgraph.toposort()
    assert not any([isinstance(o.op, (theano.tensor.Split,
                                      cuda.GpuSplit)) for o in l])
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
Example No. 7
        def fused_forward_one(hx, h_prev, c_prev):
            hs = hx + tt.dot(h_prev, Rs)
            h_ifo, h_hat = tt.split(hs, [self.output_dim * 3, self.output_dim],
                                    2,
                                    axis=1)

            i_t, f_t, o_t = tt.split(tt.nnet.sigmoid(h_ifo),
                                     [self.output_dim] * 3,
                                     3,
                                     axis=1)
            c_hat_t = tt.tanh(h_hat)

            c_t = f_t * c_prev + i_t * c_hat_t
            h_t = o_t * tt.tanh(c_t)
            return h_t, c_t
Example No. 8
    def get_output_for(self, input, **kwargs):
        z = input
        # z is (batch_size, num_latent_units)

        d = z.shape[1] // 2
        D = z.shape[1]

        for n in range(len(self.Wbs) - 1, -1, -1):
            y1, y2 = T.split(z, [d, D - d], 2, axis=1)
            W, b = self.Wbs[n]
            if n % 2:
                m_y1 = self.nonlin(T.dot(y1, W) + b.dimshuffle('x', 0))
                m_y2 = 0
            else:
                m_y1 = 0
                m_y2 = self.nonlin(T.dot(y2, W) + b.dimshuffle('x', 0))

            x1 = y1 - m_y2
            x2 = y2 - m_y1

            z = T.concatenate([x1, x2], axis=1)

        f_z = z

        return f_z
Example No. 9
    def get_output_for(self, input, **kwargs):
        # 1) calculate u_hat to ensure invertibility (appendix A.1 to)
        # 2) calculate the forward transformation of the input f(z) (Eq. 8)
        # 3) calculate u_hat^T psi(z)
        # 4) calculate logdet-jacobian log|1 + u_hat^T psi(z)| to be used in the
        #    LL function

        z = input
        # # z is (batch_size, num_latent_units)

        d = z.shape[1] // 2
        D = z.shape[1]

        for n in range(self.nlayers):
            x1, x2 = T.split(z, [d, D - d], 2, axis=1)
            if n % 2:
                m_x1 = self.nonlin(
                    T.dot(x1, self.Ws[n]) + self.bs[n].dimshuffle('x', 0))
                m_x2 = 0
            else:
                m_x1 = 0
                m_x2 = self.nonlin(
                    T.dot(x2, self.Ws[n]) + self.bs[n].dimshuffle('x', 0))

            y1 = x1 + m_x2
            y2 = x2 + m_x1

            z = T.concatenate([y1, y2], axis=1)

        f_z = z
        logdet_jacobian = 0. * T.sum(input, axis=1)

        return [f_z, logdet_jacobian]
Example No. 10
def create_iter_funcs_test(l_out, bs, N=50):
    X = T.tensor4('X')
    X_batch = T.tensor4('X_batch')

    X_repeat = T.extra_ops.repeat(X, N, axis=0)
    y_sample = layers.get_output(
        l_out, X_repeat, deterministic=False)

    # the number of splits needs to be pre-defined
    sizes = [X_repeat.shape[0] / X.shape[0]] * bs
    y_sample_split = T.as_tensor_variable(
        T.split(y_sample, sizes, bs, axis=0))
    y_hat = T.mean(y_sample_split, axis=1)
    #y_var = T.var(y_sample_split, axis=1)

    test_iter = theano.function(
        inputs=[theano.Param(X_batch)],
        outputs=y_hat,
        #outputs=[y_hat, y_var],
        givens={
            X: X_batch,
        },
    )

    return test_iter
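A minimal, self-contained sketch (sizes and names are illustrative, not taken from the repository above) of the split-then-average pattern used here: the N stochastic passes per input are regrouped with T.split and averaged per example.

import numpy as np
import theano
import theano.tensor as T

N, bs, n_classes = 4, 3, 5
y_sample = T.matrix('y_sample')                  # (bs * N, n_classes)
sizes = [y_sample.shape[0] // bs] * bs           # bs groups of N rows each
y_sample_split = T.as_tensor_variable(T.split(y_sample, sizes, bs, axis=0))
y_hat = T.mean(y_sample_split, axis=1)           # (bs, n_classes)

f = theano.function([y_sample], y_hat)
samples = np.random.rand(bs * N, n_classes).astype(theano.config.floatX)
print(f(samples).shape)                          # (3, 5)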
Example No. 11
def create_iter_funcs_valid(l_out, bs=None, N=50, mc_dropout=False):
    X = T.tensor4('X')
    y = T.ivector('y')
    X_batch = T.tensor4('X_batch')
    y_batch = T.ivector('y_batch')

    if not mc_dropout:
        y_hat = layers.get_output(l_out, X, deterministic=True)
    else:
        if bs is None:
            raise ValueError('a fixed batch size is required for mc dropout')
        X_repeat = T.extra_ops.repeat(X, N, axis=0)
        y_sample = layers.get_output(l_out, X_repeat, deterministic=False)

        sizes = [X_repeat.shape[0] / X.shape[0]] * bs
        y_sample_split = T.as_tensor_variable(
            T.split(y_sample, sizes, bs, axis=0))
        y_hat = T.mean(y_sample_split, axis=1)

    valid_loss = T.mean(T.nnet.categorical_crossentropy(y_hat, y))
    valid_acc = T.mean(T.eq(y_hat.argmax(axis=1), y))

    valid_iter = theano.function(
        inputs=[theano.Param(X_batch),
                theano.Param(y_batch)],
        outputs=[valid_loss, valid_acc],
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return valid_iter
Example No. 12
    def fused_forward(self, xs, h0):
        # pre fused gemm
        Ws = tt.concatenate([self.W_i, self.W_r, self.W_h], axis=1)
        bs = tt.concatenate(
            [self.b_wi + self.b_ru, self.b_wr + self.b_rr, self.b_wh])
        hxs = sequence_apply(lambda x: tt.dot(x, Ws) + bs, xs)
        output_sizes = [self.output_dim * 2, self.output_dim]
        hxs = tt.split(hxs,
                       splits_size=output_sizes,
                       n_splits=len(output_sizes),
                       axis=2)
        Rs = tt.concatenate([self.R_i, self.R_r, self.R_h], axis=1)

        def fused_forward_one(hx_ir_t, hx_hat_t, h_prev):
            # second fused gemm
            hhs = tt.dot(h_prev, Rs)
            hh_ir_t, hh_hat_t = tt.split(hhs,
                                         output_sizes,
                                         len(output_sizes),
                                         axis=1)
            ir_t = tt.nnet.sigmoid(hx_ir_t + hh_ir_t)
            i_t, r_t = tt.split(ir_t, [self.output_dim] * 2, 2, axis=1)
            h_hat_t = tt.tanh(hx_hat_t + (r_t * (hh_hat_t + self.b_rh)))
            h_curr = ((1.0 - i_t) * h_hat_t) + (i_t * h_prev)
            return h_curr

        states, self.updates = theano.scan(fn=fused_forward_one,
                                           sequences=hxs,
                                           outputs_info=h0)
        return states, states[-1]
Example No. 13
 def apply(self, x):
     result = self.mlp.apply(x)
     mu, logsigma = tensor.split(result, [self.hidden_dim] * 2, 2, axis=1)
     batch_size = x.shape[0]
     epsilons = self.theano_rng.normal((batch_size, self.hidden_dim),
                                       0., 1.)
     return mu, logsigma, mu + tensor.exp(logsigma) * epsilons
Example No. 14
def lyr_gru(name_, s_x_, s_state_, idim_, sdim_, lyr_linear_, axis_=-1):
    in_gate = T.nnet.sigmoid(lyr_linear_(name_+'_igate',T.join(axis_, s_x_, s_state_), idim_+sdim_, idim_))
    s_gated_x = s_x_ * in_gate
    s_interp_lin, s_state_tp1_lin = T.split(lyr_linear_(name_+'_main', T.join(axis_,s_gated_x, s_state_), idim_+sdim_, sdim_*2), [sdim_]*2, 2, axis_)
    s_interp = T.nnet.sigmoid(s_interp_lin)
    return T.tanh(s_state_tp1_lin)*s_interp + s_state_*(1.-s_interp)
Example No. 15
def convolution_layer(tensor, W, b, subsample=(1, 1), border='valid', group=1):
    W_shape = W.get_value().shape

    if border == 'same':
        pad = (W_shape[-2] / 2, W_shape[-1] / 2)
    else:
        pad = (0, 0)

    if group == 1:
        tensor = theano.sandbox.cuda.dnn.dnn_conv(
            tensor,
            W,
            subsample=subsample,
            border_mode=pad,
        )
    else:
        s = T.repeat(tensor.shape[1] / group, group)
        outputs = []
        for i, t in enumerate(T.split(tensor, s, group, axis=1)):
            W_ = W[i * W_shape[0] / group:(i + 1) * W_shape[0] / group]
            outputs.append(
                theano.sandbox.cuda.dnn.dnn_conv(
                    t,
                    W_,
                    subsample=subsample,
                    border_mode=pad,
                ))
        tensor = T.concatenate(outputs, axis=1)
    tensor = tensor + b[None, :, None, None]

    return tensor
Example No. 16
def convolution_layer(tensor, W, b, subsample=(1, 1), border='valid', group=1):
    W_shape = W.get_value().shape

    if border == 'same':
        pad = (W_shape[-2] / 2, W_shape[-1] / 2)
    else:
        pad = (0, 0)

    if group == 1:
        tensor = theano.sandbox.cuda.dnn.dnn_conv(
            tensor,
            W,
            subsample=subsample,
            border_mode=pad,
        )
    else:
        s = T.repeat(tensor.shape[1]/group, group)
        outputs = []
        for i, t in enumerate(T.split(tensor, s, group, axis=1)):
            W_ = W[i*W_shape[0]/group:(i+1)*W_shape[0]/group]
            outputs.append(theano.sandbox.cuda.dnn.dnn_conv(
                t,
                W_,
                subsample=subsample,
                border_mode=pad,
            ))
        tensor = T.concatenate(outputs, axis=1)
    tensor = tensor + b[None, :, None, None]

    return tensor
Example No. 17
def split(x, axis, split_size):
    """
    Split a tensor along one axis.

    Parameters
    ----------
    x: ops

    axis: int

    split_size: int or list/tuple of int
        If an int, x.shape[axis] must be divisible by split_size;
        if a list/tuple, x.shape[axis] must equal the sum of split_size.
    Returns
    -------
    A list of subtensors.
    """
    assert axis < x.ndim, 'Dimension out of range!'

    if isinstance(split_size, int):
        _split_size = [x.shape[axis] // split_size] * split_size

    elif isinstance(split_size, (list, tuple)):
        _split_size = split_size
    else:
        raise TypeError

    if x.ndim == 0:

        return [x for _ in range(len(_split_size))]

    return T.split(x,
                   splits_size=_split_size,
                   n_splits=len(_split_size),
                   axis=axis)
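A quick, self-contained usage sketch of T.split itself (not from the codebase above), showing the two conventions this helper wraps: equal pieces and explicit per-piece sizes.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
even = T.split(x, splits_size=[2, 2, 2], n_splits=3, axis=1)   # equal widths
ragged = T.split(x, splits_size=[4, 2], n_splits=2, axis=1)    # explicit widths
f = theano.function([x], list(even) + list(ragged))

data = np.arange(12, dtype=theano.config.floatX).reshape(2, 6)
print([p.shape for p in f(data)])  # [(2, 2), (2, 2), (2, 2), (2, 4), (2, 2)]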
Example No. 18
    def __call__(self, xs, h0):
        """

        Args:
            xs: (n_batch, n_input_ch, n_freq, n_time)
            h0: (n_batch, n_input_ch, n_freq)

        Returns:
            hs (n_batch, n_output_ch, n_freq, n_time)
        """
        n_batch, n_input_ch, n_freq, n_time = xs.shape
        xs_batch = xs.transpose([3, 0, 1, 2]).reshape(
            [n_time * n_batch, n_input_ch, n_freq])
        hxs = self.conv_wx(
            xs_batch)  # (n_time * n_batch, n_output_ch * 3, n_freq)
        hxs = hxs.reshape([n_time, n_batch, self.n_output_ch * 3,
                           -1])  # (n_time, n_batch, n_output_ch * 3, n_freq)
        ss = (self.n_output_ch * 2, self.n_output_ch)
        hxs = tt.split(hxs, splits_size=ss, n_splits=len(ss), axis=2)

        def step(hx_ir, hx_hat, h_prev):
            hhs = self.conv_wh(h_prev)
            hh_ir, hh_hat = tt.split(hhs,
                                     splits_size=ss,
                                     n_splits=len(ss),
                                     axis=1)
            ir = tt.nnet.sigmoid(hx_ir + hh_ir)
            i, r = tt.split(ir, [self.n_output_ch] * 2, 2, axis=1)
            hat = tt.tanh(hx_hat + r * hh_hat)
            return (1.0 - i) * hat + i * h_prev

        hs, self.updates = theano.scan(
            step, sequences=hxs,
            outputs_info=h0)  # (n_time, n_batch, n_output_ch, n_freq)
        return hs.transpose([1, 2, 3, 0])
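A self-contained shape check (channel and frequency sizes are made up) of the outer split that feeds scan above: a (n_time, n_batch, 3*C, n_freq) stack is divided along the channel axis into a 2*C gate stream and a C candidate stream.

import numpy as np
import theano
import theano.tensor as tt

C = 4
hxs = tt.tensor4('hxs')                   # (n_time, n_batch, 3 * C, n_freq)
gates, cand = tt.split(hxs, splits_size=[2 * C, C], n_splits=2, axis=2)
f = theano.function([hxs], [gates, cand])
g, c = f(np.zeros((5, 2, 3 * C, 7), dtype=theano.config.floatX))
print(g.shape, c.shape)                   # (5, 2, 8, 7) (5, 2, 4, 7)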
Example No. 19
def create_iter_funcs_valid(l_out, bs=None, N=50, mc_dropout=False):
    X = T.tensor4('X')
    y = T.ivector('y')
    X_batch = T.tensor4('X_batch')
    y_batch = T.ivector('y_batch')

    if not mc_dropout:
        y_hat = layers.get_output(l_out, X, deterministic=True)
    else:
        if bs is None:
            raise ValueError('a fixed batch size is required for mc dropout')
        X_repeat = T.extra_ops.repeat(X, N, axis=0)
        y_sample = layers.get_output(
            l_out, X_repeat, deterministic=False)

        sizes = [X_repeat.shape[0] / X.shape[0]] * bs
        y_sample_split = T.as_tensor_variable(
            T.split(y_sample, sizes, bs, axis=0))
        y_hat = T.mean(y_sample_split, axis=1)

    valid_loss = T.mean(
        T.nnet.categorical_crossentropy(y_hat, y))
    valid_acc = T.mean(
        T.eq(y_hat.argmax(axis=1), y))

    valid_iter = theano.function(
        inputs=[theano.Param(X_batch), theano.Param(y_batch)],
        outputs=[valid_loss, valid_acc],
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    return valid_iter
Example No. 20
def maxout(X):
    if X.ndim == 4:
        dim = 1
    else:
        dim = -1
    split_size = X.shape[dim] // 2
    split1, split2 = T.split(X, [split_size, split_size], 2, axis=dim)
    return T.maximum(split1, split2)
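A hypothetical check of the maxout helper above on a 4D input (the branch that splits the channel axis in half); it assumes maxout() is already in scope.

import numpy as np
import theano
import theano.tensor as T

X = T.tensor4('X')                        # (batch, 2 * n_maps, rows, cols)
f = theano.function([X], maxout(X))
out = f(np.random.rand(2, 6, 4, 4).astype(theano.config.floatX))
print(out.shape)                          # (2, 3, 4, 4)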
Example No. 21
    def conv(self,
             input,
             k_h,
             k_w,
             c_o,
             s_h,
             s_w,
             name,
             relu=True,
             pad_h=0,
             pad_w=0,
             group=1,
             biased=True):

        convolve = lambda i, k: T.nnet.conv2d(i,
                                              k,
                                              border_mode=(pad_h, pad_w),
                                              subsample=(s_h, s_w),
                                              filter_flip=False)
        kernel_data = np.ones((c_o, 1, k_h, k_w), dtype=np.float32)
        kernel = theano.shared(kernel_data)
        self.var_dict[name + "weights"] = kernel
        self.grad_params.append(kernel)

        if group == 1:
            output = convolve(input, kernel)
        else:
            c_i = T.shape(input)[1]
            input_groups = T.split(input, [c_i // group] * group, group, 1)
            kernel_groups = T.split(kernel, [c_o // group] * group, group, 0)
            output_groups = [
                convolve(i, k) for i, k in zip(input_groups, kernel_groups)
            ]
            output = T.concatenate(output_groups, 1)

        if biased:
            biased_data = np.ones(c_o, dtype=np.float32)
            biases = theano.shared(biased_data)
            self.grad_params.append(biases)
            self.var_dict[name + "biases"] = biases
            output = output + biases.dimshuffle('x', 0, 'x', 'x')

        if relu:
            output = T.nnet.relu(output)
        return output
Example No. 22
 def encode(self, xs):
     h = self.conv1(xs)
     h = self.act(h)
     h = self.conv2(h)
     h = self.act(h)
     h = h.reshape([xs.shape[0], -1])
     h = self.act(self.fc1(h))
     encoded = self.fc2(h)
     return tt.split(encoded, [n_latent] * 2, 2, axis=1)
Example No. 23
	def get_output(self, train=False):
		inp = self.get_input(train)
		if self.axis == 2:
			new_axis_size = inp.shape[2] / 2
			new_size = (inp.shape[0], inp.shape[1], new_axis_size)
			mu, sigma = T.split(inp, [new_axis_size, new_axis_size], 2, axis=2)
			return mu + self.rng.normal(size=new_size) * sigma
		else:
			raise Exception('Other axes not implemented.')
Example No. 24
	def get_output(self, train=False):
		params = self.param_input.get_output(train)
		if self.axis_to_split != 2:
			raise Exception('Other axes not implemented.')
		new_axis_size = params.shape[2] / 2
		mu, sigma = T.split(params, [new_axis_size, new_axis_size], 2, axis=2)
		err = T.mean(
			# Put a floor on the SD, to prevent division by 0.
			(mu - self.point)**2 / (sigma**2 + MIN_SD**2),
			axis=self.axis_to_split)
		return err
Example No. 25
def _log_partition_symfunc():
    natural_params = T.vector()
    size = natural_params.shape[0] // 4
    np1, np2, np3, np4 = T.split(natural_params, 4 * [size], 4)

    log_Z = T.sum(T.gammaln(.5 * (np4 + 1)))
    log_Z += T.sum(- .5 * (np4 + 1) * T.log(.5 * (np1 - (np2 ** 2) / np3)))
    log_Z += T.sum(-.5 * T.log(np3))

    func = theano.function([natural_params], log_Z)
    grad_func = theano.function([natural_params],
                                T.grad(T.sum(log_Z), natural_params))
    return func, grad_func
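A hypothetical call of the factory above, with one block of natural parameters chosen so the argument of the log stays positive.

import numpy as np
import theano

func, grad_func = _log_partition_symfunc()
nat = np.array([3., 1., 1., 1.], dtype=theano.config.floatX)  # np1, np2, np3, np4
print(func(nat), grad_func(nat).shape)                        # 0.0 (4,)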
Example No. 26
def lyr_lstm(name_,
             s_x_,
             s_cell_,
             s_hid_,
             idim_,
             hdim_,
             axis_=-1,
             lyr_linear_=lyr_linear,
             op_act_=T.tanh,
             op_gate_=T.nnet.sigmoid):
    global _g_params_di
    s_inp = T.join(axis_, s_x_, s_hid_)
    s_gates_lin, s_inp_lin = T.split(lyr_linear_(name_ + '_rec', s_inp,
                                                 idim_ + hdim_, hdim_ * 4),
                                     [hdim_ * 3, hdim_],
                                     2,
                                     axis=axis_)
    s_igate, s_fgate, s_ogate = T.split(op_gate_(s_gates_lin), [hdim_] * 3,
                                        3,
                                        axis=axis_)
    s_cell_tp1 = s_igate * op_act_(s_inp_lin) + s_fgate * s_cell_
    s_hid_tp1 = op_act_(s_cell_tp1) * s_ogate
    return s_cell_tp1, s_hid_tp1
Example No. 27
    def call(self, y, mask=None):
        '''
            parameter y is supposed to have twice the length of the STF
        '''
        # Compute mask
        y1, y2 = T.split(K.transpose(y), [self.output_dim, self.output_dim], 2, axis=0)

        mask1 = K.abs(y1) / (K.abs(y1) + K.abs(y2) + 1)
        mask2 = K.abs(y2) / (K.abs(y1) + K.abs(y2) + 1)
        mask = K.concatenate([mask1, mask2])

        # Apply mask
        sft = shared(self.sfts[Mask_Data_Callback.idx])
        X1 = sft * K.transpose(mask1)
        X2 = sft * K.transpose(mask2)
        out = K.concatenate([X1, X2], axis=1)
        return out
Example No. 28
 def infer_shape(self, node, in_shapes):
     shape_a = in_shapes[0]
     n = node.inputs[1]
     axis = node.inputs[2]
     if len(shape_a) == 1:
         return [(n, )]
     elif isinstance(axis, tensor.TensorConstant):
         out_shape = (list(shape_a[0:axis.data.item()]) + [n] +
                      list(shape_a[axis.data + 1:]))
     else:
         l = len(shape_a)
         shape_a = tensor.stack(shape_a)
         out_shape = tensor.concatenate(
             (shape_a[0:axis], [n], shape_a[axis + 1:]))
         n_splits = [1] * l
         out_shape = tensor.split(out_shape, n_splits, l)
         out_shape = [a[0] for a in out_shape]
     return [out_shape]
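A self-contained sketch (illustrative only) of the trick in the symbolic branch above: splitting a shape vector with sizes [1] * ndim gives one length-1 piece per dimension, whose first element is that dimension's size.

import numpy as np
import theano
from theano import tensor

shape_vec = tensor.lvector('shape_vec')
pieces = tensor.split(shape_vec, [1] * 3, 3)   # three length-1 vectors
dims = [p[0] for p in pieces]                  # one scalar per dimension
f = theano.function([shape_vec], dims)
print(f(np.array([2, 3, 5], dtype='int64')))   # [array(2), array(3), array(5)]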
Example No. 29
def lyr_gru(name_,
            s_x_,
            s_state_,
            idim_,
            hdim_,
            axis_=0,
            lyr_linear_=lyr_linear,
            op_act_=T.tanh,
            op_gate_=T.nnet.sigmoid):
    global _g_params_di
    s_inp = T.join(axis_, s_x_, s_state_)
    s_igate = lyr_linear_(name_ + '_igate', s_inp, idim_ + hdim_, idim_)
    s_inp_gated = T.join(axis_, s_x_ * op_gate_(s_igate), s_state_)
    s_gate_lin, s_state_tp1_lin = T.split(
        lyr_linear_(name_ + '_gate', s_inp_gated, idim_ + hdim_, hdim_ * 2),
        [hdim_, hdim_], 2, axis_)
    s_gate = op_gate_(s_gate_lin)
    return s_state_ * s_gate + op_act_(s_state_tp1_lin) * (1. - s_gate)
Example No. 30
 def infer_shape(self, node, in_shapes):
     shape_a = in_shapes[0]
     n = node.inputs[1]
     axis = node.inputs[2]
     if len(shape_a) == 1:
         return [(n,)]
     elif isinstance(axis, tensor.TensorConstant):
         out_shape = (list(shape_a[0: axis.data.item()]) + [n] +
                      list(shape_a[axis.data + 1:]))
     else:
         l = len(shape_a)
         shape_a = tensor.stack(shape_a)
         out_shape = tensor.concatenate((shape_a[0: axis], [n],
                                         shape_a[axis + 1:]))
         n_splits = [1] * l
         out_shape = tensor.split(out_shape, n_splits, l)
         out_shape = [a[0] for a in out_shape]
     return [out_shape]
Example No. 31
    def call(self, y, mask=None):
        '''
            parameter y is supposed to have twice the length of the STF
        '''
        # Compute mask
        y1, y2 = T.split(K.transpose(y), [self.output_dim, self.output_dim],
                         2,
                         axis=0)

        mask1 = K.abs(y1) / (K.abs(y1) + K.abs(y2) + 1)
        mask2 = K.abs(y2) / (K.abs(y1) + K.abs(y2) + 1)
        mask = K.concatenate([mask1, mask2])

        # Apply mask
        sft = shared(self.sfts[Mask_Data_Callback.idx])
        X1 = sft * K.transpose(mask1)
        X2 = sft * K.transpose(mask2)
        out = K.concatenate([X1, X2], axis=1)
        return out
Example No. 32
def test_local_split():
    """ Test that the GpuSplit op is being applied and works """
    # Construct symbolic split
    x = tensor.fvector()
    splits = tensor.lvector()
    ra, rb, rc = tensor.split(x, splits, n_splits=3, axis=0)
    # Compile function to use CPU
    f = theano.function([x, splits], [ra, rb, rc], mode=mode_without_gpu)
    # Get values for CPU version
    cpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    # Ensure that one op is theano.tensor.Split
    assert any([isinstance(o.op, theano.tensor.Split) for o in l])
    # GPU version
    f = theano.function([x, splits], [ra, rb, rc], mode=mode_with_gpu)
    gpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    assert any([isinstance(o.op, theano.sandbox.cuda.GpuSplit) for o in l])
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
Example No. 34
    def get_output_for(self, input, deterministic=False, **kwargs):
        def _phase_shift(input, r):
            bsize, c, a, b = input.shape[
                0], 1, self.output_shape[2] // r, self.output_shape[3] // r
            X = T.reshape(input, (bsize, r, r, a, b))
            X = T.transpose(X, (0, 3, 4, 1, 2))  # bsize, a, b, r2,r1
            X = T.split(x=X, splits_size=[1] * a, n_splits=a,
                        axis=1)  # a, [bsize, b, r, r]
            X = [T.reshape(x, (bsize, b, r, r)) for x in X]
            X = T.concatenate(X, axis=2)  # bsize, b, a*r, r
            X = T.split(x=X, splits_size=[1] * b, n_splits=b,
                        axis=1)  # b, [bsize, a*r, r]
            X = [T.reshape(x, (bsize, a * r, r)) for x in X]
            X = T.concatenate(X, axis=2)  # bsize, a*r, b*r
            return X.dimshuffle(0, 'x', 1, 2)

        Xc = T.split(x=input,
                     splits_size=[input.shape[1] // self.c] * self.c,
                     n_splits=self.c,
                     axis=1)
        return T.concatenate([_phase_shift(xc, self.r) for xc in Xc], axis=1)
Example No. 35
def create_iter_funcs_test(l_out, bs, N=50):
    X = T.tensor4('X')
    X_batch = T.tensor4('X_batch')

    X_repeat = T.extra_ops.repeat(X, N, axis=0)
    y_sample = layers.get_output(l_out, X_repeat, deterministic=False)

    # the number of splits needs to be pre-defined
    sizes = [X_repeat.shape[0] / X.shape[0]] * bs
    y_sample_split = T.as_tensor_variable(T.split(y_sample, sizes, bs, axis=0))
    y_hat = T.mean(y_sample_split, axis=1)
    #y_var = T.var(y_sample_split, axis=1)

    test_iter = theano.function(
        inputs=[theano.Param(X_batch)],
        outputs=y_hat,
        #outputs=[y_hat, y_var],
        givens={
            X: X_batch,
        },
    )

    return test_iter
Example No. 36
def split(vec, sizes):
  return T.split(vec, sizes, len(sizes)) if len(sizes) > 1 else [vec]
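The len(sizes) > 1 guard matters because T.split with a single split returns one variable rather than a list (see the n_splits=1 case in the GpuSplit test above), so the wrapper normalizes both cases. A hypothetical call, assuming the helper above is in scope:

import theano.tensor as T

v = T.vector('v')
a, b = split(v, [3, 2])   # two pieces from T.split
(only,) = split(v, [5])   # guard keeps the return type a list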
Example No. 37
 def compute(self, x):
     for layer in self.layers:
         x = layer.compute(x)
     return T.split(x, self.splits, self.no_splits, axis=-1)
Example No. 38
def lyr_gru_nogate(name_, s_x_, s_state_, idim_, sdim_, lyr_linear_, axis_=-1):
    s_interp_lin, s_state_tp1_lin = T.split(
        lyr_linear_(name_ + '_main', T.join(axis_, s_x_, s_state_),
                    idim_ + sdim_, sdim_ * 2), [sdim_] * 2, 2, axis_)
    s_interp = T.nnet.sigmoid(s_interp_lin)
    return T.tanh(s_state_tp1_lin) * s_interp + s_state_ * (1. - s_interp)
Example No. 39
def split_half(x, axis=0):
    size1 = x.shape[axis] / 2
    size2 = x.shape[axis] - size1
    return tt.split(x, [size1, size2], 2, axis=axis)
Example No. 40
    def dnc_step(s_x_, s_lstm_cell_, s_lstm_hid_, s_usage_, s_preced_, s_link_,
                 s_mem_, s_read_val_, s_read_wgt_, s_write_wgt_):
        s_states_li_ = [
            s_lstm_cell_, s_lstm_hid_, s_usage_, s_preced_, s_link_, s_mem_,
            s_read_val_, s_read_wgt_, s_write_wgt_
        ]
        s_inp = T.join(-1, s_x_, s_read_val_.flatten())

        s_lstm_cell_tp1, s_lstm_hid_tp1 = lyr.lyr_lstm('ctrl', s_inp,
                                                       s_lstm_cell_,
                                                       s_lstm_hid_,
                                                       ctrl_inp_size,
                                                       ctrl_wm_size)
        s_out, s_itrface = T.split(lyr.lyr_linear('ctrl_out',
                                                  s_lstm_hid_tp1,
                                                  ctrl_wm_size,
                                                  ctrl_wm_size,
                                                  bias_=None),
                                   [OUT_DIMS, itrface_size],
                                   2,
                                   axis=-1)
        splits_len = [
            N_READS * CELL_SIZE, N_READS, CELL_SIZE, 1, CELL_SIZE, CELL_SIZE,
            N_READS, 1, 1, 3 * N_READS
        ]
        s_keyr, s_strr, s_keyw, s_strw, \
            s_ers, s_write, s_freeg, s_allocg, s_writeg, s_rmode = \
            T.split(s_itrface, splits_len, 10, axis=-1)

        s_keyr = T.reshape(s_keyr, (CELL_SIZE, N_READS))
        s_strr = 1. + T.nnet.softplus(s_strr)
        s_strw = 1. + T.nnet.softplus(s_strw[0])
        s_ers = T.nnet.sigmoid(s_ers)
        s_freeg = T.nnet.sigmoid(s_freeg)
        s_allocg = T.nnet.sigmoid(s_allocg[0])
        s_writeg = T.nnet.sigmoid(s_writeg[0])
        s_rmode = T.nnet.softmax(T.reshape(s_rmode, (N_READS, 3))).dimshuffle(
            1, 0, 'x')

        s_mem_retention = T.prod(1. - s_freeg.dimshuffle(0, 'x') * s_read_wgt_,
                                 axis=0)

        s_usage_tp1 = s_mem_retention * (s_usage_ + s_write_wgt_ -
                                         s_usage_ * s_write_wgt_)
        s_usage_order = T.argsort(s_usage_tp1)
        s_usage_order_inv = T.inverse_permutation(s_usage_order)
        s_usage_tp1_sorted = s_usage_tp1[s_usage_order]

        s_alloc_wgt = ((1. - s_usage_tp1_sorted) * (T.join(
            0, np.array([1.], dtype=th.config.floatX),
            op_cumprod_hack(s_usage_tp1_sorted[:-1]))))[s_usage_order_inv]

        s_content_wgt_w = T.nnet.softmax(
            s_strw * T.dot(s_mem_, s_keyw) /
            (T.sqrt(EPS + T.sum(T.sqr(s_mem_), axis=-1) *
                    T.sum(T.sqr(s_keyw))))).flatten()

        s_write_wgt_tp1 = s_writeg * (s_allocg * s_alloc_wgt +
                                      (1. - s_allocg) * s_content_wgt_w)

        s_mem_tp1 = s_mem_ * (1. - T.outer(s_write_wgt_tp1, s_ers)) + T.outer(
            s_write_wgt_tp1, s_write)
        s_preced_tp1 = (1. - T.sum(s_write_wgt_)) * s_preced_ + s_write_wgt_tp1

        s_link_tp1 = (1. - s_write_wgt_tp1 - s_write_wgt_tp1.dimshuffle(
            0, 'x')) * s_link_ + T.outer(s_write_wgt_tp1, s_preced_)
        s_link_tp1 = s_link_tp1 * (1. - T.identity_like(s_link_tp1))  #X
        s_fwd = T.dot(s_read_wgt_, s_link_tp1.transpose())  #X
        s_bwd = T.dot(s_read_wgt_, s_link_tp1)  #X

        s_content_wgt_r = T.nnet.softmax(
            T.dot(s_mem_tp1, s_keyr) /
            (T.sqrt(EPS + T.outer(T.sum(T.sqr(s_mem_tp1), axis=-1),
                                  T.sum(T.sqr(s_keyr), axis=0))))).transpose()
        s_read_wgt_tp1 = s_bwd * s_rmode[0] + s_content_wgt_r * s_rmode[
            1] + s_fwd * s_rmode[2]
        s_read_val_tp1 = T.dot(s_read_wgt_tp1, s_mem_tp1)

        s_y = s_out + lyr.lyr_linear('read_out',
                                     s_read_val_tp1.flatten(),
                                     CELL_SIZE * N_READS,
                                     OUT_DIMS,
                                     bias_=None)
        return [
            s_y, s_lstm_cell_tp1, s_lstm_hid_tp1, s_usage_tp1, s_preced_tp1,
            s_link_tp1, s_mem_tp1, s_read_val_tp1, s_read_wgt_tp1,
            s_write_wgt_tp1
        ]
Example No. 41
def split(tensor, size_splits, n_splits, axis):
    if KERAS_BACKEND == 'theano':
        return T.split(tensor, size_splits, n_splits, axis=axis)
    else:
        return tensorflow.split(tensor, size_splits, axis=axis)