Example #1
 def _ELMAE(self, inputX, hiddenunit, filtersize, stride):
     assert inputX.shape[1] == 1  # the ELMAE input must have exactly one channel, i.e., a single feature map
     # generate random orthonormal filters
     filters = init.GlorotNormal().sample((hiddenunit, np.prod(filtersize)))
     filters = orthonormalize(filters)
     filters = filters.reshape((hiddenunit, 1) + filtersize)
     bias = init.Normal().sample(hiddenunit)
     bias = orthonormalize(bias)
     # convolution forward pass; pad must be 0 to match the patch extraction
     convout = convbiasact_decomp(inputX,
                                  filters,
                                  bias,
                                  pad=0,
                                  stride=stride)
     del filters, bias
     # reshape the 4-D convolution output into a 2-D matrix
     hiddens = convout.transpose((0, 2, 3, 1)).reshape((-1, hiddenunit))
     del convout
     # extract image patches
     im2col = myUtils.basic.Im2ColOp(psize=filtersize[0], stride=stride[0])
     patches = im2col.transform(inputX)  # no padding when extracting patches
     patches = patches.reshape((-1, np.prod(filtersize)))
     # compute beta
     beta = compute_beta(hiddens, patches, self.C)
     del hiddens, patches
     return beta
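A note on compute_beta, which many of these examples call but never define: assuming it solves the standard L2-regularized ELM least-squares problem for the output weights, a minimal NumPy sketch (the name compute_beta_sketch and its exact behaviour are assumptions, not the original implementation) might be:

import numpy as np

def compute_beta_sketch(H, T, C):
    # Hypothetical stand-in: solve min ||H.dot(beta) - T||^2 + ||beta||^2 / C
    # via the regularized normal equations (H^T H + I / C) beta = H^T T.
    n_hidden = H.shape[1]
    A = H.T.dot(H) + np.eye(n_hidden) / C
    return np.linalg.solve(A, H.T.dot(T))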
Example #2
    def __init__(self,
                 incomings,
                 nfilters,
                 nrings=5,
                 nrays=16,
                 W=LI.GlorotNormal(),
                 b=LI.Constant(0.0),
                 normalize_rings=False,
                 normalize_input=False,
                 take_max=True,
                 nonlinearity=L.nonlinearities.rectify,
                 **kwargs):
        super(GCNNLayer, self).__init__(incomings, **kwargs)
        self.nfilters = nfilters
        self.filter_shape = (nfilters, self.input_shapes[0][1], nrings, nrays)
        self.nrings = nrings
        self.nrays = nrays
        self.normalize_rings = normalize_rings
        self.normalize_input = normalize_input
        self.take_max = take_max
        self.nonlinearity = nonlinearity

        self.W = self.add_param(W, self.filter_shape, name="W")

        biases_shape = (nfilters, )
        self.b = self.add_param(b, biases_shape, name="b", regularizable=False)
Example #3
    def _get_beta(self, oneChannel, randChannel, addpad):
        # assert inputX.ndim == 4 and inputX.shape[1] == 1  # the ELMAE input must have exactly one channel, i.e., a single feature map
        # use clustering to obtain distinct patches for each cluster

        # generate random orthonormal filters
        filters = init.GlorotNormal().sample(
            (self.n_hidden, self.filter_size**2))
        filters = orthonormalize(filters)
        filters = filters.reshape(
            (self.n_hidden, 1, self.filter_size, self.filter_size))
        bias = init.Normal().sample(self.n_hidden)
        bias = orthonormalize(bias)
        # convolution forward pass, consistent with the patch extraction
        pad = self.filter_size // 2 if addpad else 0
        stride = self.filter_size // 2 + 1
        hiddens = convbiasact_decomp(oneChannel,
                                     filters,
                                     bias,
                                     pad=pad,
                                     stride=stride)
        hiddens = hiddens.transpose((0, 2, 3, 1)).reshape((-1, self.n_hidden))
        # extract image patches from a randomly chosen channel
        patches = im2col(randChannel, self.filter_size, stride=stride, pad=pad)
        # compute beta
        beta = compute_beta(hiddens, patches, self.C)
        beta = beta.reshape(
            (self.n_hidden, 1, self.filter_size, self.filter_size))
        return beta
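The orthonormalize helper above is also undefined in these snippets. A plausible sketch (an assumption, not the original code) maps a 2-D weight matrix to its nearest orthonormal factor via the SVD and simply unit-normalizes a 1-D bias:

import numpy as np

def orthonormalize_sketch(W):
    # hypothetical stand-in for the orthonormalize helper used in these examples
    if W.ndim == 1:
        return W / np.linalg.norm(W)  # bias vector: scale to unit length
    U, _, Vt = np.linalg.svd(W, full_matrices=False)
    return U.dot(Vt)  # orthogonal polar factor: rows/columns become orthonormal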
Example #4
    def _get_beta(self, inputX, ch, addpad):
        # assert inputX.ndim == 4 and inputX.shape[1] == 1  # the ELMAE input must have exactly one channel, i.e., a single feature map
        batches, channels, rows, cols = inputX.shape
        oneChannel = inputX[:, ch, :, :].reshape((batches, 1, rows, cols))
        # use clustering to obtain distinct patches for each cluster

        # generate random orthonormal filters
        filters = init.GlorotNormal().sample((self.hidden_unit, self.filter_size ** 2))
        filters = orthonormalize(filters)
        filters = filters.reshape((self.hidden_unit, 1, self.filter_size, self.filter_size))
        bias = init.Normal().sample(self.hidden_unit)
        bias = orthonormalize(bias)
        # convolution forward pass, consistent with the patch extraction
        pad = self.filter_size // 2 if addpad else 0
        stride = self.filter_size // 2 + 1
        hiddens = convbiasact_decomp(oneChannel, filters, bias, pad=pad, stride=stride)
        hiddens = hiddens.transpose((0, 2, 3, 1)).reshape((-1, self.hidden_unit))
        # extract image patches from a randomly chosen channel
        # batchindex = np.arange(batches)
        # channelindex = np.random.randint(channels, size=batches)
        # randChannel = inputX[batchindex, channelindex, :, :].reshape((batches, 1, rows, cols))
        patches = im2col(oneChannel, self.filter_size, stride=stride, pad=pad)
        # compute beta
        beta = compute_beta(hiddens, patches, self.C)
        beta = beta.reshape((self.hidden_unit, 1, self.filter_size, self.filter_size))
        return beta
Example #5
    def _ELMAE(self, inputX, hiddenunit, filtersize):
        assert inputX.shape[1] == 1  # the ELMAE input must have exactly one channel, i.e., a single feature map
        # generate random orthonormal filters
        filters = init.GlorotNormal().sample((hiddenunit, np.prod(filtersize)))
        filters = orthonormalize(filters)
        filters = filters.reshape((hiddenunit, 1) + filtersize)
        bias = init.Normal().sample(hiddenunit)
        bias = orthonormalize(bias)
        # convolution forward pass; pad must be 0 to match the patch extraction
        stride = 4
        convout = convbiasact_decomp(inputX,
                                     filters,
                                     bias,
                                     pad=0,
                                     stride=(stride, stride))
        # reshape the 4-D convolution output into a 2-D matrix
        hiddens = convout.transpose((0, 2, 3, 1)).reshape((-1, hiddenunit))
        # extract image patches
        patches = im2col(inputX, filtersize[0], stride=stride, pad=0)
        # compute beta
        beta1 = compute_beta(hiddens, patches, self.C)

        stride = 1
        convout = convbiasact_decomp(inputX,
                                     filters,
                                     bias,
                                     pad=0,
                                     stride=(stride, stride))
        # reshape the 4-D convolution output into a 2-D matrix
        hiddens = convout.transpose((0, 2, 3, 1)).reshape((-1, hiddenunit))
        # extract image patches
        patches = im2col(inputX, filtersize[0], stride=stride, pad=0)
        # compute beta
        beta2 = compute_beta(hiddens, patches, self.C)
        return beta1, beta2
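im2col(inputX, size, stride, pad) is another helper that is only used, never shown; the call sites above suggest it returns a (num_patches, size ** 2) matrix of flattened single-channel patches. A slow but illustrative sketch under that assumption:

import numpy as np

def im2col_sketch(X, fsize, stride=1, pad=0):
    # X: (batches, 1, rows, cols) -> (num_patches, fsize ** 2)
    assert X.ndim == 4 and X.shape[1] == 1
    if pad > 0:
        X = np.pad(X, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    out_r = (X.shape[2] - fsize) // stride + 1
    out_c = (X.shape[3] - fsize) // stride + 1
    patches = [X[b, 0, i * stride:i * stride + fsize, j * stride:j * stride + fsize].ravel()
               for b in range(X.shape[0])
               for i in range(out_r)
               for j in range(out_c)]
    return np.asarray(patches)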
Example #6
 def _get_beta(self, oneChannel, addpad):
     # assert inputX.ndim == 4 and inputX.shape[1] == 1  # the ELMAE input must have exactly one channel, i.e., a single feature map
     # generate random orthonormal filters
     filters = init.GlorotNormal().sample(
         (self.n_hidden, self.filter_size**2))
     filters = orthonormalize(filters)
     filters = filters.reshape(
         (self.n_hidden, 1, self.filter_size, self.filter_size))
     bias = init.Normal().sample(self.n_hidden)
     bias = orthonormalize(bias)
     # convolution forward pass, consistent with the patch extraction
     pad = self.filter_size // 2 if addpad else 0
     stride = self.filter_size // 2 + 1
     noiseChannel = add_mn(oneChannel, p=0.25)
     hiddens = convbiasact_decomp(noiseChannel,
                                  filters,
                                  bias,
                                  pad=pad,
                                  stride=stride)
     hiddens = hiddens.transpose((0, 2, 3, 1)).reshape((-1, self.n_hidden))
     # extract image patches from a randomly chosen channel
     patches = im2col(oneChannel, self.filter_size, stride=stride, pad=pad)
     # randPatch = add_mn_row(patches, p=0.25)
     # hiddens = np.dot(randPatch, filters.T) + bias
     # hiddens = relu(hiddens)
     # compute beta
     beta = compute_beta_val(hiddens, patches, 5)
     beta = beta.reshape(
         (self.n_hidden, 1, self.filter_size, self.filter_size))
     return beta
Example #7
    def nn_fn(self):

        l_in_z = InputLayer((None, self.num_choices, self.z_dim))
        l_in_mask = InputLayer((None, self.num_choices))

        l_h = l_in_z

        for h in range(self.nn_depth - 1):
            l_h = DenseLayer(l_h,
                             num_units=self.nn_hid_units,
                             b=None,
                             num_leading_axes=2)

        l_out_flat = DenseLayer(l_h,
                                num_units=1,
                                b=None,
                                nonlinearity=None,
                                num_leading_axes=2,
                                W=init.GlorotNormal(1.))

        l_out_pre_softmax = ReshapeLayer(l_out_flat, ([0], [1]))
        l_out_pre_softmax = SwitchLayer((l_in_mask, l_out_pre_softmax), 0,
                                        -np.inf)

        l_out = NonlinearityLayer(l_out_pre_softmax, softmax)

        return (l_in_z, l_in_mask), l_out
Example #8
 def get_train_output_for(self, inputX, inputy=None):
     self.W = init.GlorotNormal().sample((inputX.shape[1], self.hidden_unit))
     self.b = init.Normal().sample(self.hidden_unit)
     H = dotbiasact_decomp(inputX, self.W, self.b)
     self.beta = compute_beta(H, inputy, self.C)
     out = dot_decomp(H, self.beta)
     return out
Example #9
 def fit(self, X, y):
     y = myUtils.load.one_hot(y, len(np.unique(y)))
     self.W = init.GlorotNormal().sample((X.shape[1], self.n_hidden))
     self.b = init.Normal().sample(self.n_hidden)
     H = np.dot(X, self.W) + self.b
     H = relu(H)
     self.beta = compute_beta(H, y, self.C)
     return self
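A hedged companion to the fit() above: prediction would presumably reuse the stored W, b and beta with the same ReLU hidden mapping. This function is not part of the original class; it only illustrates the intended usage (e.g. elm_predict(X_test, clf.W, clf.b, clf.beta) for a fitted instance clf):

import numpy as np

def elm_predict(X, W, b, beta):
    # hypothetical prediction step matching fit(): ReLU hidden layer, then linear readout
    H = np.maximum(np.dot(X, W) + b, 0)
    return np.argmax(np.dot(H, beta), axis=1)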
Example #10
 def __init__(self, incomings, nfilters, nrings=5, nrays=16,
              W=LI.GlorotNormal(), b=LI.Constant(0.0),
              normalize_rings=False, normalize_input=False, take_max=True, 
              nonlinearity=LN.rectify, **kwargs):
     super(ACNNLayer, self).__init__(incomings, nfilters, nrings, nrays,
              W, b,
              normalize_rings, normalize_input, take_max, 
              nonlinearity, **kwargs)
Example #11
 def fit(self, inputX, inputy):
     n_hidden = int(self.n_times * inputX.shape[1])
     inputy = myUtils.load.one_hot(inputy, len(np.unique(inputy)))
     self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
     self.b = init.Normal().sample(n_hidden)
     H = np.dot(inputX, self.W) + self.b
     H = relu(H)
     self.beta = compute_beta(H, inputy, self.C)
     return self
Example #12
 def get_train_output_for(self, inputX, inputy=None):
     inputX = self.pca.fit_transform(inputX)
     n_hidden = int(self.n_times * inputX.shape[1])
     self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
     self.b = init.Normal().sample(n_hidden)
     H = dotbiasact_decomp(inputX, self.W, self.b)
     self.beta = compute_beta(H, inputy, self.C)
     out = dot_decomp(H, self.beta)
     return out
Example #13
 def get_train_output(self, inputX, inputy):
     self.W = init.GlorotNormal().sample(
         (inputX.shape[1], self.hidden_unit))
     self.b = init.Normal().sample(self.hidden_unit)
     H = np.dot(inputX, self.W) + self.b
     H = relu(H)
     self.beta = compute_beta(H, inputy, self.C)
     out = np.dot(H, self.beta)
     return out
Example #14
 def get_train_output_for(self, inputX, inputy=None):
     n_hidden = int(self.n_times * inputX.shape[1])
     self.W = init.GlorotNormal().sample((inputX.shape[1], n_hidden))
     self.b = init.Normal().sample(n_hidden)
     H = np.dot(inputX, self.W) + self.b
     H = relu(H)
     self.beta = compute_beta_val(H, inputy, 3)
     out = np.dot(H, self.beta)
     return out
Example #15
 def __init__(self,
              W_in=init.GlorotNormal(1.0),
              W_hid=init.GlorotNormal(1.0),
              W_read=init.GlorotNormal(1.0),
              W_cell=init.Normal(1.0),
              b=init.Constant(0.),
              nonlinearity=nonlinearities.sigmoid):
     self.W_in = W_in
     self.W_hid = W_hid
     self.W_read = W_read
     # Don't store a cell weight vector when cell is None
     if W_cell is not None:
         self.W_cell = W_cell
     self.b = b
     # For the nonlinearity, if None is supplied, use identity
     if nonlinearity is None:
         self.nonlinearity = nonlinearities.identity
     else:
         self.nonlinearity = nonlinearity
Example #16
        def lstm_layer(input,
                       nunits,
                       return_final,
                       backwards=False,
                       name='LSTM'):
            ingate = Gate(W_in=init.Uniform(0.01),
                          W_hid=init.Uniform(0.01),
                          b=init.Constant(0.0))
            forgetgate = Gate(W_in=init.Uniform(0.01),
                              W_hid=init.Uniform(0.01),
                              b=init.Constant(5.0))
            cell = Gate(
                W_cell=None,
                nonlinearity=T.tanh,
                W_in=init.Uniform(0.01),
                W_hid=init.Uniform(0.01),
            )
            outgate = Gate(W_in=init.Uniform(0.01),
                           W_hid=init.Uniform(0.01),
                           b=init.Constant(0.0))

            lstm = LSTMLayer(input,
                             num_units=nunits,
                             backwards=backwards,
                             peepholes=False,
                             ingate=ingate,
                             forgetgate=forgetgate,
                             cell=cell,
                             outgate=outgate,
                             name=name,
                             only_return_final=return_final)

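             # the RecurrentLayer built below is never used; lstm_layer() only returns the LSTM above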
            rec = RecurrentLayer(input,
                                 num_units=nunits,
                                 W_in_to_hid=init.GlorotNormal('relu'),
                                 W_hid_to_hid=init.GlorotNormal('relu'),
                                 backwards=backwards,
                                 nonlinearity=rectify,
                                 only_return_final=return_final,
                                 name=name)
            return lstm
Example #17
    def __init__(self, n_v, n_h, trans_func=sigmoid):
        super(NADE, self).__init__(n_v, n_h, n_v, trans_func)
        self._srng = RandomStreams()
        self.n_hidden = n_h
        l_v = InputLayer((None, n_v))
        self.model = NADELayer(l_v,
                               n_h,
                               W=init.GlorotNormal(),
                               b=init.Constant(0.))
        self.model_params = get_all_params(self.model)

        self.sym_x = T.matrix('x')
Example #18
 def get_train_output_ensemble(self, inputX, inputy, n=10):
     self.W = init.GlorotNormal().sample(
         (inputX.shape[1], self.hidden_unit))
     self.b = init.Normal().sample(self.hidden_unit)
     outputs = []
     for _ in xrange(n):
         inputX, binomial1 = dropout(inputX, p=0.5)
         H = np.dot(inputX, self.W) + self.b
         H = relu(H)
         H, binomial2 = dropout(H, p=0.5)
         beta = compute_beta(H, inputy, self.C)
         out = np.dot(H, beta)
         outputs.append(np.copy(out))
         self.binomials.append((np.copy(binomial1), np.copy(binomial2)))
         self.betas.append(np.copy(beta))
     return outputs
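The dropout helper in this example apparently returns both the dropped-out array and the binomial mask, so the mask can be replayed when reconstructing the ensemble members. A possible implementation under that assumption (the real helper is not shown):

import numpy as np

def dropout(X, p=0.5):
    # hypothetical helper matching the call sites above: zero units with probability p
    mask = np.random.binomial(1, 1.0 - p, size=X.shape)
    return X * mask, mask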
Example #19
 def _addCCCPLayer(self, inputX, outchannels):
     batches, inchannels, rows, cols = inputX.shape
     inputX = inputX.transpose((0, 2, 3, 1)).reshape((-1, inchannels))
     W = init.GlorotNormal().sample((inchannels, outchannels))
     b = init.Normal().sample(outchannels)
     # AE forward pass to compute the hidden layer
     H = dotbiasact_decomp(inputX, W, b)
     del W, b
     # compute the AE output weights with ELM
     beta = compute_beta(H, inputX, self.C).T
     # forward pass to produce the CCCP output
     cccpout = dot_decomp(inputX, beta)
     del inputX
     cccpout = cccpout.reshape((batches, rows, cols, -1)).transpose((0, 3, 1, 2))
     print 'add cccp layer'
     return cccpout, beta
Example #20
 def train(self, inputX, inputy):
     layerout = self._buildAE(inputX)
     rows, cols = layerout.shape
     classifierunit = cols * 5
     W = init.GlorotNormal().sample((cols, classifierunit))
     b = init.Normal().sample(classifierunit)
     H = dotbiasact_decomp(layerout, W, b)
     del layerout
     beta = compute_beta(H, inputy, self.C)
     out = dot_decomp(H, beta)
     del H
     self.paramsC['W'] = W
     self.paramsC['b'] = b
     self.paramsC['beta'] = beta
     ypred = np.argmax(out, axis=1)
     ytrue = np.argmax(inputy, axis=1)
     return np.mean(ypred == ytrue)
Example #21
    def forward(self, inputX, train=True):
        assert inputX.ndim == 4 and inputX.shape[1] == 1  # the ELMAE input must have exactly one channel, i.e., a single feature map
        batches, channels, rows, cols = inputX.shape
        if train:
            patches = self._make_patches(inputX, fit=True, addpad=False)
            # use clustering to obtain distinct patches for each cluster

            # generate random orthonormal filters
            filters = init.GlorotNormal().sample((self.filter_size ** 2, self.hidden_unit))
            filters = orthonormalize(filters)
            bias = init.Normal().sample(self.hidden_unit)
            bias = orthonormalize(bias)
            # convolution forward pass, consistent with the patch extraction
            hiddens = dotbiasact_decomp(patches, filters, bias)
            # compute beta
            self.beta = compute_beta(hiddens, patches, self.C).T
        patches = self._make_patches(inputX, fit=False, addpad=True)
        out = dot_decomp(patches, self.beta)
        out = out.reshape((batches, rows, cols, -1)).transpose((0, 3, 1, 2))
        return out
Example #22
    def train(self, inputX, inputy):
        layerout1, layerout2 = self._buildAE(inputX)
        rows, cols = layerout1.shape
        classifierunit = cols * 5
        W = init.GlorotNormal().sample((cols, classifierunit))
        b = init.Normal().sample(classifierunit)
        H1 = dotbiasact_decomp(layerout1, W, b)
        beta1 = compute_beta(H1, inputy, self.C)
        out1 = dot_decomp(H1, beta1)

        H2 = dotbiasact_decomp(layerout2, W, b)
        beta2 = compute_beta(H2, inputy, self.C)
        out2 = dot_decomp(H2, beta2)

        self.paramsC['W'] = W
        self.paramsC['b'] = b
        self.paramsC['beta1'] = beta1
        self.paramsC['beta2'] = beta2
        ypred1 = np.argmax(out1, axis=1)
        ypred2 = np.argmax(out2, axis=1)
        ytrue = np.argmax(inputy, axis=1)
        return np.mean(ypred1 == ytrue), np.mean(ypred2 == ytrue)
Example #23
 def __init__(self, incomings, nfilters, nrings=5, nrays=16,
              W=LI.GlorotNormal(), b=LI.Constant(0.0),
              normalize_rings=False, normalize_input=False, take_max=True, 
              nonlinearity=LN.rectify, **kwargs):
     super(GCNNLayer, self).__init__(incomings, **kwargs)
     
     # patch operator sizes
     self.nfilters = nfilters
     self.nrings = nrings
     self.nrays = nrays
     self.filter_shape = (nfilters, self.input_shapes[0][1], nrings, nrays)
     self.biases_shape = (nfilters, )
     # path operator parameters
     self.normalize_rings = normalize_rings
     self.normalize_input = normalize_input
     self.take_max = take_max
     self.nonlinearity = nonlinearity
     
     # layer parameters:
     # y = Wx + b, where x are the input features and y are the output features
     self.W = self.add_param(W, self.filter_shape, name="W")
     self.b = self.add_param(b, self.biases_shape, name="b", regularizable=False)
Example #24
    def __init__(self,
                 n_x,
                 n_z,
                 qz_hid,
                 px_hid,
                 filters,
                 seq_length=50,
                 nonlinearity=rectify,
                 px_nonlinearity=None,
                 x_dist='linear',
                 batchnorm=False,
                 seed=1234):
        """
        Weights are initialized using the Glorot and Bengio (2010) initialization scheme.
        :param n_x: Number of inputs.
        :param n_z: Number of latent.
        :param qz_hid: List of number of deterministic hidden q(z|a,x,y).
        :param px_hid: List of number of deterministic hidden p(a|z,y) & p(x|z,y).
        :param nonlinearity: The transfer function used in the deterministic layers.
        :param x_dist: The x distribution, 'bernoulli', 'multinomial', or 'gaussian'.
        :param batchnorm: Boolean value for batch normalization.
        :param seed: The random seed.
        """
        super(CVAE, self).__init__(n_x, qz_hid + px_hid, n_z, nonlinearity)
        self.x_dist = x_dist
        self.n_x = n_x
        self.seq_length = seq_length
        self.n_z = n_z
        self.batchnorm = batchnorm
        self._srng = RandomStreams(seed)

        # Pool layer cache
        pool_layers = []

        # Decide Glorot initialization of weights.
        init_w = 1e-3
        hid_w = ""
        if nonlinearity == rectify or nonlinearity == softplus:
            hid_w = "relu"

        # Define symbolic variables for theano functions.
        self.sym_x = T.tensor3('x')  # inputs
        self.sym_z = T.matrix('z')
        self.sym_samples = T.iscalar('samples')  # MC samples

        # Assist methods for collecting the layers
        def dense_layer(layer_in,
                        n,
                        dist_w=init.GlorotNormal,
                        dist_b=init.Normal):
            dense = DenseLayer(layer_in, n, dist_w(hid_w), dist_b(init_w),
                               None)
            if batchnorm:
                dense = bn(dense)
            return NonlinearityLayer(dense, self.transf)

        def stochastic_layer(layer_in, n, samples, nonlin=None):
            mu = DenseLayer(layer_in, n, init.Normal(init_w),
                            init.Normal(init_w), nonlin)
            logvar = DenseLayer(layer_in, n, init.Normal(init_w),
                                init.Normal(init_w), nonlin)
            return SampleLayer(mu, logvar, eq_samples=samples,
                               iw_samples=1), mu, logvar

        def conv_layer(layer_in, filter, stride=(1, 1), pool=1, name='conv'):
            l_conv = Conv2DLayer(layer_in,
                                 num_filters=filter,
                                 filter_size=(3, 1),
                                 stride=stride,
                                 pad='full',
                                 name=name)
            if pool > 1:
                l_conv = MaxPool2DLayer(l_conv, pool_size=(pool, 1))
                pool_layers.append(l_conv)
            return l_conv

        # Reshape input
        l_x_in = InputLayer((None, seq_length, n_x), name='Input')
        l_x_in_reshp = ReshapeLayer(l_x_in, (-1, 1, seq_length, n_x))
        print("l_x_in_reshp", l_x_in_reshp.output_shape)

        # CNN encoder implementation
        l_conv_enc = l_x_in_reshp
        for filter, stride, pool in filters:
            l_conv_enc = conv_layer(l_conv_enc, filter, stride, pool)
            print("l_conv_enc", l_conv_enc.output_shape)

        # Pool along last 2 axes
        l_global_pool_enc = GlobalPoolLayer(l_conv_enc)
        l_enc = dense_layer(l_global_pool_enc, n_z)
        print("l_enc", l_enc.output_shape)

        # Recognition q(z|x)
        l_qz = l_enc
        for hid in qz_hid:
            l_qz = dense_layer(l_qz, hid)
        l_qz, l_qz_mu, l_qz_logvar = stochastic_layer(l_qz, n_z,
                                                      self.sym_samples)
        print("l_qz", l_qz.output_shape)

        # Inverse pooling
        l_global_depool = InverseLayer(l_qz, l_global_pool_enc)
        print("l_global_depool", l_global_depool.output_shape)

        # Reverse pool layer order
        pool_layers = pool_layers[::-1]

        # Decode
        l_deconv = l_global_depool
        for idx, filter in enumerate(filters[::-1]):
            filter, stride, pool = filter
            if pool > 1:
                l_deconv = InverseLayer(l_deconv, pool_layers[idx])
            l_deconv = Conv2DLayer(l_deconv,
                                   num_filters=filter,
                                   filter_size=(3, 1),
                                   stride=(stride, 1),
                                   W=init.GlorotNormal('relu'))
            print("l_deconv", l_deconv.output_shape)

        # The last l_conv layer should give us the input shape
        l_dec = Conv2DLayer(l_deconv,
                            num_filters=1,
                            filter_size=(3, 1),
                            pad='same',
                            nonlinearity=None)
        print("l_dec", l_dec.output_shape)

        # Flatten first two dimensions
        l_dec = ReshapeLayer(l_dec, (-1, n_x))

        l_px = l_dec
        if x_dist == 'bernoulli':
            l_px = DenseLayer(l_px, n_x, init.GlorotNormal(),
                              init.Normal(init_w), sigmoid)
        elif x_dist == 'multinomial':
            l_px = DenseLayer(l_px, n_x, init.GlorotNormal(),
                              init.Normal(init_w), softmax)
        elif x_dist == 'gaussian':
            l_px, l_px_mu, l_px_logvar = stochastic_layer(
                l_px, n_x, self.sym_samples, px_nonlinearity)
        elif x_dist == 'linear':
            l_px = DenseLayer(l_px, n_x, nonlinearity=None)

        # Reshape all the model layers to have the same size
        self.l_x_in = l_x_in

        self.l_qz = ReshapeLayer(l_qz, (-1, self.sym_samples, 1, n_z))
        self.l_qz_mu = DimshuffleLayer(l_qz_mu, (0, 'x', 'x', 1))
        self.l_qz_logvar = DimshuffleLayer(l_qz_logvar, (0, 'x', 'x', 1))

        self.l_px = DimshuffleLayer(
            ReshapeLayer(l_px, (-1, seq_length, self.sym_samples, 1, n_x)),
            (0, 2, 3, 1, 4))
        self.l_px_mu = DimshuffleLayer(ReshapeLayer(l_px_mu, (-1, seq_length, self.sym_samples, 1, n_x)), (0, 2, 3, 1, 4)) \
            if x_dist == "gaussian" else None
        self.l_px_logvar = DimshuffleLayer(ReshapeLayer(l_px_logvar, (-1, seq_length, self.sym_samples, 1, n_x)), (0, 2, 3, 1, 4)) \
            if x_dist == "gaussian" else None

        # Predefined functions
        inputs = {self.l_x_in: self.sym_x}
        outputs = get_output(l_qz, inputs, deterministic=True)
        self.f_qz = theano.function([self.sym_x, self.sym_samples], outputs)

        inputs = {l_qz: self.sym_z}
        outputs = get_output(self.l_px, inputs,
                             deterministic=True).mean(axis=(1, 2))
        self.f_px = theano.function([self.sym_z, self.sym_samples], outputs)

        outputs = get_output(self.l_px_mu, inputs,
                             deterministic=True).mean(axis=(1, 2))
        self.f_mu = theano.function([self.sym_z, self.sym_samples], outputs)

        outputs = get_output(self.l_px_logvar, inputs,
                             deterministic=True).mean(axis=(1, 2))
        self.f_var = theano.function([self.sym_z, self.sym_samples], outputs)

        # Define model parameters
        self.model_params = get_all_params([self.l_px])
        self.trainable_model_params = get_all_params([self.l_px],
                                                     trainable=True)
Example #25
    def __init__(
            self,
            input,  # input images (n_batch x n_channels x img_height x img_width)
            #n_batch=64, # number of batch
        k=1,  # number of glimps scales
            patch=8,  # size of glimps patch
            n_steps=6,  # number of glimps steps
            lambda_=10.0,  # mixing ratio between
            n_h_g=128,  # number of hidden units in h_g (in glimps network)
            n_h_l=128,  # number of hidden units in h_l (in glimps network)
            n_f_g=256,  # number of hidden units in f_g (glimps network)
            n_f_h=256,  # number of hidden units in f_h (core network)
            #n_f_l=2, # dim of output of f_l (location network) i.e. 2
        n_classes=10,  # number of classes in classification problem
            learn_init=True,
            **kwargs):
        super(RAMLayer, self).__init__(input, **kwargs)

        if len(self.input_shape) == 3:
            self.n_batch = self.input_shape[0]
            self.n_channels = 1
            self.img_height = self.input_shape[1]
            self.img_width = self.input_shape[2]
        elif len(self.input_shape) == 4:
            self.n_batch = self.input_shape[0]
            self.n_channels = self.input_shape[1]
            self.img_height = self.input_shape[2]
            self.img_width = self.input_shape[3]
        else:
            raise ValueError(
                "Input should be either gray scale (ndim = 3) or color (ndim = 4) images."
                "Current ndim=%d" % self.ndim)

        self.k = k
        self.patch = patch
        self.n_steps = n_steps
        self.lambda_ = lambda_

        self.n_h_g = n_h_g
        self.n_h_l = n_h_l
        self.n_f_g = n_f_g
        self.n_f_h = n_f_h
        #self.n_f_l = 2
        self.n_classes = n_classes

        # for glimps network, f_g
        self.W_h_g = []
        for i in xrange(self.k):
            self.W_h_g.append(
                self.add_param(init.GlorotNormal(),
                               (self.n_channels * ((self.patch *
                                                    (2**i))**2), self.n_h_g),
                               name='W_h_g'))
        self.b_h_g = self.add_param(init.Constant(0.), (self.n_h_g, ),
                                    name='b_h_g')

        self.W_h_l = self.add_param(init.GlorotNormal(), (2, self.n_h_l),
                                    name='W_h_l')
        self.b_h_l = self.add_param(init.Constant(0.), (self.n_h_l, ),
                                    name='b_h_l')

        self.W_f_g_1 = self.add_param(init.GlorotNormal(),
                                      (self.n_h_g, self.n_f_g),
                                      name='W_f_g_1')
        self.W_f_g_2 = self.add_param(init.GlorotNormal(),
                                      (self.n_h_l, self.n_f_g),
                                      name='W_f_g_2')
        self.b_f_g = self.add_param(init.Constant(0.), (self.n_f_g, ),
                                    name='b_f_g')

        # for core network, f_h
        self.W_f_h_1 = self.add_param(init.GlorotNormal(),
                                      (self.n_f_g, self.n_f_h),
                                      name='W_f_h_1')
        self.W_f_h_2 = self.add_param(init.GlorotNormal(),
                                      (self.n_f_g, self.n_f_h),
                                      name='W_f_h_2')
        self.b_f_h = self.add_param(init.Constant(0.), (self.n_f_h, ),
                                    name='b_f_h')

        # for action network (location) f_l
        self.W_f_l = self.add_param(init.GlorotNormal(), (self.n_f_h, 2),
                                    name='W_f_l')
        self.b_f_l = self.add_param(init.Constant(0.), (2, ), name='b_f_l')

        # for action network (classification) f_a
        self.W_classifier = self.add_param(init.GlorotNormal(),
                                           (self.n_f_h, self.n_classes),
                                           name='W_classifier')
        self.b_classifier = self.add_param(init.Constant(0.),
                                           (self.n_classes, ),
                                           name='b_classifier')

        # for step
        self._srng = RandomStreams(np.random.randint(1, 2147462579))
        self.sigma = 0.1
        self.hid_init = self.add_param(init.Constant(0.),
                                       (1, ) + (self.n_f_h, ),
                                       name="hid_init",
                                       trainable=learn_init,
                                       regularizable=False)
Example #26
     def __init__(self,
                 n_in,
                 n_filters,
                 filter_sizes,
                 n_out,
                 pool_sizes=None,
                 n_hidden=(512),
                 ccf=False,
                 trans_func=rectify,
                 out_func=softmax,
                 dense_dropout=0.0,
                 stats=2,
                 input_noise=0.0,
                 batch_norm=False,
                 conv_dropout=0.0):
        super(CNN, self).__init__(n_in, n_hidden, n_out, trans_func)
        self.outf = out_func
        self.log = ""

        # Define model using lasagne framework
        dropout = True if not dense_dropout == 0.0 else False

        # Overwrite input layer
        sequence_length, n_features = n_in
        self.l_in = InputLayer(shape=(None, sequence_length, n_features))
        l_prev = self.l_in

        # Separate into raw values and statistics
        sequence_length -= stats
        stats_layer = SliceLayer(l_prev,
                                 indices=slice(sequence_length, None),
                                 axis=1)
        stats_layer = ReshapeLayer(stats_layer, (-1, stats * n_features))
        print('Stats layer shape', stats_layer.output_shape)
        l_prev = SliceLayer(l_prev, indices=slice(0, sequence_length), axis=1)
        print('Conv input layer shape', l_prev.output_shape)

        # Apply input noise
        l_prev = GaussianNoiseLayer(l_prev, sigma=input_noise)

        if ccf:
            self.log += "\nAdding cross-channel feature layer"
            l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features))
            l_prev = Conv2DLayer(l_prev,
                                 num_filters=4 * n_features,
                                 filter_size=(1, n_features),
                                 nonlinearity=None)
            n_features *= 4
            if batch_norm:
                l_prev = batch_norm_layer(l_prev)
            l_prev = ReshapeLayer(l_prev, (-1, n_features, sequence_length))
            l_prev = DimshuffleLayer(l_prev, (0, 2, 1))

        # 2D Convolutional layers
        l_prev = ReshapeLayer(l_prev, (-1, 1, sequence_length, n_features))
        l_prev = DimshuffleLayer(l_prev, (0, 3, 2, 1))

        # Add the convolutional filters
        for n_filter, filter_size, pool_size in zip(n_filters, filter_sizes,
                                                    pool_sizes):
            self.log += "\nAdding 2D conv layer: %d x %d" % (n_filter,
                                                             filter_size)
            l_prev = Conv2DLayer(l_prev,
                                 num_filters=n_filter,
                                 filter_size=(filter_size, 1),
                                 nonlinearity=self.transf,
                                 pad=filter_size // 2)
            if batch_norm:
                l_prev = batch_norm_layer(l_prev)
            if pool_size > 1:
                self.log += "\nAdding max pooling layer: %d" % pool_size
                l_prev = Pool2DLayer(l_prev, pool_size=(pool_size, 1))
            self.log += "\nAdding dropout layer: %.2f" % conv_dropout
            l_prev = TiedDropoutLayer(l_prev, p=conv_dropout)
            print("Conv out shape", get_output_shape(l_prev))

        # Global pooling layer
        l_prev = GlobalPoolLayer(l_prev,
                                 pool_function=T.mean,
                                 name='Global Mean Pool')
        print("GlobalPoolLayer out shape", get_output_shape(l_prev))

        # Concatenate stats
        l_prev = ConcatLayer((l_prev, stats_layer), axis=1)

        for n_hid in n_hidden:
            self.log += "\nAdding dense layer with %d units" % n_hid
            print("Dense input shape", get_output_shape(l_prev))
            l_prev = DenseLayer(l_prev, n_hid, init.GlorotNormal(),
                                init.Normal(1e-3), self.transf)
            if batch_norm:
                l_prev = batch_norm_layer(l_prev)
            if dropout:
                self.log += "\nAdding dense dropout with probability: %.2f" % dense_dropout
                l_prev = DropoutLayer(l_prev, p=dense_dropout)

        if batch_norm:
            self.log += "\nUsing batch normalization"

        self.model = DenseLayer(l_prev, num_units=n_out, nonlinearity=out_func)
        self.model_params = get_all_params(self.model)

        self.sym_x = T.tensor3('x')
        self.sym_t = T.matrix('t')
Example #27
    def build_model(self, use_mean_lstm = True, old_version = True,
                    #init params used during setup stage
                    transf = lasagne.nonlinearities.tanh, # dense-layer activation function
                    word_ebd_init = init.Normal(1e-6),
                    b_init = init.Normal(1e-4), W_init = init.GlorotNormal(),
                    W_init_act = init.GlorotNormal()):
        self.transf = transf
        ############## build model ############
        self.l_sents_in = InputLayer((None,None))
        self.l_mask_in = InputLayer((None,None))
        self.l_label_in = InputLayer((None,self.dimy)) #for unlabeled data, y is generated by the classifier; otherwise y is a training parameter
        self.l_z_in = InputLayer((None, self.dimz)) #samples in generation model
        self.l_dec_cell_in = InputLayer((None, self.dec_num_units)) #used in one step beam search
        self.l_dec_hid_in = InputLayer((None, self.dec_num_units)) #used in one step beam search
        self.l_dec_input_word_in = InputLayer((None, None, self.word_ebd_dims)) #batch_size * sent_length(max lstm steps) * word_ebd_dims
        self.l_dec_out_in = InputLayer((None,None, self.dec_num_units))
        ###word embedding layers
        self.l_ebd =  EmbeddingLayer(self.l_sents_in,self.word_dict_size, self.word_ebd_dims, W = word_ebd_init,
                                         name = 'EbdLayer' )
        self.l_ebd_drop = DropoutLayer(self.l_ebd, p = self.drop_out,
                                       name = 'EbdDropoutLayer') #no params; input: batch_size*sent_length*word_ebd_dims
        ####################encoder lstm layers######################################
        self.l_x = DropoutLayer( LSTMLayer(self.l_ebd_drop, num_units = self.enc_num_units, mask_input = self.l_mask_in,
                                                    grad_clipping = self.grad_clipping, only_return_final = True, name='EncLSTMLayer'),
                                                    p = self.drop_out, name='EncLSTMLayer') #LSTM for classifier mean pooling is better?
        if use_mean_lstm:
            print 'Using mean pooling for classifier!!!!!!!!!!!!!!!'
            self.l_c = DropoutLayer( MeanLstmLayer(self.l_ebd_drop, num_units = self.enc_num_units, mask_input = self.l_mask_in,
                                            grad_clipping = self.grad_clipping, name='ClassLSTMLayer'),
                                            p = self.drop_out, name='ClassLSTMLayer')
        else:
            self.l_c = DropoutLayer( LSTMLayer(self.l_ebd_drop, num_units = self.enc_num_units, mask_input = self.l_mask_in,
                                            grad_clipping = self.grad_clipping, only_return_final = True, name='ClassLSTMLayer'),
                                            p = self.drop_out, name='ClassLSTMLayer')
        #----------------- auxiliary q(a|x) ###########################################
        if old_version:
            self.l_x_to_a = DropoutLayer( batch_norm( DenseLayer(self.l_x, num_units= self.dima,
                                                W = W_init_act, b = b_init,
                                                nonlinearity= self.transf, name = 'x_to_a_old'),
                                                alpha = self.bnalpha, name = 'x_to_a_old'),p = self.drop_out, name = 'x_to_a_old')
        else:
            print 'Using new version of model!!!!!!!!!!'
            self.l_mean_pooling =DropoutLayer(MeanMaskLayer(self.l_ebd_drop, self.l_mask_in, name='mean_pooling'),
                                              p = self.drop_out, name = 'mean_pooling')
            self.l_x_to_a = DropoutLayer( batch_norm( DenseLayer(self.l_mean_pooling, num_units= self.dima,
                                                W = W_init_act, b = b_init,
                                                nonlinearity= self.transf, name = 'x_to_a_new'),
                                                alpha = self.bnalpha, name = 'x_to_a_new'),p = self.drop_out, name = 'x_to_a_new')

        self.l_a_mu = DenseLayer(self.l_x_to_a, self.dima, W = W_init, b = b_init, nonlinearity=None, name = 'a_mu') #Linear without active functions
        self.l_a_var = DenseLayer(self.l_x_to_a, self.dima, W = W_init,b = b_init, nonlinearity=None, name = 'a_var')
        self.l_a = SimpleSampleLayer(self.l_a_mu, self.l_a_var, name= 'a_sample') #no params
        ################# Classifier q(y|a,x) #####################################
        self.l_ax = ConcatLayer([self.l_c, self.l_a], axis=1, name = 'Concat_ax') #no params
        self.l_ax_to_y = DropoutLayer( batch_norm( DenseLayer(self.l_ax, num_units= self.dimy,
                                        W = W_init_act, b= b_init,
                                        nonlinearity= self.transf,name = 'ax_to_y'),
                                        alpha = self.bnalpha,name = 'ax_to_y'), p = self.drop_out, name = 'ax_to_y')
        self.l_y = DenseLayer(self.l_ax, num_units= self.dimy,W=W_init, b = b_init,
                              nonlinearity= softmax, name='y_classifier' )
        #################### sample q(z|a,x,y) ####################################
        self.l_xy = ConcatLayer([self.l_x, self.l_label_in], axis=1, name = 'Concat_xy') #no params first use l_label_in
        self.l_xy_to_z = DropoutLayer( batch_norm( DenseLayer(self.l_xy, num_units= self.dimz,
                                                    W = W_init_act, b= b_init,
                                                    nonlinearity= self.transf, name='xy_to_z'),
                                                    alpha = self.bnalpha, name='xy_to_z'), p = self.drop_out, name='xy_to_z')
        self.l_z_mu = DenseLayer(self.l_xy_to_z, self.dimz,W=W_init, b=b_init, nonlinearity=None, name='z_mu') #Linear without active functions
        self.l_z_var = DenseLayer(self.l_xy_to_z, self.dimz,W=W_init, b=b_init, nonlinearity=None, name='z_var') #Linear without active functions
        self.l_z = SimpleSampleLayer(self.l_z_mu, self.l_z_var, name ='z_sample')

        ################## generative model, we use 'u' to stand for 'a' in the paper #####
        self.l_yz = ConcatLayer([self.l_label_in, self.l_z_in], axis = 1, name='Concat_yz') #l_z_in layer is used in beam search
        self.l_hid = batch_norm( DenseLayer(self.l_yz, num_units= self.dec_num_units,
                                                    W = W_init_act, b= b_init,
                                                    nonlinearity= self.transf, name ='LmHidInit'),
                                                    alpha = self.bnalpha, name ='LmHidInit') #init of hidden has no dropout
        ######################## dec lm ###################
        self.l_lm =  ScLSTMLayer(incoming=self.l_dec_input_word_in, num_units= self.dec_num_units,da_init=self.l_label_in,
                                cell_init=self.l_dec_cell_in,  hid_init=self.l_dec_hid_in, mask_input= self.l_mask_in,
                                grad_clipping = self.grad_clipping, name='ScLSTMLayer') #cell, hid used in beam search, shape(batch_size,sent_length,dec_num_units)
        ######################## softmax results ###################
        self.l_recons_x = DenseLayer(DropoutLayer(ReshapeLayer(self.l_dec_out_in, shape=(-1, self.dec_num_units), name='ScLSTMLayer'),
                                     p = self.drop_out, name='ScLSTMLayer'),#output shape:( batch_size*sent_length,dec_num_units)
                                num_units = self.word_dict_size, W=W_init, b=b_init,nonlinearity = softmax, name='recons_x')#(batch_size*sent_length, word_dict_size)
        '''
Example #28
    def __init__(self,
                 n_c,
                 n_l,
                 n_a,
                 n_z,
                 n_y,
                 qa_hid,
                 qz_hid,
                 qy_hid,
                 px_hid,
                 pa_hid,
                 filters,
                 nonlinearity=rectify,
                 px_nonlinearity=None,
                 x_dist='bernoulli',
                 batchnorm=False,
                 seed=1234):
        """
        Initialize a skip deep generative model consisting of
        discriminative classifier q(y|a,x),
        generative model P p(a|z,y) and p(x|a,z,y),
        inference model Q q(a|x) and q(z|a,x,y).
        Weights are initialized using the Glorot and Bengio (2010) initialization scheme.
        :param n_c: Number of input channels.
        :param n_l: Number of lengths.
        :param n_a: Number of auxiliary.
        :param n_z: Number of latent.
        :param n_y: Number of classes.
        :param qa_hid: List of number of deterministic hidden q(a|x).
        :param qz_hid: List of number of deterministic hidden q(z|a,x,y).
        :param qy_hid: List of number of deterministic hidden q(y|a,x).
        :param px_hid: List of number of deterministic hidden p(a|z,y) & p(x|z,y).
        :param nonlinearity: The transfer function used in the deterministic layers.
        :param x_dist: The x distribution, 'bernoulli', 'multinomial', or 'gaussian'.
        :param batchnorm: Boolean value for batch normalization.
        :param seed: The random seed.
        """
        super(CSDGM, self).__init__(n_c, qz_hid + px_hid, n_a + n_z,
                                    nonlinearity)
        self.x_dist = x_dist
        self.n_y = n_y
        self.n_c = n_c
        self.n_l = n_l
        self.n_a = n_a
        self.n_z = n_z
        self.batchnorm = batchnorm
        self._srng = RandomStreams(seed)

        # Decide Glorot initialization of weights.
        init_w = 1e-3
        hid_w = ""
        if nonlinearity == rectify or nonlinearity == softplus:
            hid_w = "relu"

        pool_layers = []

        # Define symbolic variables for theano functions.
        self.sym_beta = T.scalar('beta')  # scaling constant beta
        self.sym_x_l = T.tensor3('x')  # labeled inputs
        self.sym_t_l = T.matrix('t')  # labeled targets
        self.sym_x_u = T.tensor3('x')  # unlabeled inputs
        self.sym_bs_l = T.iscalar('bs_l')  # number of labeled data
        self.sym_samples = T.iscalar('samples')  # MC samples
        self.sym_z = T.matrix('z')  # latent variable z
        self.sym_a = T.matrix('a')  # auxiliary variable a
        self.sym_warmup = T.fscalar('warmup')  # warmup to scale KL term

        # Assist methods for collecting the layers
        def dense_layer(layer_in,
                        n,
                        dist_w=init.GlorotNormal,
                        dist_b=init.Normal):
            dense = DenseLayer(layer_in, n, dist_w(hid_w), dist_b(init_w),
                               None)
            if batchnorm:
                dense = BatchNormLayer(dense)
            return NonlinearityLayer(dense, self.transf)

        def stochastic_layer(layer_in, n, samples, nonlin=None):
            mu = DenseLayer(layer_in, n, init.Normal(init_w),
                            init.Normal(init_w), nonlin)
            logvar = DenseLayer(layer_in, n, init.Normal(init_w),
                                init.Normal(init_w), nonlin)
            return SampleLayer(mu, logvar, eq_samples=samples,
                               iw_samples=1), mu, logvar

        def conv_layer(layer_in,
                       filter,
                       stride=(1, 1),
                       pool=1,
                       name='conv',
                       dist_w=init.GlorotNormal,
                       dist_b=init.Normal):
            l_conv = Conv2DLayer(layer_in,
                                 num_filters=filter,
                                 filter_size=(3, 1),
                                 stride=stride,
                                 pad='full',
                                 W=dist_w(hid_w),
                                 b=dist_b(init_w),
                                 name=name)
            if pool > 1:
                l_conv = MaxPool2DLayer(l_conv, pool_size=(pool, 1))
                pool_layers.append(l_conv)
            return l_conv

        # Input layers
        l_y_in = InputLayer((None, n_y))
        l_x_in = InputLayer((None, n_l, n_c), name='Input')

        # Reshape input
        l_x_in_reshp = ReshapeLayer(l_x_in, (-1, 1, n_l, n_c))
        print("l_x_in_reshp", l_x_in_reshp.output_shape)

        # CNN encoder implementation
        l_conv_enc = l_x_in_reshp
        for filter, stride, pool in filters:
            l_conv_enc = conv_layer(l_conv_enc, filter, stride, pool)
            print("l_conv_enc", l_conv_enc.output_shape)

        # Pool along last 2 axes
        l_global_pool_enc = GlobalPoolLayer(l_conv_enc, pool_function=T.mean)
        l_enc = dense_layer(l_global_pool_enc, n_z)
        print("l_enc", l_enc.output_shape)

        # Auxiliary q(a|x)
        l_qa_x = l_enc
        for hid in qa_hid:
            l_qa_x = dense_layer(l_qa_x, hid)
        l_qa_x, l_qa_x_mu, l_qa_x_logvar = stochastic_layer(
            l_qa_x, n_a, self.sym_samples)

        # Classifier q(y|a,x)
        l_qa_to_qy = DenseLayer(l_qa_x, qy_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_qy = ReshapeLayer(l_qa_to_qy,
                                  (-1, self.sym_samples, 1, qy_hid[0]))
        l_x_to_qy = DenseLayer(l_enc, qy_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_x_to_qy = DimshuffleLayer(l_x_to_qy, (0, 'x', 'x', 1))
        l_qy_xa = ReshapeLayer(ElemwiseSumLayer([l_qa_to_qy, l_x_to_qy]),
                               (-1, qy_hid[0]))
        if batchnorm:
            l_qy_xa = BatchNormLayer(l_qy_xa)
        l_qy_xa = NonlinearityLayer(l_qy_xa, self.transf)
        if len(qy_hid) > 1:
            for hid in qy_hid[1:]:
                l_qy_xa = dense_layer(l_qy_xa, hid)
        l_qy_xa = DenseLayer(l_qy_xa, n_y, init.GlorotNormal(),
                             init.Normal(init_w), softmax)

        # Recognition q(z|x,a,y)
        l_qa_to_qz = DenseLayer(l_qa_x, qz_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_qz = ReshapeLayer(l_qa_to_qz,
                                  (-1, self.sym_samples, 1, qz_hid[0]))
        l_x_to_qz = DenseLayer(l_enc, qz_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_x_to_qz = DimshuffleLayer(l_x_to_qz, (0, 'x', 'x', 1))
        l_y_to_qz = DenseLayer(l_y_in, qz_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_qz = DimshuffleLayer(l_y_to_qz, (0, 'x', 'x', 1))
        l_qz_axy = ReshapeLayer(
            ElemwiseSumLayer([l_qa_to_qz, l_x_to_qz, l_y_to_qz]),
            (-1, qz_hid[0]))
        if batchnorm:
            l_qz_axy = BatchNormLayer(l_qz_axy)
        l_qz_axy = NonlinearityLayer(l_qz_axy, self.transf)
        if len(qz_hid) > 1:
            for hid in qz_hid[1:]:
                l_qz_axy = dense_layer(l_qz_axy, hid)
        l_qz_axy, l_qz_axy_mu, l_qz_axy_logvar = stochastic_layer(
            l_qz_axy, n_z, 1)

        # Generative p(a|z,y)
        l_y_to_pa = DenseLayer(l_y_in, pa_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_pa = DimshuffleLayer(l_y_to_pa, (0, 'x', 'x', 1))
        l_qz_to_pa = DenseLayer(l_qz_axy, pa_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qz_to_pa = ReshapeLayer(l_qz_to_pa,
                                  (-1, self.sym_samples, 1, pa_hid[0]))
        l_pa_zy = ReshapeLayer(ElemwiseSumLayer([l_qz_to_pa, l_y_to_pa]),
                               [-1, pa_hid[0]])
        if batchnorm:
            l_pa_zy = BatchNormLayer(l_pa_zy)
        l_pa_zy = NonlinearityLayer(l_pa_zy, self.transf)
        if len(pa_hid) > 1:
            for hid in pa_hid[1:]:
                l_pa_zy = dense_layer(l_pa_zy, hid)
        l_pa_zy, l_pa_zy_mu, l_pa_zy_logvar = stochastic_layer(l_pa_zy, n_a, 1)

        # Generative p(x|a,z,y)
        l_qa_to_px = DenseLayer(l_qa_x, px_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_px = ReshapeLayer(l_qa_to_px,
                                  (-1, self.sym_samples, 1, px_hid[0]))
        l_y_to_px = DenseLayer(l_y_in, px_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_px = DimshuffleLayer(l_y_to_px, (0, 'x', 'x', 1))
        l_qz_to_px = DenseLayer(l_qz_axy, px_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qz_to_px = ReshapeLayer(l_qz_to_px,
                                  (-1, self.sym_samples, 1, px_hid[0]))
        l_px_azy = ReshapeLayer(
            ElemwiseSumLayer([l_qa_to_px, l_qz_to_px, l_y_to_px]),
            [-1, px_hid[0]])
        if batchnorm:
            l_px_azy = BatchNormLayer(l_px_azy)
        l_px_azy = NonlinearityLayer(l_px_azy, self.transf)

        # Note that px_hid[0] has to be equal to the number filters in the first convolution. Otherwise add a
        # dense layers here.

        # Inverse pooling
        l_global_depool = InverseLayer(l_px_azy, l_global_pool_enc)
        print("l_global_depool", l_global_depool.output_shape)

        # Reverse pool layer order
        pool_layers = pool_layers[::-1]

        # Decode
        l_deconv = l_global_depool
        for idx, filter in enumerate(filters[::-1]):
            filter, stride, pool = filter
            if pool > 1:
                l_deconv = InverseLayer(l_deconv, pool_layers[idx])
            l_deconv = Conv2DLayer(l_deconv,
                                   num_filters=filter,
                                   filter_size=(3, 1),
                                   stride=(stride, 1),
                                   W=init.GlorotNormal('relu'))
            print("l_deconv", l_deconv.output_shape)

        # The last l_conv layer should give us the input shape
        l_px_azy = Conv2DLayer(l_deconv,
                               num_filters=1,
                               filter_size=(3, 1),
                               pad='same',
                               nonlinearity=None)
        print("l_dec", l_px_azy.output_shape)

        # Flatten first two dimensions
        l_px_azy = ReshapeLayer(l_px_azy, (-1, n_c))

        if x_dist == 'bernoulli':
            l_px_azy = DenseLayer(l_px_azy, n_c, init.GlorotNormal(),
                                  init.Normal(init_w), sigmoid)
        elif x_dist == 'multinomial':
            l_px_azy = DenseLayer(l_px_azy, n_c, init.GlorotNormal(),
                                  init.Normal(init_w), softmax)
        elif x_dist == 'gaussian':
            l_px_azy, l_px_zy_mu, l_px_zy_logvar = stochastic_layer(
                l_px_azy, n_c, self.sym_samples, px_nonlinearity)
        elif x_dist == 'linear':
            l_px_azy = DenseLayer(l_px_azy, n_c, nonlinearity=None)

        # Reshape all the model layers to have the same size
        self.l_x_in = l_x_in
        self.l_y_in = l_y_in
        self.l_a_in = l_qa_x

        self.l_qa = ReshapeLayer(l_qa_x, (-1, self.sym_samples, 1, n_a))
        self.l_qa_mu = DimshuffleLayer(l_qa_x_mu, (0, 'x', 'x', 1))
        self.l_qa_logvar = DimshuffleLayer(l_qa_x_logvar, (0, 'x', 'x', 1))

        self.l_qz = ReshapeLayer(l_qz_axy, (-1, self.sym_samples, 1, n_z))
        self.l_qz_mu = ReshapeLayer(l_qz_axy_mu,
                                    (-1, self.sym_samples, 1, n_z))
        self.l_qz_logvar = ReshapeLayer(l_qz_axy_logvar,
                                        (-1, self.sym_samples, 1, n_z))

        self.l_qy = ReshapeLayer(l_qy_xa, (-1, self.sym_samples, 1, n_y))

        self.l_pa = ReshapeLayer(l_pa_zy, (-1, self.sym_samples, 1, n_a))
        self.l_pa_mu = ReshapeLayer(l_pa_zy_mu, (-1, self.sym_samples, 1, n_a))
        self.l_pa_logvar = ReshapeLayer(l_pa_zy_logvar,
                                        (-1, self.sym_samples, 1, n_a))

        # Here we assume that we pass (batch size * segment length, number of features) to the sample layer from
        # which we then get (batch size * segment length, samples, IW samples, features)
        self.l_px = ReshapeLayer(l_px_azy, (-1, n_l, self.sym_samples, 1, n_c))
        self.l_px_mu = ReshapeLayer(l_px_zy_mu, (-1, n_l, self.sym_samples, 1, n_c)) \
            if x_dist == "gaussian" else None
        self.l_px_logvar = ReshapeLayer(l_px_zy_logvar, (-1, n_l, self.sym_samples, 1, n_c)) \
            if x_dist == "gaussian" else None

        # Predefined functions
        inputs = {l_x_in: self.sym_x_l}
        outputs = get_output(self.l_qy, inputs,
                             deterministic=True).mean(axis=(1, 2))
        self.f_qy = theano.function([self.sym_x_l, self.sym_samples], outputs)

        outputs = get_output(l_qa_x, inputs, deterministic=True)
        self.f_qa = theano.function([self.sym_x_l, self.sym_samples], outputs)

        inputs = {l_x_in: self.sym_x_l, l_y_in: self.sym_t_l}
        outputs = get_output(l_qz_axy, inputs, deterministic=True)
        self.f_qz = theano.function(
            [self.sym_x_l, self.sym_t_l, self.sym_samples], outputs)

        inputs = {l_qz_axy: self.sym_z, l_y_in: self.sym_t_l}
        outputs = get_output(self.l_pa, inputs,
                             deterministic=True).mean(axis=(1, 2))
        self.f_pa = theano.function(
            [self.sym_z, self.sym_t_l, self.sym_samples], outputs)

        inputs = {
            l_x_in: self.sym_x_l,
            l_qa_x: self.sym_a,
            l_qz_axy: self.sym_z,
            l_y_in: self.sym_t_l
        }
        outputs = get_output(self.l_px, inputs,
                             deterministic=True).mean(axis=(2, 3))
        self.f_px = theano.function([
            self.sym_x_l, self.sym_a, self.sym_z, self.sym_t_l,
            self.sym_samples
        ], outputs)

        outputs = get_output(self.l_px_mu, inputs,
                             deterministic=True).mean(axis=(2, 3))
        self.f_mu = theano.function([
            self.sym_x_l, self.sym_a, self.sym_z, self.sym_t_l,
            self.sym_samples
        ], outputs)

        outputs = get_output(self.l_px_logvar, inputs,
                             deterministic=True).mean(axis=(2, 3))
        self.f_var = theano.function([
            self.sym_x_l, self.sym_a, self.sym_z, self.sym_t_l,
            self.sym_samples
        ], outputs)
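        # Hedged usage sketch (assumption; shapes and sample counts are illustrative
        # only, not taken from the original code):
        #   y_prob = self.f_qy(x_labelled, 10)                   # MC-averaged q(y|x)
        #   a_sample = self.f_qa(x_labelled, 10)                 # auxiliary a ~ q(a|x)
        #   x_mu = self.f_mu(x_labelled, a_sample, z, t_labelled, 10)  # mean of p(x|a,z,y)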

        # Define model parameters
        self.model_params = get_all_params([self.l_qy, self.l_pa, self.l_px])
        self.trainable_model_params = get_all_params(
            [self.l_qy, self.l_pa, self.l_px], trainable=True)
Example #29
    def __init__(self,
                 n_in,
                 n_filters,
                 filter_size,
                 n_out,
                 pool_sizes=None,
                 n_hidden=(),
                 downsample=1,
                 batch_size=100,
                 trans_func=rectify,
                 out_func=softmax,
                 dropout_probability=0.0):
        super(UFCNN, self).__init__(n_in, n_hidden, n_out, trans_func)
        self.outf = out_func
        self.log = ""

        l2_mask = np.zeros((1, 1, filter_size * 2 + 1, 1))
        l2_mask[:, :, 2::2, :] = 1
        l2_mask = l2_mask[:, :, ::-1]
        self.l2_mask = theano.shared(l2_mask.astype(theano.config.floatX),
                                     broadcastable=(True, True, False, False))

        l3_mask = np.zeros((1, 1, filter_size * 4 + 1, 1))
        l3_mask[:, :, 4::4, :] = 1
        l3_mask = l3_mask[:, :, ::-1]
        self.l3_mask = theano.shared(l3_mask.astype(theano.config.floatX),
                                     broadcastable=(True, True, False, False))
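        # Illustration (not part of the original code): the masks zero out every other
        # (or every fourth) filter tap, turning the convolutions below into dilated
        # ("a trous") filters. For filter_size = 2, for example:
        #   m = np.zeros(5); m[2::2] = 1; m[::-1]  ->  [1, 0, 1, 0, 0]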

        W2 = init.GlorotNormal(gain=1.0).sample(shape=(n_filters, n_filters,
                                                       filter_size * 2 + 1, 1))
        W2 *= l2_mask

        W3 = init.GlorotNormal(gain=1.0).sample(shape=(n_filters, n_filters,
                                                       filter_size * 4 + 1, 1))
        W3 *= l3_mask

        # Overwrite input layer
        sequence_length, n_features = n_in
        self.l_in = InputLayer(shape=(batch_size, sequence_length, n_features))
        l_prev = self.l_in
        l_prev = ReshapeLayer(l_prev,
                              (batch_size, 1, sequence_length, n_features))
        l_prev = DimshuffleLayer(l_prev, (0, 3, 2, 1))

        l_h1 = Conv2DLayer(l_prev,
                           num_filters=n_filters,
                           filter_size=(filter_size, 1),
                           nonlinearity=self.transf,
                           pad='same',
                           name='h1')
        self.log += "\n%s:\t %s" % (l_h1.name, get_output_shape(l_h1))

        l_h2 = Conv2DLayer(l_h1,
                           num_filters=n_filters,
                           filter_size=(filter_size * 2 + 1, 1),
                           nonlinearity=self.transf,
                           pad='same',
                           name='h2',
                           W=W2)
        self.log += "\n%s:\t %s" % (l_h2.name, get_output_shape(l_h2))

        l_h3 = Conv2DLayer(l_h2,
                           num_filters=n_filters,
                           filter_size=(filter_size * 4 + 1, 1),
                           nonlinearity=self.transf,
                           pad='same',
                           name='h3',
                           W=W3)
        self.log += "\n%s:\t %s" % (l_h3.name, get_output_shape(l_h3))

        l_g3 = Conv2DLayer(l_h3,
                           num_filters=n_filters,
                           filter_size=(filter_size * 4 + 1, 1),
                           nonlinearity=self.transf,
                           pad='same',
                           name='g3',
                           W=W3)
        self.log += "\n%s:\t %s" % (l_g3.name, get_output_shape(l_g3))
        print(l_g3.W.get_value()[0, 0])

        l_h2_g3 = ConcatLayer((l_h2, l_g3), axis=1, name='l_h2_g3')
        self.log += "\n%s: %s" % (l_h2_g3.name, get_output_shape(l_h2_g3))

        l_g2 = Conv2DLayer(l_h2_g3,
                           num_filters=n_filters,
                           filter_size=(filter_size * 2 + 1, 1),
                           nonlinearity=self.transf,
                           pad='same',
                           name='g2',
                           W=np.concatenate((W2, W2), axis=1))
        self.log += "\n%s:\t %s" % (l_g2.name, get_output_shape(l_g2))

        l_h1_g2 = ConcatLayer((l_h1, l_g2), axis=1, name='l_h1_g2')
        self.log += "\n%s: %s" % (l_h1_g2.name, get_output_shape(l_h1_g2))
        l_g1 = Conv2DLayer(l_h1_g2,
                           num_filters=n_filters,
                           filter_size=(filter_size, 1),
                           nonlinearity=self.transf,
                           pad='same',
                           name='g1')
        self.log += "\n%s:\t %s" % (l_g1.name, get_output_shape(l_g1))

        l_prev = l_g1
        for n_hid in n_hidden:
            l_prev = DenseLayer(l_prev,
                                num_units=n_hid,
                                nonlinearity=self.transf)
            self.log += "\nAdding dense layer with %d units" % n_hid
            if dropout_probability:
                l_prev = DropoutLayer(l_prev, p=dropout_probability)
                self.log += "\nAdding dropout layer with p=%.3f" % dropout_probability
        self.model = DenseLayer(l_prev, num_units=n_out, nonlinearity=out_func)
        self.model_params = get_all_params(self.model)

        self.sym_x = T.tensor3('x')
        self.sym_t = T.matrix('t')
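        # Hedged usage sketch (assumption; the argument values are illustrative only):
        #   net = UFCNN(n_in=(seq_len, n_features), n_filters=32, filter_size=5,
        #               n_out=n_classes, n_hidden=(128,), batch_size=100)
        #   print(net.log)  # layer-by-layer output shapes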
    def build_model(
        self,
        use_mean_lstm=False,
        act_fun=lasagne.nonlinearities.tanh,  # dense layer activation function
        word_ebd_init=init.Normal(1e-2),
        b_init=init.Normal(1e-4),
        W_init=init.GlorotNormal()):
        # --------------------------------  Global Inputs  ------------------------------------------------------------
        self.l_sents_in = InputLayer(
            (None, None))  # sentence inputs as word indices.
        self.l_mask_in = InputLayer((None, None))
        self.l_label_in = InputLayer((None, self.dim_y))  # one hot
        # for unlabeled data, y is generated by the classifier; otherwise y is a parameter during training

        # ---------------------------------  Word Embedding  ---------------------------------------------------------
        # ## Input Nodes: l_sents_in
        self.l_ebd = EmbeddingLayer(self.l_sents_in,
                                    self.word_dict_size,
                                    self.word_ebd_dims,
                                    W=word_ebd_init,
                                    name='EbdLayer')  # we do dropout later.
        self.l_enc_sents_in = InputLayer(
            (None, None, self.word_ebd_dims
             ))  # embedded sentence inputs for the classifier and encoder
        self.l_dec_sents_in = InputLayer(
            (None, None,
             self.word_ebd_dims))  # for the decoder: shifted and with word dropout applied
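        # Hedged sketch (assumption; `idx`, `unk_idx`, `bos_idx` and `p_word_drop` are
        # hypothetical names): the decoder input is typically built outside the graph
        # by shifting the sequence right and dropping words to <unk> with some
        # probability, roughly:
        #   drop = np.random.rand(*idx.shape) < p_word_drop
        #   dec_idx = np.where(drop, unk_idx, idx)
        #   dec_idx = np.concatenate(
        #       [np.full((idx.shape[0], 1), bos_idx), dec_idx[:, :-1]], axis=1)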

        # ---------------------------------  Classifier ---------------------------------------------------------
        # ## Input Nodes: l_enc_sents_in, l_mask_in
        self.l_c_sents_drop = DropoutLayer(self.l_enc_sents_in,
                                           p=self.drop_out,
                                           name='Classifier Sents Dropout')
        if use_mean_lstm:  # dropout is applied later so pretrained weights can be loaded
            self.l_c = MeanLstmLayer(self.l_c_sents_drop,
                                     num_units=self.num_units,
                                     mask_input=self.l_mask_in,
                                     grad_clipping=self.grad_clipping,
                                     name='Classifier Mean')
        else:
            self.l_c = LSTMLayer(self.l_c_sents_drop,
                                 num_units=self.num_units,
                                 mask_input=self.l_mask_in,
                                 grad_clipping=self.grad_clipping,
                                 only_return_final=True,
                                 name='Classifier Final')
        self.l_c_drop = DropoutLayer(self.l_c,
                                     p=self.drop_out,
                                     name='Classifier LSTM Dropout')
        self.l_c_to_y = DropoutLayer(batch_norm(DenseLayer(
            self.l_c_drop,
            num_units=self.num_units,
            W=W_init,
            b=b_init,
            nonlinearity=act_fun,
            name='c_to_y'),
                                                name='c_to_y'),
                                     p=self.drop_out,
                                     name='c_to_y')
        self.l_y = DenseLayer(self.l_c_to_y,
                              num_units=self.dim_y,
                              W=W_init,
                              b=b_init,
                              nonlinearity=softmax,
                              name='y_pred')
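        # Hedged usage sketch (assumption): class probabilities can be read out from
        # the classifier sub-graph by mapping the input layers explicitly, e.g.
        #   y_hat = lasagne.layers.get_output(
        #       self.l_y, {self.l_enc_sents_in: ebd_x, self.l_mask_in: mask},
        #       deterministic=True)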

        # ---------------------------------  Inference Network ---------------------------------------------------------
        # ## Input Nodes: l_enc_sents_in, l_label_in, l_mask_in
        self.l_enc_sents_drop = DropoutLayer(self.l_enc_sents_in,
                                             p=self.drop_out,
                                             name='Enc Sents Dropout')
        # Encoder LSTM
        self.l_x = DropoutLayer(LSTMLayer(self.l_enc_sents_drop,
                                          num_units=self.num_units,
                                          mask_input=self.l_mask_in,
                                          grad_clipping=self.grad_clipping,
                                          only_return_final=True,
                                          name='Enc LSTM'),
                                p=self.drop_out,
                                name='Enc LSTM Drop')
        # Encoder Dense Layer(s); use a dedicated class if more are needed
        self.l_x_to_a = DropoutLayer(batch_norm(DenseLayer(
            self.l_x,
            num_units=self.num_units,
            W=W_init,
            b=b_init,
            nonlinearity=act_fun,
            name='x_to_a'),
                                                name='x_to_a'),
                                     p=self.drop_out,
                                     name='x_to_a')
        # combine information from label and encoder
        self.l_label_to_enc = DropoutLayer(DenseLayer(self.l_label_in,
                                                      num_units=self.num_units,
                                                      W=W_init,
                                                      b=b_init,
                                                      nonlinearity=act_fun,
                                                      name='label_to_enc'),
                                           p=self.drop_out,
                                           name='label_to_enc')
        self.l_xy = ConcatLayer([self.l_x_to_a, self.l_label_to_enc],
                                axis=1,
                                name='Concat_xy')
        self.l_xy = DropoutLayer(batch_norm(DenseLayer(
            self.l_xy,
            num_units=self.num_units,
            W=W_init,
            b=b_init,
            nonlinearity=act_fun,
            name='xy'),
                                            name='xy'),
                                 p=self.drop_out,
                                 name='xy')
        self.l_xy_to_z = DropoutLayer(batch_norm(DenseLayer(
            self.l_xy,
            num_units=self.dim_z,
            W=W_init,
            b=b_init,
            nonlinearity=act_fun,
            name='xy_to_z'),
                                                 name='xy_to_z'),
                                      p=self.drop_out,
                                      name='xy_to_z')
        # sample z
        self.l_z_mu = DenseLayer(self.l_xy_to_z,
                                 self.dim_z,
                                 W=W_init,
                                 b=b_init,
                                 nonlinearity=None,
                                 name='z_mu')
        self.l_z_var = DenseLayer(self.l_xy_to_z,
                                  self.dim_z,
                                  W=W_init,
                                  b=b_init,
                                  nonlinearity=None,
                                  name='z_var')
        self.l_z = SimpleSampleLayer(self.l_z_mu,
                                     self.l_z_var,
                                     name='z_sample')
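        # Note (assumption about SimpleSampleLayer): it presumably applies the
        # reparameterization trick, z = mu + exp(0.5 * log_var) * eps with
        # eps ~ N(0, I), so gradients flow through l_z_mu and l_z_var.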

        # ---------------------------------  Generation Network -------------------------------------------------------
        # ## Input Nodes: l_label_in, l_z_in, l_dec_sents_in, l_mask_in
        # In this model, there is no interface for beam search.
        self.l_z_in = InputLayer((None, self.dim_z))
        self.l_label_to_dec = DropoutLayer(DenseLayer(self.l_label_in,
                                                      num_units=self.num_units,
                                                      W=W_init,
                                                      b=b_init,
                                                      nonlinearity=act_fun,
                                                      name='label_to_dec'),
                                           p=self.drop_out,
                                           name='label_to_dec')
        self.l_yz = ConcatLayer([self.l_label_to_dec, self.l_z_in],
                                axis=1,
                                name='Concat_yz')
        # Decoder Dense Layer(s)
        self.l_yz = DropoutLayer(batch_norm(DenseLayer(
            self.l_yz,
            num_units=self.num_units,
            W=W_init,
            b=b_init,
            nonlinearity=act_fun,
            name='yz'),
                                            name='yz'),
                                 p=self.drop_out,
                                 name='yz')
        # the last layer has no dropout
        self.l_hid = batch_norm(DenseLayer(self.l_yz,
                                           num_units=self.num_units,
                                           W=W_init,
                                           b=b_init,
                                           nonlinearity=act_fun,
                                           name='yz_to_hid'),
                                name='yz_to_hid')
        # language model
        self.l_lm = ScLSTMLayer(incoming=self.l_dec_sents_in,
                                num_units=self.num_units,
                                da_init=self.l_label_in,
                                hid_init=self.l_hid,
                                mask_input=self.l_mask_in,
                                grad_clipping=self.grad_clipping,
                                name='ScLSTMLayer')
        self.l_rec = DenseLayer(DropoutLayer(ReshapeLayer(
            self.l_lm, shape=(-1, self.num_units), name='ScLSTMLayer'),
                                             p=self.drop_out,
                                             name='ScLSTMLayer'),
                                num_units=self.word_dict_size,
                                W=W_init,
                                b=b_init,
                                nonlinearity=softmax,
                                name='recons_x')
        # (batch_size*sent_length, word_dict_size)
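        # Hedged sketch (assumption; `targets` and `mask` are hypothetical symbolic
        # variables): the flattened softmax is typically paired with flattened targets
        # for a masked per-token cross-entropy, e.g.
        #   p = lasagne.layers.get_output(self.l_rec, ...)            # (batch*len, V)
        #   nll = -T.log(p[T.arange(p.shape[0]), targets.flatten()] + 1e-8)
        #   nll = (nll.reshape(mask.shape) * mask).sum(axis=1)        # mask padding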

        # ------------------------------- Baseline ----------------------------------
        if theano.config.floatX == 'float32':
            self.b = theano.shared(np.float32(5.5))
        else:
            self.b = theano.shared(np.float64(5.5))