Example #1
    def __init__(self,
                 train_x,
                 train_y,
                 test_x,
                 test_y,
                 psgd_type,
                 optimizer_type,
                 server_codec,
                 epoches,
                 server_type='asgd',
                 target_acc=None,
                 learn_rate=0.05,
                 codec=None):

        super().__init__(train_x, train_y, test_x, test_y, psgd_type,
                         optimizer_type, server_codec, epoches, server_type,
                         target_acc, learn_rate)

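        # Layer stack: two 5x5 convolutions, a 2x2 max-pool, two 3x3
        # convolutions, another 2x2 max-pool, then a flatten into three fully
        # connected layers ending in a 10-way softmax.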
        self.__nn = []
        self.__nn.append(Conv2dLayer([5, 5], 64, 'SAME', [1, 1]))
        self.__nn.append(Conv2dLayer([5, 5], 64, 'SAME', [1, 1]))
        self.__nn.append(MaxPool([2, 2]))
        self.__nn.append(Conv2dLayer([3, 3], 64, 'SAME', [1, 1]))
        self.__nn.append(Conv2dLayer([3, 3], 64, 'SAME', [1, 1]))
        self.__nn.append(MaxPool([2, 2]))
        self.__nn.append(Reshape([12288]))
        self.__nn.append(FCLayer_v2(1024, act=get_activation('tanh')))
        self.__nn.append(FCLayer_v2(784, act=get_activation('tanh')))
        self.__nn.append(FCLayer_v2(10, act=get_activation('softmax')))

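        # One codec per layer: default to 'plain' everywhere; if the supplied
        # list has the wrong length, broadcast its first entry to all layers.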
        if codec is None:
            codec = ['plain' for _ in self.__nn]
        elif len(codec) != len(self.__nn):
            codec = codec[:1] * len(self.__nn)

        self.__codec = [get_codec(c_str) for c_str in codec]
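
The snippet above shows only the __init__ method; the enclosing class and its
base class are not part of the example. A minimal construction sketch, assuming
a hypothetical class name ConvModel and string arguments that name entries in
the project's optimizer/codec registries (none of these values come from the
snippet itself):

model = ConvModel(train_x, train_y, test_x, test_y,
                  psgd_type='ssgd',        # assumed registry key
                  optimizer_type='gd',     # assumed registry key
                  server_codec='plain',    # assumed registry key
                  epoches=10,              # parameter name as spelled in the snippet
                  server_type='asgd',
                  target_acc=0.95,
                  learn_rate=0.05,
                  codec=['plain'])         # a short list is broadcast to all 10 layers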
Example #2
def make_model(z, sample_size, dropword_p, n_classes, encdec_layers,
               charcnn_size, charcnn_layers, alpha):
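    # Each stride-2 convolution halves the sequence length, so sample_size
    # must be divisible by 2**encdec_layers for the stride-2 deconvolutions
    # to restore the original length exactly.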
    assert sample_size % (2**encdec_layers) == 0
    if encdec_layers == 2:
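        # 2-layer variant: one-hot -> two stride-2 convolutions -> a sampling
        # layer that yields a z-dimensional latent; the decoder mirrors this
        # with two stride-2 deconvolutions plus an auxiliary word classifier.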
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1", collect=False),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2", collect=False),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
        ]
        decoder_from_z = [
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            # The first branch stores its softmax output for the auxiliary
            # loss; the empty branch passes the features through unchanged.
            Parallel([[
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ], []],
                     shared_input=True),
            lambda x: x[1],  # keep only the pass-through branch
        ]
    elif encdec_layers == 3:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Convolution1d(3,
                          512,
                          256,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv3"),
            BatchNormalization(512, name="bn3"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 8 * 512, z * 2, name="fc_encode"),
            Sampler(z),
        ]
        decoder_from_z = [
            Linear(z, sample_size // 8 * 512, name="fc_decode"),
            ReLU(),
            Reshape((-1, 512, sample_size // 8, 1)),
            Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
            BatchNormalization(256, name="deconv_bn3", collect=False),
            ReLU(),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            # Same auxiliary-classifier pattern as the 2-layer branch.
            Parallel([[
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ], []],
                     shared_input=True),
            lambda x: x[1],  # keep only the pass-through branch
        ]
    else:
        raise ValueError("unsupported number of encdec layers %d" %
                         encdec_layers)

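    # Two token ids beyond the vocabulary: start_word is prepended to every
    # sequence, dummy_word stands in for words removed by Dropword.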
    start_word = n_classes
    dummy_word = n_classes + 1
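    # Teacher-forcing input: apply word dropout, prepend the start token, and
    # drop the last position so step t only conditions on words before t.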
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate(
            [T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]
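    # Assemble the full graph: encode the input and keep the raw words in
    # parallel, decode from (z, shifted words), concatenate the two decoder
    # outputs along the feature axis, and resize with a gated 1x1 convolution.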
    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LayoutRNNToCNN(),
        Convolution1d(1,
                      charcnn_size * 2,
                      200 + n_classes + 2,
                      name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
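    # Decoder body: charcnn_layers width-3 highway convolutions (dilation 1).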
    for i in range(charcnn_layers):
        layers.append(
            HighwayConvolution1d(3,
                                 charcnn_size,
                                 dilation=1,
                                 name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)

    return model
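
A minimal sketch of calling make_model; the values below are assumptions chosen
only to satisfy the constraints visible in the function (any sample_size
divisible by 2**encdec_layers works), not settings from the original experiment:

model = make_model(
    z=64,              # latent dimensionality
    sample_size=32,    # 32 % 2**2 == 0, so the assert passes
    dropword_p=0.3,    # word-dropout probability on the decoder input
    n_classes=100,     # vocabulary size; two extra token ids are added internally
    encdec_layers=2,   # 2 or 3; anything else raises
    charcnn_size=256,  # channel width of the highway CNN decoder
    charcnn_layers=4,  # number of HighwayConvolution1d blocks
    alpha=0.2,         # weight of the auxiliary classifier loss
)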