import theano.tensor as T


def make_model(z, net, sample_size, p, n_classes):
    if net == "conv":
        # Convolutional encoder / deconvolutional decoder over one-hot tokens.
        assert sample_size % 4 == 0
        layers = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Flatten(),
            # fc_encode emits 2 * z values that Sampler(z) turns into a latent sample.
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
            # Decoder: project the latent sample back to feature maps and upsample.
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2"),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1"),
            ReLU(),
            LayoutCNNToRNN(),
            Linear(200, n_classes, name="classifier"),
            SoftMax()
        ]
    elif net == "rnn":
        start_word = n_classes
        dummy_word = n_classes + 1
        # LSTM encoder; the decoder is conditioned on z and on word-dropped,
        # right-shifted inputs prefixed with the start token.
        layers = [
            Parallel([
                [
                    OneHot(n_classes),
                    LNLSTM(n_classes, 500, name="enc"),
                    lambda x: x[-1],
                    Linear(500, z * 2, name="encoder_fc"),
                    Sampler(z),
                ],
                [
                    Dropword(p, dummy_word=dummy_word),
                    lambda x: T.concatenate(
                        [T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
                    lambda x: x[:-1],
                    OneHot(n_classes + 2),
                ]
            ]),
            ConditionalDecoderLNLSTM(n_classes + 2, z, 500, name="dec", steps=sample_size),
            Linear(500, n_classes, name="classifier"),
            SoftMax()
        ]
    else:
        raise Exception("unknown net %s" % net)

    model = LMReconstructionModel(layers, aux_loss=False)

    return model
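
# Illustrative calls only: the hyperparameter values below are assumptions, not
# settings taken from the original code. The "conv" variant ignores the
# word-dropout rate p, while "rnn" uses it; sample_size must be a multiple of 4
# for the "conv" variant.
conv_vae = make_model(z=64, net="conv", sample_size=128, p=0.0, n_classes=1000)
rnn_vae = make_model(z=64, net="rnn", sample_size=128, p=0.3, n_classes=1000)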
import theano.tensor as T


def make_model(z, sample_size, dropword_p, n_classes, encdec_layers,
               charcnn_size, charcnn_layers, alpha):
    assert sample_size % (2 ** encdec_layers) == 0

    if encdec_layers == 2:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1", collect=False),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2", collect=False),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
        ]

        decoder_from_z = [
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([
                [
                    Linear(200, n_classes, name="aux_classifier"),
                    SoftMax(),
                    Store()
                ],
                []
            ], shared_input=True),
            lambda x: x[1]
        ]
    elif encdec_layers == 3:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
            BatchNormalization(512, name="bn3"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 8 * 512, z * 2, name="fc_encode"),
            Sampler(z),
        ]

        decoder_from_z = [
            Linear(z, sample_size // 8 * 512, name="fc_decode"),
            ReLU(),
            Reshape((-1, 512, sample_size // 8, 1)),
            Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
            BatchNormalization(256, name="deconv_bn3", collect=False),
            ReLU(),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([
                [
                    Linear(200, n_classes, name="aux_classifier"),
                    SoftMax(),
                    Store()
                ],
                []
            ], shared_input=True),
            lambda x: x[1]
        ]
    else:
        raise Exception("unsupported number of encdec layers %d" % encdec_layers)

    start_word = n_classes
    dummy_word = n_classes + 1

    # Teacher-forced decoder inputs: apply word dropout, prepend the start token
    # and drop the final position so the target is predicted one step ahead.
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate(
            [T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]

    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LayoutRNNToCNN(),
        Convolution1d(1, charcnn_size * 2, 200 + n_classes + 2, name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]

    # Stack of highway 1-D convolutions over the concatenated z-decoder and word features.
    for i in range(charcnn_layers):
        layers.append(HighwayConvolution1d(3, charcnn_size, dilation=1, name="decconv%d" % i))

    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)

    return model
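
# Illustrative call only; every value below is an assumption chosen to satisfy
# the assert above (sample_size divisible by 2**encdec_layers). alpha is
# forwarded to LMReconstructionModel, presumably weighting the auxiliary loss.
hybrid_vae = make_model(z=100, sample_size=64, dropword_p=0.5, n_classes=256,
                        encdec_layers=3, charcnn_size=512, charcnn_layers=4,
                        alpha=0.2)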
import theano.tensor as T


def make_model(z, sample_size, dropword_p, n_classes, lstm_size, alpha):
    encoder = [
        OneHot(n_classes),
        LayoutRNNToCNN(),
        Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
        BatchNormalization(128, name="bn1"),
        ReLU(),
        Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
        BatchNormalization(256, name="bn2"),
        ReLU(),
        Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
        BatchNormalization(512, name="bn3"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv4"),
        BatchNormalization(512, name="bn4"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv5"),
        BatchNormalization(512, name="bn5"),
        ReLU(),
        Flatten(),
        Linear(sample_size // (2 ** 5) * 512, z * 2, name="fc_encode"),
        Sampler(z),
    ]

    decoder_from_z = [
        Linear(z, sample_size // (2 ** 5) * 512, name="fc_decode"),
        ReLU(),
        Reshape((-1, 512, sample_size // (2 ** 5), 1)),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv5"),
        BatchNormalization(512, name="deconv_bn5"),
        ReLU(),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv4"),
        BatchNormalization(512, name="deconv_bn4"),
        ReLU(),
        Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
        BatchNormalization(256, name="deconv_bn3"),
        ReLU(),
        Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
        BatchNormalization(128, name="deconv_bn2"),
        ReLU(),
        Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
        BatchNormalization(200, name="deconv_bn1"),
        ReLU(),
        LayoutCNNToRNN(),
        Parallel([
            [
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ],
            []
        ], shared_input=True),
        lambda x: x[1]
    ]

    start_word = n_classes
    dummy_word = n_classes + 1

    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate(
            [T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]

    # Concatenate the z-conditioned features with the word inputs and decode
    # with a layer-normalized LSTM.
    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LNLSTM(200 + n_classes + 2, lstm_size, name="declstm"),
        Linear(lstm_size, n_classes, name="classifier"),
        SoftMax()
    ]

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)

    return model
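
# Illustrative call only; the values are assumptions. With five stride-2
# convolutions in the encoder, sample_size should be a multiple of 2**5 = 32.
lstm_vae = make_model(z=100, sample_size=96, dropword_p=0.5, n_classes=256,
                      lstm_size=1000, alpha=0.2)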
import numpy as np
import pickle

# A simple convnet.
# weight_init and filter_init initialize the parameters of the network, scaling
# the random weights inversely proportionally to the square root of the number
# of units entering and leaving each layer (in the spirit of Xavier/Glorot
# initialization).
layers = [
    Conv((4, 4, 3, 20), strides=2, activation=lkrelu,
         filter_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (28 * 28 + 13 * 13 * 20))),
    Conv((5, 5, 20, 40), strides=2, activation=lkrelu,
         filter_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (13 * 13 * 20 + 5 * 5 * 40))),
    Flatten((5, 5, 40)),
    FullyConnected((5 * 5 * 40, 100), activation=relu,
                   weight_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / (5 * 5 * 40 + 100.))),
    FullyConnected((100, 2), activation=sigmoid,
                   weight_init=lambda shp: np.random.normal(size=shp) * np.sqrt(1.0 / 110.))
]

lr = 0.000000001
net = Network(layers, lr=lr, loss=mse)

# To continue training a previously saved network, uncomment the line below.
# net = pickle.load(open("network.nn", "rb"))
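
# Saving a trained network mirrors the commented-out loading line above.
# The train(...) call is hypothetical and not part of the original snippet --
# substitute whatever training entry point your Network implementation provides.
#
# net.train(train_images, train_labels, epochs=10, batch_size=32)
# pickle.dump(net, open("network.nn", "wb"))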